[med-svn] [gmap] 01/10: New upstream version 2017-08-15

Alex Mestiashvili malex-guest at moszumanska.debian.org
Mon Sep 4 12:22:00 UTC 2017


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository gmap.

commit 67575fae4370ed179578b3481f531fe2cbde3742
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Wed Aug 30 21:21:36 2017 +0200

    New upstream version 2017-08-15
---
 ChangeLog                              |   674 +
 Makefile.in                            |     5 +-
 VERSION                                |     2 +-
 config/ax_cpuid_intel.m4               |    11 +-
 config/ax_cpuid_non_intel.m4           |    37 +-
 config/ax_ext.m4                       |    70 +-
 configure                              |   957 +-
 configure.ac                           |   222 +-
 src/Makefile.am                        |   109 +-
 src/Makefile.in                        |  7740 +++++-
 src/block.c                            |    10 +-
 src/chimera.c                          |    45 +-
 src/chimera.h                          |     5 +-
 src/chrom.c                            |    89 +-
 src/cigar.c                            |  1647 ++
 src/cigar.h                            |    33 +
 src/config.h.in                        |     3 -
 src/cpuid.c                            |    46 +-
 src/datadir.c                          |     9 +-
 src/diag.c                             |    96 +-
 src/dynprog.c                          |    13 +-
 src/dynprog.h                          |     3 +-
 src/dynprog_cdna.c                     |     4 +-
 src/dynprog_end.c                      |    53 +-
 src/dynprog_end.h                      |     8 +-
 src/dynprog_genome.c                   |   161 +-
 src/dynprog_simd.c                     |    50 +-
 src/dynprog_simd.h                     |     4 +
 src/dynprog_single.c                   |     7 +-
 src/genome.c                           |    12 +-
 src/genome128_hr.c                     | 11583 +++++---
 src/genome128_hr.h                     |     7 +-
 src/get-genome.c                       |   328 +-
 src/gmap.c                             |  1069 +-
 src/gmap_select.c                      |    24 +
 src/gmapl_select.c                     |    24 +
 src/gsnap.c                            |   287 +-
 src/gsnap_select.c                     |    24 +
 src/gsnapl_select.c                    |    24 +
 src/iit-read.c                         |   818 +-
 src/iit-read.h                         |    19 +-
 src/iit-write.c                        |    41 +-
 src/iit-write.h                        |     5 +-
 src/iit_get.c                          |     4 +-
 src/indel.c                            |   125 +-
 src/indel.h                            |     7 +-
 src/indexdb.c                          |    21 +-
 src/indexdb_hr.c                       |  2988 +-
 src/interval.c                         |    66 +-
 src/interval.h                         |     8 +-
 src/intlist.c                          |    23 +-
 src/intlist.h                          |     4 +-
 src/intron.c                           |    43 +-
 src/intron.h                           |    10 +-
 src/junction.c                         |     7 +-
 src/list.c                             |    32 +-
 src/list.h                             |     3 +-
 src/littleendian.h                     |     5 +-
 src/maxent_hr.c                        |    10 +-
 src/mem.h                              |    26 +-
 src/merge-heap.c                       |   400 +
 src/merge-heap.h                       |    19 +
 src/merge.c                            |  1110 +
 src/merge.h                            |    45 +
 src/oligoindex_hr.c                    | 45074 ++++++++++---------------------
 src/oligoindex_hr.h                    |    54 +-
 src/outbuffer.c                        |   121 +-
 src/outbuffer.h                        |     6 +-
 src/output.c                           |   190 +-
 src/output.h                           |     5 +-
 src/pair.c                             |  2804 +-
 src/pair.h                             |    55 +-
 src/pairpool.c                         |    10 +-
 src/parserange.c                       |     9 +-
 src/popcount.c                         |     8 +-
 src/popcount.h                         |     8 +-
 src/samflags.h                         |     3 +-
 src/samheader.c                        |     4 +-
 src/samprint.c                         |  3330 +--
 src/samprint.h                         |    43 +-
 src/sarray-read.c                      |  7486 +----
 src/sarray-read.h                      |    35 +-
 src/{sarray-read.c => sarray-search.c} |  2145 +-
 src/sarray-search.h                    |    32 +
 src/sequence.c                         |    36 +-
 src/sequence.h                         |     4 +-
 src/shortread.c                        |    83 +-
 src/smooth.c                           |    35 +-
 src/spanningelt.c                      |    36 +-
 src/spanningelt.h                      |     6 +-
 src/stage1hr.c                         |  3513 ++-
 src/stage1hr.h                         |    10 +-
 src/stage2.c                           |  1863 +-
 src/stage3.c                           |  1572 +-
 src/stage3.h                           |    30 +-
 src/stage3hr.c                         |  2378 +-
 src/stage3hr.h                         |    31 +-
 src/substring.c                        |   459 +-
 src/substring.h                        |    55 +-
 src/table.c                            |    28 +-
 src/table.h                            |     4 +-
 src/uniqscan.c                         |    14 +-
 tests/Makefile.in                      |     5 +-
 util/Makefile.am                       |    12 +-
 util/Makefile.in                       |    27 +-
 util/gmap_build.pl.in                  |    66 +-
 util/gtf_transcript_splicesites.pl.in  |   490 +
 107 files changed, 50304 insertions(+), 53209 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7530342..646dd5e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,677 @@
+2017-08-15  twu
+
+    * stage3hr.c: Changed checks on circularalias to circularpos
+
+    * sarray-search.c: Disallowing any splicing solution that goes around a
+      circular origin
+
+    * pair.c: Checking that we are not at the end of the alignment before doing
+      backward steps
+
+    * gmap.c: Removed option -G for uncompressed genome
+
+    * stage1hr.c: Disallowing any splicing solution that goes around a circular
+      origin. Incrementing counter when comparing against max_gmap_improvement.
+      Fixed a memory leak.
+
+2017-07-28  twu
+
+    * stage3.c: In find_dual_break_spliceends, fixed a bug that generated
+      negative coordinates
+
+2017-06-29  twu
+
+    * Makefile.gsnaptoo.am: Added some files to the library and the include
+      directory
+
+    * table.c, table.h: Added a function needed by gstruct
+
+    * interval.c, interval.h: Added a variable to make a function compatible
+      with the gstruct version
+
+    * iit-write.c, iit-write.h: Added a variable to make a function compatible
+      with the gstruct version
+
+    * gsnap.c: Added a header file
+
+    * dynprog_genome.c: Commented out assertions that do not hold in transcript
+      alignment
+
+    * chrom.c: Removed a faulty assertion
+
+2017-06-21  twu
+
+    * stage3.c: For final call to insert_gapholders from path_compute_final,
+      filling the gap with nucleotides if queryjump == genomejump
+
+    * pair.c: For GFF3 output, not printing lines where genomestart and
+      genomeend coordinates are the same, typically resulting from a query skip
+
+    * Makefile.gsnaptoo.am: Added maxent_hr to lib and include
+
+    * stage3hr.c: Added assertions to make sure ilengths are not negative
+
+    * substring.c: In overlap checking procedures, decrementing high coordinate
+      by 1 if possible to match the procedures for clip_overlap and
+      merge_overlap in stage3hr.c
+
+2017-06-19  twu
+
+    * VERSION, index.html: Updated version number
+
+2017-06-16  twu
+
+    * gmap.c: Extended debugging statements
+
+    * stage3.c: In merge procedures, restoring original pairs to Stage3_T
+      objects if the merge fails
+
+    * gsnap.c: Turning off default of 0 for trim-mismatch-score and
+      trim-indel-score for DNA-Seq
+
+    * samprint.c: For XM, handling the case where queryseq_mate is NULL
+
+    * pair.c: For XM, handling the case where queryseq_mate is NULL
+
+2017-06-15  twu
+
+    * shortread.c: Changed memory source of longstring to IN
+
+    * samprint.c: Added back a missing else clause after checking for
+      omit_concordant_uniq_p
+
+    * stage3.c: Added debugging statements for creating and freeing Stage3_T
+      objects
+
+    * stage1hr.c: Fixed memory leaks relating to floors and anchor segments
+
+    * sequence.c: Changed memory source of all contents to IN
+
+    * pair.c: Changed memory source of all tokens to OUT
+
+    * list.c, list.h: Implemented List_to_array_out_n
+
+    * intlist.c, intlist.h: Implemented Intlist_to_char_array_in
+
+    * gmap.c: Fixed memory leaks and memory bugs relating to chimera code. 
+      Removed all references to a nonjoinable list, and using stage3list as the
+      master list for all procedures.
+
+    * genome.c: Changed source of alloc to IN
+
+2017-06-14  twu
+
+    * index.html: Updated for latest version
+
+    * configure.ac: Removed unused macros
+
+    * src, util: Merged revisions 204076 through 207268 from
+      branches/2017-03-07-multimapper-genes
+
+    * stage3hr.c: Reverted from revision 207330 (revision 205421 from
+      branches/2017-03-07-multimapper-genes) to remove nindelbreaks field, since
+      it discriminates against some equivalently good alignments
+
+    * stage3hr.c: Merged revision 205421 from
+      branches/2017-03-07-multimapper-genes to add nindelbreaks field
+
+    * Makefile.gsnaptoo.am: Added commands for building lib and include
+
+    * uniqscan.c: Added Access_controlled_cleanup
+
+    * substring.c: Merged revisions 204076 through 205371 from
+      branches/2017-03-07-multimapper-genes to remove splicecoordN and to set
+      splicecoordD_knowni and splicecoordA_knowni.
+
+    * stage1hr.c: Merged revisions 204076 through 205713 from
+      branches/2017-03-07-multimapper-genes to find DNA chimeras in paired-end
+      reads and to double-check apparent perfect matches for actual number of
+      mismatches
+
+    * sequence.c, sequence.h: Added function Sequence_stdout_header
+
+    * sarray-read.c, sarray-read.h, sarray-search.c, sarray-search.h: Merged
+      revisions 204076 through 205420 from branches/2017-03-07-multimapper-genes
+      to move search functions from sarray-read.c to sarray-search.c
+
+    * samprint.c, samprint.h: Merged revisions 204076 through 206196 from
+      branches/2017-03-07-multimapper-genes to print information in XT field for
+      transcript splicing and to handle omitting of concordant alignments
+
+    * samheader.c: Don't open a file for OUTPUT_NONE
+
+    * popcount.c, popcount.h: Modified conditions for including our own popcount
+      instructions.  No longer needed if built-in options are available
+
+    * pair.c: Modified compressed format to no longer print tokens or
+      dinucleotides
+
+    * littleendian.h: Added macros for FREAD_FLOATS and FWRITE_FLOATS
+
+    * iit-read.c, iit-read.h: Merged revisions 205322 through 206058 from
+      branches/2017-03-07-multimapper-genes to support --coding in get-genome
+      and to implement IIT_genestruct_chrpos
+
+    * gsnap.c: Merged revisions 204076 through 206184 from
+      branches/2017-03-07-multimapper-genes to add options for transcriptome
+      alignment and omitting concordant output.
+
+    * datadir.c: Modified messages when gmapdb is not found
+
+    * cigar.c: Fixed printing of "*" for cigar when mate is NULL or substrings
+      is NULL
+
+    * block.c: Merged revision 204180 from branches/2017-03-07-multimapper-genes
+      to generalize from 12-mers to oligo size for debugging output
+
+    * stage3hr.c, stage3hr.h: Merged revisions 206187 and 205714 from
+      branches/2017-03-07-multimapper-genes to add behavior for
+      --omit-concordant-uniq and --omit-concordant-mult and to add a
+      splice_score field for all splice types
+
+2017-06-13  twu
+
+    * pair.c, stage3.c: Changed type of chroffset and chrhigh from Chrpos_T to
+      Univcoord_T in trim end functions
+
+2017-06-12  twu
+
+    * gmap.c, output.c, pair.c, pair.h, src, stage3.c, stage3.h: Merged revision
+      204925 from branches/2017-04-02-genome-genome to add bedpe output
+
+    * diag.c, stage2.c: Merged revisions 207196 and 207198 from
+      branches/2017-04-02-genome-genome to improve genome-genome alignment
+
+2017-06-10  twu
+
+    * stage3.c: Using functions now in pair.c
+
+    * samprint.c: Moved some functions to pair.c
+
+    * output.c: Using functions now in pair.c
+
+    * get-genome.c: Allowing for --dump to work with --exons
+
+    * substring.h: Moved typedef of Substring_T early
+
+    * cigar.c, pair.c, pair.h, stage3hr.c, stage3hr.h: Moved some functions to
+      pair.c
+
+    * Makefile.gsnaptoo.am: Including cigar.c and cigar.h for uniqscan and
+      uniqscanl
+
+2017-05-30  twu
+
+    * substring.c, substring.h: Commenting out procedures needed for chrpos_high
+
+    * stage3hr.c, stage3hr.h: Commenting out procedures needed for chrpos_high. 
+      Using procedures from cigar.c.
+
+    * stage3.c: Using procedures from cigar.c
+
+    * stage1hr.c: Added debugging statement
+
+    * pair.c, pair.h: Moved CIGAR printing procedures to cigar.c.  Printing mate
+      cigar in XM field instead of mate chrpos_high.
+
+    * gsnap.c: Using new interfaces to Output_setup and SAM_setup
+
+    * output.c, output.h, samprint.c, samprint.h: Moved setup of merge_samechr_p
+      from output.c to samprint.c
+
+    * samprint.c, samprint.h: Moved CIGAR printing procedures to cigar.c. 
+      Printing mate cigar in XM field instead of mate chrpos_high.
+
+    * Makefile.gsnaptoo.am, cigar.c, cigar.h: Added cigar.c and cigar.h for code
+      relating to printing of CIGAR strings
+
+2017-05-25  twu
+
+    * sarray-read.c: Increased iteration condition, allowing sarray algorithm to
+      work when nmisses_allowed is zero.
+
+2017-05-13  twu
+
+    * stage3.c: In Stage3_merge_chimera, doing peelback to remove any indels at
+      the chimeric junction
+
+2017-05-11  twu
+
+    * output.c, pair.c, pair.h, samprint.c, samprint.h, stage3hr.c, stage3hr.h,
+      substring.c, substring.h: Added printing of mate chrpos high with an XM
+      field
+
+    * stage1hr.c: Removed exception for FREE_ALIGN when nstreams is 1
+
+    * iit_get.c: Commented out printing of total when reading queries from stdin
+
+    * chimera.c, chimera.h, gmap.c: Allowing search for chimera exon-exon
+      boundary to extend for 1 mismatch
+
+    * stage3.c, stage3.h: Implemented procedures Stage3_trim_left and
+      Stage3_trim_right
+
+    * gmap.c: Calling Chimera_find_breakpoint first to set bounds based on
+      sequence, and then Chimera_find_exonexon to find the exon boundary
+
+    * gmap.c: Increased value of CHIMERA_EXTEND from 8 to 20
+
+2017-05-10  twu
+
+    * parserange.c: Added null terminating character after strncpy
+
+2017-05-09  twu
+
+    * shortread.c: Fixed uninitialized variable in nextchar2 and invalid free
+      when skipping in second file
+
+2017-05-08  twu
+
+    * uniqscan.c: Using new interface to Stage1hr_setup
+
+    * gsnap.c, stage1hr.c, stage1hr.h: Added --speed option for GSNAP
+
+    * gmap.c: Set default value for maxintronlen to be 500,000
+
+2017-05-04  twu
+
+    * stage3hr.c: Changed the procedure for resolving overlapping and separate
+      alignments.  Now filtering both the overlapping and separate alignments. 
+      Using expected pairlength and pairlength deviation to select which one to
+      report.
+
+    * stage1hr.c: Turned off the shortcut to skip complete set algorithm if
+      suffix array has found something.  Turned off the shortcut for GMAP
+      pairsearch/halfmapping if nconcordant > 0
+
+    * spanningelt.c: Changed a check procedure to abort rather than exit
+
+    * spanningelt.h: Fixed a typo in a comment
+
+    * merge.c: Made Merge_diagonals non-destructive, by copying the streams into
+      the heap
+
+    * indexdb_hr.c: Added a comment about Merge_uint4 being destructive
+
+    * iit-read.c, iit-read.h: Added IIT_gene_overlapp function used by
+      get-genome with the --coding flag
+
+    * get-genome.c: Added a --coding flag to report only genes that overlap in
+      their coding regions
+
+2017-04-24  twu
+
+    * index.html: Updated for latest version
+
+    * Makefile.am: Added gtf_transcript_splicesites to CLEANFILES
+
+    * pair.c, pair.h: Taking mate_chrnum as an argument in SAM print function
+
+    * output.c, samprint.c, samprint.h: Computing chrnum and mate_chrnum at same
+      time as chrpos and mate_chrpos, to resolve issues with SAM output
+
+2017-04-22  twu
+
+    * samprint.c, stage3hr.c, stage3hr.h: Fixed issue in mate chromosome printed
+      when mate is a translocation
+
+2017-04-13  twu
+
+    * chrom.c: Fixed compare function for alpha_numeric entries
+
+    * Makefile.am: Added an entry for gtf_transcript_splicesites
+
+    * index.html: Changed for latest version
+
+    * configure.ac: Added an entry for gtf_transcript_splicesites
+
+    * gtf_transcript_splicesites.pl.in: Changed output format
+
+    * stage3.c: Revised debugging statements
+
+    * samflags.h, samprint.c, samprint.h: Adding supplementary flag.  Adding
+      information to XT field for transcript splicing.
+
+    * gsnap.c: Turning off trim mismatch for DNA-Seq
+
+2017-04-12  twu
+
+    * gmap.c: Added comments
+
+    * dynprog_end.c: Turned wideband off for extending from medial splicesite,
+      which was causing the end exon to be re-discovered as an indel
+
+2017-04-11  twu
+
+    * gtf_transcript_splicesites.pl.in: Initial import
+
+2017-03-18  twu
+
+    * substring.c: Commented out assertions that don't hold under SNP-tolerant
+      alignment
+
+    * stage3hr.c: Improved debugging statement
+
+    * stage1hr.c: Proceeding to spanning set procedure if the other end has more
+      hits than the number of concordant hits
+
+    * splicing-score.c: Added debugging calls to Maxent_hr procedures, to help
+      in development
+
+    * samprint.c: Added debugging statements
+
+    * maxent_hr.c: Added debugging statements
+
+    * Makefile.gsnaptoo.am: Added files for splicing_score
+
+    * iit-read.c: Fixed memory leak for intron-level known splicing
+
+    * junction.c: Added debugging statements
+
+    * intron.c, intron.h: Added some utility functions
+
+    * indel.c, indel.h: Modified Indel_resolve_middle_deletion to favor and
+      report intron dinucleotides for short deletions
+
+    * gsnap.c: Using new interface to Sarray_setup
+
+2017-03-17  twu
+
+    * sarray-read.c, sarray-read.h: Checking short deletions with length between
+      min_intronlength and max_deletionlen to see if they are introns
+
+2017-03-09  twu
+
+    * get-genome.c: Allowing dump of all sequences from a map file
+
+2017-03-03  twu
+
+    * pairpool.c: Allowing querypos and genomepos of 0
+
+2017-02-24  twu
+
+    * VERSION, config.site.rescomp.prd, config.site.rescomp.tst: Updated version
+      number
+
+    * chrom.c: Added a type ALPHA_NUMERIC and sorting appropriately for those.
+      Stripping "Chr" as well as "chr" from names
+
+    * pairpool.c: In Pairpool_push, not doing anything if querypos or genomepos
+      is less than or equal to 0
+
+    * stage2.c: For convert_to_nucleotides, handling the case where path is NULL
+
+    * gmap.c: Added missing brace
+
+    * gmap.c, stage3.c, stage3.h: Added option --split-large-introns and
+      implemented procedure Stage3_split
+
+    * stage2.c: Renamed variable querypos to curr_querypos in some procedures,
+      so debug9 can be used
+
+2017-02-16  twu
+
+    * iit-read.c: Handling the case in finding unique positions and splices
+      where a gene has no overlapping genes
+
+2017-02-15  twu
+
+    * archive.html, index.html: Updated for latest version
+
+    * VERSION: Updated version number
+
+    * substring.c: Fixed calculation of mandatory_trim_left and
+      mandatory_trim_right
+
+    * indexdb.c: Assigning MMAPPED to positions_high_access when appropriate, to
+      avoid free() error at end of program
+
+    * output.c: Ignoring mergedp in restricting the final result to a single
+      path
+
+    * gmap.c: Allowing value of --suboptimal-score to be a float.  Ignoring
+      mergedp in handling the final result
+
+    * gmap_build.pl.in: Added flag to build genome index in parts
+
+    * substring.c: For default alignment format, filling in stars in regions
+      where the alignment goes past the beginning or end of the genome
+
+    * dynprog_single.c, stage3.c: Added checks against non-positive values for
+      rlength and glength in Dynprog_single_gap.  Also requiring a positive
+      value for rlength in running Dynprog_single_gap over Dynprog_cdna_gap or
+      Dynprog_genome_gap.
+
+    * dynprog.c: Added debugging statements
+
+2017-02-14  twu
+
+    * stage3.c, stage3.h: Added sort comparison procedures to help with local
+      chimeric joins on each chromosome
+
+    * gmap.c: In checking for local chimeric joins, processing each chromosome
+      separately
+
+    * stage3hr.c: Not resolving inside alignment when the coordinates look like
+      a scramble, which can occur with circular chromosomes
+
+    * stage1hr.c: Fixed a memory leak for a non-concordant pair.  Fixed an
+      uninitialized variable for non-spliced alignment
+
+    * oligoindex_hr.h: Commented out obsolete code
+
+    * iit-read.c, iit-read.h: Added support for finding unique splices, and for
+      finding unique positions and splices in a set of genes
+
+    * gmap.c, gsnap.c: Fixed printing of SIMD capabilities for AVX2 and AVX512
+
+    * get-genome.c: Added ability to dump a map file, and the ability to print
+      unique positions among a set of genes
+
+    * genome128_hr.c: Changed builtin commands for trailing and leading zeroes
+      to use the long long versions for 64-bit words
+
+    * genome.c: Commented out messages to stderr for negative coordinates
+
+    * dynprog_genome.c: Increased rewards for canonical intron.  Removed
+      penalties for indels next to a splice site
+
+2017-02-08  twu
+
+    * get-genome.c: Printing presence/absence of unique splices also
+
+2017-01-31  twu
+
+    * get-genome.c, iit-read.c, iit-read.h: Added option --nunique to print
+      number of unique positions
+
+2017-01-27  twu
+
+    * oligoindex_hr.c: Fixed typos in atoi functions for SSE2 code
+
+2017-01-14  twu
+
+    * stage3hr.c: Fixed a memory leak in resolving inner splices
+
+2017-01-13  twu
+
+    * dynprog_end.c: Fixed conditional jump based on finalscore, by not checking
+      when endalign is QUERYEND_NOGAPS
+
+    * stage1hr.c: Fixed uninitialized value for successp.  Using FREE_ALIGN
+      macro
+
+    * spanningelt.c: Using MALLOC_ALIGN instead of MALLOC when needed
+
+    * indexdb_hr.c: Using MALLOC_ALIGN instead of MALLOC when needed
+
+    * oligoindex_hr.c: Including atoi.h
+
+    * samprint.c, substring.c, substring.h: Fixed coordinates reported in XT
+      field, which depend on the donor and acceptor strands
+
+    * merge.c: Using macros FREE_ALIGN and CHECK_ALIGN
+
+    * mem.h: Defined macros FREE_ALIGN and CHECK_ALIGN
+
+2017-01-10  twu
+
+    * genome128_hr.c: Fixed incorrect AVX macro
+
+    * oligoindex_hr.c: Changed _mm_bsrli_si128 to _mm_srli_si128.  Added atoi
+      and ttoc modes to all code.
+
+2017-01-09  twu
+
+    * gsnap.c: Removed option --microexon-spliceprob
+
+2017-01-06  twu
+
+    * stage1hr.c: Using alignments with most matches, even if they are
+      translocations compared with other hitpairs
+
+2017-01-02  twu
+
+    * genome128_hr.c: For handling middle rows, using <= and >= to endptr and
+      startptr, instead of < and >
+
+2017-01-01  twu
+
+    * stage3.c: Using new interface to Dynprog_end5_gap and Dynprog_end3_gap
+
+    * stage1hr.c: In identify_all_segments, filtering out diagonals <
+      querylength from the merged array
+
+    * dynprog_single.c: Using use8p_size
+
+    * dynprog_simd.h: Removing fixed definition for SIMD_MAXLENGTH_EPI8
+
+    * dynprog_simd.c: Added assertions for traceback procedures for vertical and
+      horizontal jumps not to go past the main diagonal.  Put macros around
+      memory fences in debugging print procedures.
+
+    * dynprog_end.c, dynprog_end.h: Using use8p_size and introduced parameter
+      require_pos_score_p
+
+    * dynprog_cdna.c, dynprog_genome.c: Using use8p_size
+
+    * dynprog.c, dynprog.h: Introducing an array for use8p_size that depends on
+      the mismatch type
+
+2016-12-30  twu
+
+    * stage3hr.c: Not converting splices when resolving insides of
+      paired-end reads
+
+2016-12-29  twu
+
+    * dynprog_genome.c, gsnap.c, pair.c, pair.h, sarray-read.c, smooth.c, src,
+      stage1hr.c, stage1hr.h, stage2.c, stage3.c, stage3.h, stage3hr.c,
+      stage3hr.h, substring.c, substring.h, trunk, uniqscan.c: Merged revisions
+      201789 through 202030 from branches/2016-12-18-stage2-soa to make various
+      improvements to alignments
+
+    * stage1hr.c: Added debugging statements
+
+    * indexdb_hr.c: Checking for nmerged being 0
+
+2016-12-16  twu
+
+    * ax_ext.m4: Not adding -mno options to an Intel compiler
+
+    * indexdb_hr.c: Returning an array created by malloc, rather than
+      _mm_malloc, from the merge version of Indexdb_merge_compoundpos
+
+    * sarray-read.c: Using qsort instead of Sedgesort, because of seg faults
+      observed on Intel compiler
+
+    * Makefile.gsnaptoo.am: Including merge.c, merge.h, merge-heap.c, and
+      merge-heap.h where needed
+
+    * stage1hr.c: Providing a version of identify_all_segments for LARGE_GENOMES
+
+    * indexdb_hr.c: Cleaned up code so there are three versions of
+      Indexdb_merge_compoundpos.  Fixed the merge version.
+
+    * oligoindex_hr.c: Fixed faulty svn merge
+
+    * genome128_hr.c: Fixed faulty svn merge, and hid shift_lo and shift_hi
+      procedures
+
+    * Makefile.gsnaptoo.am, indexdb_hr.c, mem.h, merge-heap.c, merge-heap.h,
+      merge.c, merge.h, src, stage1hr.c, trunk: Merged revisions 200992 through
+      201743 from branches/2016-11-28-simd-merging to revise SIMD merge code
+
+    * spanningelt.c, spanningelt.h: Merged revisions 200992 through 201743 from
+      branches/2016-11-28-simd-merging to change a calloc to a malloc
+
+    * Makefile.gsnaptoo.am, ax_cpuid_intel.m4, ax_cpuid_non_intel.m4, ax_ext.m4,
+      configure.ac, cpuid.c, src, trunk: Merged revisions 200476 through 201735
+      from branches/2016-11-14-avx512 to make provisions for AVX-512
+
+    * gmap.c: Merged revisions 200476 through 201735 from
+      branches/2016-11-14-avx512 to change Genome_hr_user_setup to
+      Genome_hr_setup
+
+    * gmap_select.c, gmapl_select.c, gsnap_select.c, gsnapl_select.c: Merged
+      revisions 200476 through 201735 from branches/2016-11-14-avx512 to add
+      provisions for AVX-512
+
+    * genome128_hr.c, genome128_hr.h: Merged revisions 200476 through 201735
+      from branches/2016-11-14-avx512 to add shift and wrap procedures
+
+    * oligoindex_hr.c, oligoindex_hr.h: Merged revisions 200476 through 201735
+      from branches/2016-11-14-avx512 to revise algorithms substantially
+
+    * oligoindex_old.c, oligoindex_old.h: Merged revisions 200476 through 201735
+      from branches/2016-11-14-avx512 to make checking code work with current
+      code
+
+    * stage2.c: Merged revisions 200476 through 201735 from
+      branches/2016-11-14-avx512 to fix debugging comment
+
+    * sarray-read.c: Merged revisions 200476 through 201735 from
+      branches/2016-11-14-avx512 to add AVX-512 code
+
+    * stage1hr.c: Fixed uninitialized variable
+
+2016-12-13  twu
+
+    * VERSION, config.site.rescomp.prd, config.site.rescomp.tst, genome128_hr.c,
+      src, trunk: Merged revisions 201421 through 201532 from
+      branches/2016-12-09-genomebits-serial-simd to change structure of SIMD
+      code in genome128_hr.c
+
+    * index.html: Updated for version 2016-11-07
+
+    * configure.ac: Allowing sse4.1 and sse4.2 as responses to --with-simd-level
+
+    * samprint.c: Added missing pair of braces
+
+    * gsnap.c, stage1hr.c, stage1hr.h: Removed references to indel_knownsplice
+      mode for gmap
+
+2016-11-18  twu
+
+    * oligoindex_hr.c: Fixed debugging statements to use SIMD commands in count
+      procedures
+
+2016-11-16  twu
+
+    * ax_ext.m4: Removed -mno... flags for compilers
+
+    * configure.ac: Restricting response to --with-simd-level
+
+    * ax_cpuid_intel.m4: Fixed configure issue for AVX2 support using Intel
+      compiler
+
+2016-11-14  twu
+
+    * pair.c: Removed initialization of static variables
+
+    * gsnap.c, outbuffer.c, outbuffer.h, output.c, output.h: Separate output
+      files for single-end and paired-end results
+
 2016-11-08  twu
 
     * sam_sort.c: Added printing at monitor intervals
diff --git a/Makefile.in b/Makefile.in
index 36e3f57..1dc67f7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -266,6 +266,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBGMAP_SO_VERSION = @LIBGMAP_SO_VERSION@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -278,9 +279,6 @@ MAKEINFO = @MAKEINFO@
 MANIFEST_TOOL = @MANIFEST_TOOL@
 MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
 MKDIR_P = @MKDIR_P@
-MPICC = @MPICC@
-MPILIBS = @MPILIBS@
-MPI_CFLAGS = @MPI_CFLAGS@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -305,6 +303,7 @@ SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@
+SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@
 SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@
 SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@
 SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@
diff --git a/VERSION b/VERSION
index 6f55558..c710ef3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2016-11-07
\ No newline at end of file
+2017-08-15
\ No newline at end of file
diff --git a/config/ax_cpuid_intel.m4 b/config/ax_cpuid_intel.m4
index d553e91..b22d412 100644
--- a/config/ax_cpuid_intel.m4
+++ b/config/ax_cpuid_intel.m4
@@ -43,7 +43,16 @@ AC_LANG_PUSH([C])
 	[AC_LANG_PROGRAM([[#include <immintrin.h>]],
                          [[return _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE) ? 0 : 9;]])],
         [AC_MSG_RESULT(yes)
-         ax_cv_cpu_has_sse42_ext=yes],
+         ax_cv_cpu_has_avx2_ext=yes],
+	[AC_MSG_RESULT(no)])
+
+# Test for AVX512 support
+  AC_MSG_CHECKING(for avx512 support)
+  AC_RUN_IFELSE(
+	[AC_LANG_PROGRAM([[#include <immintrin.h>]],
+                         [[return _may_i_use_cpu_feature(_FEATURE_AVX512F | _FEATURE_AVX512CD) ? 0 : 9;]])],
+        [AC_MSG_RESULT(yes)
+         ax_cv_cpu_has_avx512_ext=yes],
 	[AC_MSG_RESULT(no)])
 
 AC_LANG_POP([C])
diff --git a/config/ax_cpuid_non_intel.m4 b/config/ax_cpuid_non_intel.m4
index 7b5636e..e2cf96b 100644
--- a/config/ax_cpuid_non_intel.m4
+++ b/config/ax_cpuid_non_intel.m4
@@ -118,15 +118,15 @@ static int check_xcr0_ymm () {
  if ((abcd[/*ECX*/2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) {
    return 9;
  } else if (!check_xcr0_ymm()) {
-   return 9;
+   return 8;
  } else {
    run_cpuid(7, 0, abcd);
    if ((abcd[/*EBX*/1] & avx2_bmi12_mask) != avx2_bmi12_mask) {
-     return 9;
+     return 7;
    } else {
      run_cpuid(0x80000001, 0, abcd);
      if ((abcd[/*ECX*/2] & lzcnt_mask) != lzcnt_mask) {
-       return 9;
+       return 6;
      } else {
        return 0;
      }
@@ -152,5 +152,36 @@ static void run_cpuid (uint32_t eax, uint32_t ecx, uint32_t *abcd) {
          ax_cv_cpu_has_bmi2_ext=yes],
 	[AC_MSG_RESULT(no)])
 
+
+# Test for AVX512 support: needs OSXSAVE, OS-enabled zmm state, and AVX512F+AVX512CD in CPUID leaf 7
+  AC_MSG_CHECKING(for avx512 support)
+  AC_RUN_IFELSE(
+	[AC_LANG_PROGRAM([[#include <stdint.h>
+static void run_cpuid (uint32_t eax, uint32_t ecx, uint32_t *abcd) {
+  uint32_t ebx, edx;
+  __asm__ ("cpuid" : "=b" (ebx), "+a" (eax), "+c" (ecx), "=d" (edx)); /* ebx is output-only */
+  abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;}
+static int check_xcr0_zmm () {
+  uint32_t xcr0;
+  uint32_t zmm_ymm_xmm = ((7 << 5) | (1 << 2) | (1 << 1)); /* XCR0 bits 7:5 (AVX-512 state), 2 (ymm), 1 (xmm) */
+  __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
+  return ((xcr0 & zmm_ymm_xmm) == zmm_ymm_xmm);}]],
+[[uint32_t abcd[4];
+ uint32_t osxsave_mask = (1 << 27); /* tested against ECX of CPUID leaf 1 */
+ uint32_t avx512_mask = (/*512F*/(1 << 16) | /*512CD*/(1 << 28)); /* tested against EBX of CPUID leaf 7 */
+ run_cpuid(1, 0, abcd);
+ if ((abcd[/*ECX*/2] & osxsave_mask) != osxsave_mask) {
+   return 9;
+ } else if (!check_xcr0_zmm()) {
+   return 8;
+ } else {
+   /* AVX512F and AVX512CD are reported in EBX of CPUID leaf 7 subleaf 0 */
+   run_cpuid(7, 0, abcd);
+   return ((abcd[/*EBX*/1] & avx512_mask) == avx512_mask) ? 0 : 7;
+ }]])],
+        [AC_MSG_RESULT(yes)
+         ax_cv_cpu_has_avx512_ext=yes],
+	[AC_MSG_RESULT(no)])
+
 AC_LANG_POP([C])
 ])
diff --git a/config/ax_ext.m4 b/config/ax_ext.m4
index 2832615..e7e0076 100644
--- a/config/ax_ext.m4
+++ b/config/ax_ext.m4
@@ -93,7 +93,6 @@ AC_DEFUN([AX_EXT],
           TEST_CFLAGS="-mssse3"
 	else
           TEST_CFLAGS="$SIMD_SSE2_CFLAGS -mssse3"
-          SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3"
         fi
         AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_ssse3_ext=yes], [ax_cv_ext_compile_problem=yes])
         if test x"$ax_cv_compile_ssse3_ext" != xyes; then
@@ -111,6 +110,9 @@ AC_DEFUN([AX_EXT],
             AC_MSG_RESULT([yes])
             ax_make_ssse3=yes
 	    SIMD_SSSE3_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+              SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3"
+            fi
 #           AC_DEFINE(HAVE_SSSE3,1,[Define to 1 if you support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) -- Defines run-type
           fi            
         fi
@@ -122,8 +124,6 @@ AC_DEFUN([AX_EXT],
           TEST_CFLAGS="-msse4.1"
 	else
           TEST_CFLAGS="$SIMD_SSSE3_CFLAGS -msse4.1"
-	  SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1"
-	  SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1"
         fi
 	AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_sse41_ext=yes], [ax_cv_ext_compile_problem=yes])
 	if test x"$ax_cv_compile_sse41_ext" != xyes; then
@@ -141,6 +141,10 @@ AC_DEFUN([AX_EXT],
             AC_MSG_RESULT([yes])
             ax_make_sse41=yes
             SIMD_SSE4_1_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+  	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1"
+            fi
 #  	    AC_DEFINE(HAVE_SSE4_1,1,[Define to 1 if you support SSE4.1 (Streaming SIMD Extensions 4.1) instructions]) -- Not used
           fi            
 	fi
@@ -152,9 +156,6 @@ AC_DEFUN([AX_EXT],
           TEST_CFLAGS="-march=corei7"
         else
           TEST_CFLAGS="$SIMD_SSE4_1_CFLAGS -msse4.2"
-	  SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2"
-	  SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2"
-	  SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2"
         fi
         AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_sse42_ext=yes], [ax_cv_ext_compile_problem=yes])
         if test x"$ax_cv_compile_sse42_ext" != xyes; then
@@ -172,6 +173,11 @@ AC_DEFUN([AX_EXT],
             AC_MSG_RESULT([yes])
             ax_make_sse42=yes
             SIMD_SSE4_2_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2"
+            fi
           fi
         fi
       fi
@@ -267,10 +273,6 @@ AC_DEFUN([AX_EXT],
 	  TEST_CFLAGS="-march=core-avx2"
 	else
           TEST_CFLAGS="$SIMD_SSE4_2_CFLAGS -mavx2 -mbmi2"
-	  SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2 -mno-bmi2"
-	  SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2 -mno-bmi2"
-	  SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2 -mno-bmi2"
-	  SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2 -mno-bmi2"
         fi
         AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_avx2_ext=yes], [ax_cv_ext_compile_problem=yes])
         if test x"$ax_cv_compile_avx2_ext" != xyes; then
@@ -288,6 +290,12 @@ AC_DEFUN([AX_EXT],
             AC_MSG_RESULT([yes])
             ax_make_avx2=yes
             SIMD_AVX2_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+  	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2"
+	      SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2"
+            fi
 #  	    AC_DEFINE(HAVE_AVX2,1,[Define to 1 if you support AVX2 (Advanced Vector Extensions 2) instructions]) -- Defines run-type
           fi
         fi
@@ -312,6 +320,12 @@ AC_DEFUN([AX_EXT],
             AC_MSG_WARN([Your compiler supports -mbmi2 but not your linker.  Can you try another linker or update yours?])
           else
             SIMD_AVX2_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+  	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-bmi2"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-bmi2"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-bmi2"
+	      SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-bmi2"
+	    fi
             AC_MSG_CHECKING(for _pext support)
             AC_RUN_IFELSE(
 	      [AC_LANG_PROGRAM([[#include <immintrin.h>]],
@@ -323,6 +337,41 @@ AC_DEFUN([AX_EXT],
         fi
       fi
 
+      # AVX512F/AVX512CD: enable only if the compiler accepts the flags and immintrin.h links with them
+      if test x"$ax_cv_cpu_has_avx512_ext" = xyes; then
+        CFLAGS=
+        if test x"$ax_cv_c_compiler_vendor" = xintel; then
+          TEST_CFLAGS="-xCOMMON-AVX512"
+        else
+          TEST_CFLAGS="$SIMD_AVX2_CFLAGS -mavx512f -mavx512cd"
+        fi
+        AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_avx512_ext=yes], [ax_cv_ext_compile_problem=yes])
+        if test x"$ax_cv_compile_avx512_ext" != xyes; then
+          AC_MSG_WARN([Your CPU supports AVX512 instructions but not your compiler.  Can you try another compiler or update yours?])
+        else
+          CFLAGS=$TEST_CFLAGS
+          AC_MSG_CHECKING(for immintrin.h header file with AVX512 flags)
+          AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>])],
+                         [ax_cv_link_avx512_immintrin_h=yes],
+		         [ax_cv_ext_linker_problem=yes])
+          if test x"$ax_cv_link_avx512_immintrin_h" != xyes; then
+            AC_MSG_RESULT([no])
+            AC_MSG_WARN([Your compiler supports AVX512 instructions but not your linker.  Can you try another linker or update yours?])
+          else
+            AC_MSG_RESULT([yes])
+            ax_make_avx512=yes
+            SIMD_AVX512_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_AVX2_CFLAGS="$SIMD_AVX2_CFLAGS -mno-avx512f -mno-avx512cd"
+            fi
+          fi
+        fi
+      fi
+
     ;;
   esac
 
@@ -333,6 +382,7 @@ AC_DEFUN([AX_EXT],
   AC_SUBST(SIMD_SSE4_1_CFLAGS)
   AC_SUBST(SIMD_SSE4_2_CFLAGS)
   AC_SUBST(SIMD_AVX2_CFLAGS)
+  AC_SUBST(SIMD_AVX512_CFLAGS)
 
 ])
 
diff --git a/configure b/configure
index e5f319c..ec2f52c 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2016-11-07.
+# Generated by GNU Autoconf 2.69 for gmap 2017-08-15.
 #
 # Report bugs to <Thomas Wu <twu at gene.com>>.
 #
@@ -590,8 +590,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='gmap'
 PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2016-11-07'
-PACKAGE_STRING='gmap 2016-11-07'
+PACKAGE_VERSION='2017-08-15'
+PACKAGE_STRING='gmap 2017-08-15'
 PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
 PACKAGE_URL=''
 
@@ -650,6 +650,9 @@ MAKE_SSE4_2_FALSE
 MAKE_SSE4_2_TRUE
 MAKE_AVX2_FALSE
 MAKE_AVX2_TRUE
+MAKE_AVX512_FALSE
+MAKE_AVX512_TRUE
+SIMD_AVX512_CFLAGS
 SIMD_AVX2_CFLAGS
 SIMD_SSE4_2_CFLAGS
 SIMD_SSE4_1_CFLAGS
@@ -684,10 +687,6 @@ EGREP
 GREP
 SED
 LIBTOOL
-MPI_FOUND_FALSE
-MPI_FOUND_TRUE
-MPILIBS
-MPICC
 PERL
 BINDIR
 MAINTAINER_FALSE
@@ -752,7 +751,7 @@ build_os
 build_vendor
 build_cpu
 build
-MPI_CFLAGS
+LIBGMAP_SO_VERSION
 CFLAGS
 target_alias
 host_alias
@@ -823,12 +822,10 @@ enable_bzlib
 host_alias
 target_alias
 CFLAGS
-MPI_CFLAGS
 CC
 LDFLAGS
 LIBS
 CPPFLAGS
-MPICC
 LT_SYS_LIBRARY_PATH
 CPP
 MAX_STACK_READLENGTH'
@@ -1372,7 +1369,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures gmap 2016-11-07 to adapt to many kinds of systems.
+\`configure' configures gmap 2017-08-15 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1443,7 +1440,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of gmap 2016-11-07:";;
+     short | recursive ) echo "Configuration of gmap 2017-08-15:";;
    esac
   cat <<\_ACEOF
 
@@ -1495,20 +1492,18 @@ Optional Packages:
   --with-sysroot[=DIR]    Search for dependent libraries within DIR (or the
                           compiler's sysroot if not specified).
   --with-simd-level=STRING
-                          User-selected SIMD level (sse2, ssse3, sse41, sse42,
-                          avx2)
+                          User-selected SIMD level (none, sse2, ssse3,
+                          sse41/sse4.1, sse42/sse4.2, avx2, avx512)
   --with-gmapdb=DIR       Default GMAP database directory
 
 Some influential environment variables:
   CFLAGS      Compiler flags (default: -O3 -fomit-frame-pointer)
-  MPI_CFLAGS  Compiler flags (default: -O3)
   CC          C compiler command
   LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
               nonstandard directory <lib dir>
   LIBS        libraries to pass to the linker, e.g. -l<library>
   CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
               you have headers in a nonstandard directory <include dir>
-  MPICC       MPI C compiler command
   LT_SYS_LIBRARY_PATH
               User-defined run-time library search path.
   CPP         C preprocessor
@@ -1582,7 +1577,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-gmap configure 2016-11-07
+gmap configure 2017-08-15
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1680,73 +1675,6 @@ fi
 
 } # ac_fn_c_try_link
 
-# ac_fn_c_check_func LINENO FUNC VAR
-# ----------------------------------
-# Tests whether FUNC exists, setting the cache variable VAR accordingly
-ac_fn_c_check_func ()
-{
-  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if eval \${$3+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
-   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
-#define $2 innocuous_$2
-
-/* System header to define __stub macros and hopefully few prototypes,
-    which can conflict with char $2 (); below.
-    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
-    <limits.h> exists even on freestanding compilers.  */
-
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-
-#undef $2
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char $2 ();
-/* The GNU C library defines this for functions which it implements
-    to always fail with ENOSYS.  Some functions are actually named
-    something starting with __ and the normal name is an alias.  */
-#if defined __stub_$2 || defined __stub___$2
-choke me
-#endif
-
-int
-main ()
-{
-return $2 ();
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  eval "$3=yes"
-else
-  eval "$3=no"
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-fi
-eval ac_res=\$$3
-	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
-
-} # ac_fn_c_check_func
-
 # ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
 # -------------------------------------------------------
 # Tests whether HEADER exists and can be compiled using the include files in
@@ -1857,6 +1785,73 @@ fi
 
 } # ac_fn_c_try_run
 
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+ac_fn_c_check_func ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $2 (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main ()
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  eval "$3=yes"
+else
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
 # ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
 # -------------------------------------------------------
 # Tests whether HEADER exists, giving a warning if it cannot be compiled using
@@ -2188,7 +2183,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by gmap $as_me 2016-11-07, which was
+It was created by gmap $as_me 2017-08-15, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -2538,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
 $as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-11-07" >&5
-$as_echo "2016-11-07" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-08-15" >&5
+$as_echo "2017-08-15" >&6; }
 
 
 ### Read defaults
@@ -2624,12 +2619,10 @@ else
 $as_echo "$CFLAGS" >&6; }
 fi
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking MPI_CFLAGS" >&5
-$as_echo_n "checking MPI_CFLAGS... " >&6; }
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPI_CFLAGS" >&5
-$as_echo "$MPI_CFLAGS" >&6; }
-
+#AC_MSG_CHECKING(MPI_CFLAGS)
+#AC_ARG_VAR([MPI_CFLAGS], [Compiler flags (default: -O3)])
+#AC_MSG_RESULT($MPI_CFLAGS)
+#AC_SUBST(MPI_CFLAGS)
 
 
 
@@ -2668,6 +2661,10 @@ ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
 
 
 
+LIBGMAP_SO_VERSION=1:0:0
+
+#AC_SUBST([LIBGMAP_API_VERSION], [1.0])
+
 # Make sure we can run config.sub.
 $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
   as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
@@ -4404,7 +4401,7 @@ fi
 
 # Define the identity of the package.
  PACKAGE='gmap'
- VERSION='2016-11-07'
+ VERSION='2017-08-15'
 
 
 cat >>confdefs.h <<_ACEOF
@@ -4718,28 +4715,6 @@ $as_echo_n "checking bindir... " >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $BINDIR" >&5
 $as_echo "$BINDIR" >&6; }
 
-# Works in conjunction with AC_PROG_LIBTOOL -- Commented out because no libraries being built
-#AC_MSG_CHECKING(whether to link statically)
-#AC_ARG_ENABLE([static-linking],
-#               AC_HELP_STRING([--enable-static-linking],
-#                              [Link binaries statically (default=no)]),
-#              [answer="$enableval"],
-#              [answer=""])
-#case x"$answer" in
-#	xyes)
-#	AC_MSG_RESULT(enabled)
-#	STATIC_LDFLAG="-all-static"
-#	;;
-#
-#	xno)
-#	AC_MSG_RESULT(disabled)
-#	;;
-#
-#	x)
-#	AC_MSG_RESULT(not specified so disabled by default)
-#	;;
-#esac
-#AC_SUBST(STATIC_LDFLAG)
 
 
 # Checks for programs.
@@ -5387,266 +5362,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler vendor" >&5
-$as_echo_n "checking for C compiler vendor... " >&6; }
-if ${ax_cv_c_compiler_vendor+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-    # note: don't check for gcc first since some other compilers define __GNUC__
-  vendors="intel:     __ICC,__ECC,__INTEL_COMPILER
-           ibm:       __xlc__,__xlC__,__IBMC__,__IBMCPP__
-           pathscale: __PATHCC__,__PATHSCALE__
-           clang:     __clang__
-           cray:      _CRAYC
-           fujitsu:   __FUJITSU
-           gnu:       __GNUC__
-           sun:       __SUNPRO_C,__SUNPRO_CC
-           hp:        __HP_cc,__HP_aCC
-           dec:       __DECC,__DECCXX,__DECC_VER,__DECCXX_VER
-           borland:   __BORLANDC__,__CODEGEARC__,__TURBOC__
-           comeau:    __COMO__
-           kai:       __KCC
-           lcc:       __LCC__
-           sgi:       __sgi,sgi
-           microsoft: _MSC_VER
-           metrowerks: __MWERKS__
-           watcom:    __WATCOMC__
-           portland:  __PGI
-	   tcc:       __TINYC__
-           unknown:   UNKNOWN"
-  for ventest in $vendors; do
-    case $ventest in
-      *:) vendor=$ventest; continue ;;
-      *)  vencpp="defined("`echo $ventest | sed 's/,/) || defined(/g'`")" ;;
-    esac
-    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-int
-main ()
-{
-
-      #if !($vencpp)
-        thisisanerror;
-      #endif
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  break
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-  done
-  ax_cv_c_compiler_vendor=`echo $vendor | cut -d: -f1`
-
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_c_compiler_vendor" >&5
-$as_echo "$ax_cv_c_compiler_vendor" >&6; }
-
-# AM_CONDITIONAL(INTEL_COMPILER,test "x$ax_cv_c_compiler_vendor" = xintel)
-
-
-
-
-
-
-
-	for ac_prog in mpicc hcc mpxlc_r mpxlc mpcc cmpicc cc
-do
-  # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_path_MPICC+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $MPICC in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_MPICC="$MPICC" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
-    ac_cv_path_MPICC="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-MPICC=$ac_cv_path_MPICC
-if test -n "$MPICC"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $MPICC" >&5
-$as_echo "$MPICC" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-  test -n "$MPICC" && break
-done
-
-	ax_mpi_save_CC="$CC"
-	CC="$MPICC"
-
-
-
-if test x = x"$MPILIBS"; then
-	ac_fn_c_check_func "$LINENO" "MPI_Init" "ac_cv_func_MPI_Init"
-if test "x$ac_cv_func_MPI_Init" = xyes; then :
-  MPILIBS=" "
-fi
-
-fi
-
-if test x = x"$MPILIBS"; then
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for MPI_Init in -lmpi" >&5
-$as_echo_n "checking for MPI_Init in -lmpi... " >&6; }
-if ${ac_cv_lib_mpi_MPI_Init+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-lmpi  $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char MPI_Init ();
-int
-main ()
-{
-return MPI_Init ();
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  ac_cv_lib_mpi_MPI_Init=yes
-else
-  ac_cv_lib_mpi_MPI_Init=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpi_MPI_Init" >&5
-$as_echo "$ac_cv_lib_mpi_MPI_Init" >&6; }
-if test "x$ac_cv_lib_mpi_MPI_Init" = xyes; then :
-  MPILIBS="-lmpi"
-fi
-
-fi
-if test x = x"$MPILIBS"; then
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for MPI_Init in -lmpich" >&5
-$as_echo_n "checking for MPI_Init in -lmpich... " >&6; }
-if ${ac_cv_lib_mpich_MPI_Init+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-lmpich  $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char MPI_Init ();
-int
-main ()
-{
-return MPI_Init ();
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  ac_cv_lib_mpich_MPI_Init=yes
-else
-  ac_cv_lib_mpich_MPI_Init=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mpich_MPI_Init" >&5
-$as_echo "$ac_cv_lib_mpich_MPI_Init" >&6; }
-if test "x$ac_cv_lib_mpich_MPI_Init" = xyes; then :
-  MPILIBS="-lmpich"
-fi
-
-fi
-
-if test x != x"$MPILIBS"; then
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mpi.h" >&5
-$as_echo_n "checking for mpi.h... " >&6; }
-	cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <mpi.h>
-int
-main ()
-{
-
-  ;
-  return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-else
-  MPILIBS=""
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-fi
-
-CC="$ax_mpi_save_CC"
-
-
-
-# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
-if test x = x"$MPILIBS"; then
-
-        :
-else
-
-$as_echo "#define HAVE_MPI 1" >>confdefs.h
-
-        :
-fi
-    # Sets MPICC to use for isolated source files that need it
-# AC_PROG_CC_MPI   # This sets CC to mpicc
-
- if test "x$MPILIBS" != x; then
-  MPI_FOUND_TRUE=
-  MPI_FOUND_FALSE='#'
-else
-  MPI_FOUND_TRUE='#'
-  MPI_FOUND_FALSE=
-fi
-
-
 case `pwd` in
   *\ * | *\	*)
     { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not cope well with whitespace in \`pwd\`" >&5
@@ -13636,13 +13351,78 @@ CC=$lt_save_CC
 
 
 
-        ac_config_commands="$ac_config_commands libtool"
+        ac_config_commands="$ac_config_commands libtool"
+
+
+
+
+# Only expand once:
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler vendor" >&5
+$as_echo_n "checking for C compiler vendor... " >&6; }
+if ${ax_cv_c_compiler_vendor+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+    # note: don't check for gcc first since some other compilers define __GNUC__
+  vendors="intel:     __ICC,__ECC,__INTEL_COMPILER
+           ibm:       __xlc__,__xlC__,__IBMC__,__IBMCPP__
+           pathscale: __PATHCC__,__PATHSCALE__
+           clang:     __clang__
+           cray:      _CRAYC
+           fujitsu:   __FUJITSU
+           gnu:       __GNUC__
+           sun:       __SUNPRO_C,__SUNPRO_CC
+           hp:        __HP_cc,__HP_aCC
+           dec:       __DECC,__DECCXX,__DECC_VER,__DECCXX_VER
+           borland:   __BORLANDC__,__CODEGEARC__,__TURBOC__
+           comeau:    __COMO__
+           kai:       __KCC
+           lcc:       __LCC__
+           sgi:       __sgi,sgi
+           microsoft: _MSC_VER
+           metrowerks: __MWERKS__
+           watcom:    __WATCOMC__
+           portland:  __PGI
+	   tcc:       __TINYC__
+           unknown:   UNKNOWN"
+  for ventest in $vendors; do
+    case $ventest in
+      *:) vendor=$ventest; continue ;;
+      *)  vencpp="defined("`echo $ventest | sed 's/,/) || defined(/g'`")" ;;
+    esac
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+      #if !($vencpp)
+        thisisanerror;
+      #endif
 
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  break
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  done
+  ax_cv_c_compiler_vendor=`echo $vendor | cut -d: -f1`
 
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ax_cv_c_compiler_vendor" >&5
+$as_echo "$ax_cv_c_compiler_vendor" >&6; }
 
+# AM_CONDITIONAL(INTEL_COMPILER,test "x$ax_cv_c_compiler_vendor" = xintel)
 
-# Only expand once:
 
+# AX_MPI    # Sets MPICC to use for isolated source files that need it
+# AC_PROG_CC_MPI   # This sets CC to mpicc
+# AM_CONDITIONAL(MPI_FOUND,test "x$MPILIBS" != x)
 
 
 # Checks for libraries.
@@ -17151,30 +16931,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-#AC_MSG_CHECKING(whether sse2 is enabled)
-#AC_ARG_ENABLE([sse2],
-#	      AC_HELP_STRING([--enable-sse2],
-#                             [Enable sse2 simd commands if they compile and run (default=yes).]),
-#              [answer="$enableval"],
-#              [answer=""])
-#case x"$answer" in
-#     xyes)
-#     AC_MSG_RESULT(enabled)
-#     ax_cv_want_sse2_ext=yes
-#     ;;
-#
-#     xno)
-#     AC_MSG_RESULT(disabled by user)
-#     ax_cv_want_sse2_ext=no
-#     ;;
-#
-#     x)
-#     AC_MSG_RESULT([not specified so enabled by default])
-#     ax_cv_want_sse2_ext=yes
-#     ;;
-#esac
-
-
 
 
 
@@ -17226,125 +16982,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-
-#AC_MSG_CHECKING(whether ssse3 is enabled)
-#AC_ARG_ENABLE([ssse3],
-#	      AC_HELP_STRING([--enable-ssse3],
-#                             [Enable ssse3 simd commands if they compile and run (default=yes).  Requires that sse2 be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_sse2_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled sse2])
-#   ax_cv_want_ssse3_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_ssse3_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_ssse3_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_ssse3_ext=yes
-#	 ;;
-#    esac
-#fi
-
-
-
-#AC_MSG_CHECKING(whether sse4.1 is enabled)
-#AC_ARG_ENABLE([sse4.1],
-#	      AC_HELP_STRING([--enable-sse4.1],
-#                             [Enable sse4.1 simd commands if they compile and run (default=yes).  Requires that ssse3 be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_ssse3_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled ssse3])
-#   ax_cv_want_sse41_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_sse41_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_sse41_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_sse41_ext=yes
-#	 ;;
-#    esac
-#fi
-
-
-#AC_MSG_CHECKING(whether sse4.2 is enabled)
-#AC_ARG_ENABLE([sse4.2],
-#	      AC_HELP_STRING([--enable-sse4.2],
-#                             [Enable sse4.2 simd commands if they compile and run (default=yes).  Requires that sse4.1 be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_sse41_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled sse4.1])
-#   ax_cv_want_sse42_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_sse42_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_sse42_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_sse42_ext=yes
-#	 ;;
-#    esac
-#fi
-
-
-#AC_MSG_CHECKING(whether avx2 is enabled)
-#AC_ARG_ENABLE([avx2],
-#	      AC_HELP_STRING([--enable-avx2],
-#                             [Enable avx2 simd commands if they compile and run (default=yes).  Requires that avx be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_avx_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled avx])
-#   ax_cv_want_avx2_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_avx2_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_avx2_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_avx2_ext=yes
-#	 ;;
-#    esac
-#fi
-
-
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for whether simd is enabled" >&5
 $as_echo_n "checking for whether simd is enabled... " >&6; }
 # Check whether --enable-simd was given.
@@ -17538,7 +17175,40 @@ _ACEOF
 if ac_fn_c_try_run "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
-         ax_cv_cpu_has_sse42_ext=yes
+         ax_cv_cpu_has_avx2_ext=yes
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+
+# Test for AVX512 support
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for avx512 support" >&5
+$as_echo_n "checking for avx512 support... " >&6; }
+  if test "$cross_compiling" = yes; then :
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See \`config.log' for more details" "$LINENO" 5; }
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <immintrin.h>
+int
+main ()
+{
+return _may_i_use_cpu_feature(_FEATURE_AVX512F | _FEATURE_AVX512CD) ? 0 : 9;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+         ax_cv_cpu_has_avx512_ext=yes
 else
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
@@ -17834,15 +17504,15 @@ uint32_t abcd[4];
  if ((abcd[/*ECX*/2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) {
    return 9;
  } else if (!check_xcr0_ymm()) {
-   return 9;
+   return 8;
  } else {
    run_cpuid(7, 0, abcd);
    if ((abcd[/*EBX*/1] & avx2_bmi12_mask) != avx2_bmi12_mask) {
-     return 9;
+     return 7;
    } else {
      run_cpuid(0x80000001, 0, abcd);
      if ((abcd[/*ECX*/2] & lzcnt_mask) != lzcnt_mask) {
-       return 9;
+       return 6;
      } else {
        return 0;
      }
@@ -17905,6 +17575,61 @@ rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
 fi
 
 
+
+# Test for AVX512 support
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for avx512 support" >&5
+$as_echo_n "checking for avx512 support... " >&6; }
+  if test "$cross_compiling" = yes; then :
+  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See \`config.log' for more details" "$LINENO" 5; }
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdint.h>
+static void run_cpuid (uint32_t eax, uint32_t ecx, uint32_t *abcd) {
+  uint32_t ebx, edx;
+  __asm__ ("cpuid" : "+b" (ebx), "+a" (eax), "+c" (ecx), "=d" (edx));
+  abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;}
+static int check_xcr0_zmm () {
+  uint32_t xcr0;
+  uint32_t zmm_ymm_xmm = ((7 << 5) | (1 << 2) | (1 << 1));
+  __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
+  return ((xcr0 & zmm_ymm_xmm) == zmm_ymm_xmm);}
+int
+main ()
+{
+uint32_t abcd[4];
+ uint32_t osxsave_mask = (1 << 27);
+ uint32_t avx512_mask = (/*512F*/(1 << 16) | /*512CD*/(1 << 28));
+ run_cpuid(1, 0, abcd);
+ if ((abcd[/*ECX*/2] & osxsave_mask) != osxsave_mask) {
+   return 9; /* OSXSAVE bit clear in leaf 1 ECX */
+ } else if (!check_xcr0_zmm()) {
+   return 8; /* XCR0 lacks the zmm/ymm/xmm state bits */
+ } else {
+   /* AVX512F/CD feature bits are in leaf 7 subleaf 0 EBX, not leaf 1 */
+   run_cpuid(7, 0, abcd);
+   return ((abcd[/*EBX*/1] & avx512_mask) == avx512_mask) ? 0 : 7;
+ }
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+         ax_cv_cpu_has_avx512_ext=yes
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -18068,7 +17793,6 @@ $as_echo "yes" >&6; }
           TEST_CFLAGS="-mssse3"
 	else
           TEST_CFLAGS="$SIMD_SSE2_CFLAGS -mssse3"
-          SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3"
         fi
         as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh`
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5
@@ -18142,6 +17866,9 @@ $as_echo "$as_me: WARNING: Your compiler supports SSSE3 instructions but not you
 $as_echo "yes" >&6; }
             ax_make_ssse3=yes
 	    SIMD_SSSE3_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+              SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3"
+            fi
 #           AC_DEFINE(HAVE_SSSE3,1,[Define to 1 if you support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) -- Defines run-type
           fi
         fi
@@ -18153,8 +17880,6 @@ $as_echo "yes" >&6; }
           TEST_CFLAGS="-msse4.1"
 	else
           TEST_CFLAGS="$SIMD_SSSE3_CFLAGS -msse4.1"
-	  SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1"
-	  SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1"
         fi
 	as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh`
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5
@@ -18228,6 +17953,10 @@ $as_echo "$as_me: WARNING: Your compiler supports SSE4.1 instructions but not yo
 $as_echo "yes" >&6; }
             ax_make_sse41=yes
             SIMD_SSE4_1_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+  	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1"
+            fi
 #  	    AC_DEFINE(HAVE_SSE4_1,1,[Define to 1 if you support SSE4.1 (Streaming SIMD Extensions 4.1) instructions]) -- Not used
           fi
 	fi
@@ -18239,9 +17968,6 @@ $as_echo "yes" >&6; }
           TEST_CFLAGS="-march=corei7"
         else
           TEST_CFLAGS="$SIMD_SSE4_1_CFLAGS -msse4.2"
-	  SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2"
-	  SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2"
-	  SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2"
         fi
         as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh`
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5
@@ -18315,6 +18041,11 @@ $as_echo "$as_me: WARNING: Your compiler supports SSE4.2 instructions but not yo
 $as_echo "yes" >&6; }
             ax_make_sse42=yes
             SIMD_SSE4_2_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2"
+            fi
           fi
         fi
       fi
@@ -18621,10 +18352,6 @@ fi
 	  TEST_CFLAGS="-march=core-avx2"
 	else
           TEST_CFLAGS="$SIMD_SSE4_2_CFLAGS -mavx2 -mbmi2"
-	  SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2 -mno-bmi2"
-	  SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2 -mno-bmi2"
-	  SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2 -mno-bmi2"
-	  SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2 -mno-bmi2"
         fi
         as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh`
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5
@@ -18698,6 +18425,12 @@ $as_echo "$as_me: WARNING: Your compiler supports AVX2 instructions but not your
 $as_echo "yes" >&6; }
             ax_make_avx2=yes
             SIMD_AVX2_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+  	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2"
+	      SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2"
+            fi
 #  	    AC_DEFINE(HAVE_AVX2,1,[Define to 1 if you support AVX2 (Advanced Vector Extensions 2) instructions]) -- Defines run-type
           fi
         fi
@@ -18775,6 +18508,12 @@ rm -f core conftest.err conftest.$ac_objext \
 $as_echo "$as_me: WARNING: Your compiler supports -mbmi2 but not your linker.  Can you try another linker or update yours?" >&2;}
           else
             SIMD_AVX2_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+  	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-bmi2"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-bmi2"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-bmi2"
+	      SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-bmi2"
+	    fi
             { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _pext support" >&5
 $as_echo_n "checking for _pext support... " >&6; }
             if test "$cross_compiling" = yes; then :
@@ -18812,6 +18551,97 @@ fi
         fi
       fi
 
+
+      if test x"$ax_cv_cpu_has_avx512_ext" = xyes; then
+        CFLAGS=
+        if test x"$ax_cv_c_compiler_vendor" = xintel; then
+          TEST_CFLAGS="-xCOMMON-AVX512"
+        else
+          TEST_CFLAGS="$SIMD_AVX2_CFLAGS -mavx512f -mavx512cd"
+        fi
+        as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh`
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5
+$as_echo_n "checking whether C compiler accepts $TEST_CFLAGS... " >&6; }
+if eval \${$as_CACHEVAR+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+
+  ax_check_save_flags=$CFLAGS
+  CFLAGS="$CFLAGS  $TEST_CFLAGS"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  eval "$as_CACHEVAR=yes"
+else
+  eval "$as_CACHEVAR=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  CFLAGS=$ax_check_save_flags
+fi
+eval ac_res=\$$as_CACHEVAR
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+if test x"`eval 'as_val=${'$as_CACHEVAR'};$as_echo "$as_val"'`" = xyes; then :
+  ax_cv_compile_avx512_ext=yes
+else
+  ax_cv_ext_compile_problem=yes
+fi
+
+        if test x"$ax_cv_compile_avx512_ext" != xyes; then
+          { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your CPU supports AVX512 instructions but not your compiler.  Can you try another compiler or update yours?" >&5
+$as_echo "$as_me: WARNING: Your CPU supports AVX512 instructions but not your compiler.  Can you try another compiler or update yours?" >&2;}
+        else
+          CFLAGS=$TEST_CFLAGS
+          { $as_echo "$as_me:${as_lineno-$LINENO}: checking for nmmintrin.h header file" >&5
+$as_echo_n "checking for nmmintrin.h header file... " >&6; }
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <nmmintrin.h>
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ax_cv_link_nmmintrin_h=yes
+else
+  ax_cv_ext_linker_problem=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+          if test x"$ax_cv_link_nmmintrin_h" != xyes; then
+            { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+            { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your compiler supports AVX512 instructions but not your linker." >&5
+$as_echo "$as_me: WARNING: Your compiler supports AVX512 instructions but not your linker." >&2;}
+          else
+            { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+            ax_make_avx512=yes
+            SIMD_AVX512_CFLAGS=$CFLAGS
+            if test x"$ax_cv_c_compiler_vendor" != xintel; then
+	      SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx512f -mno-avx512cd"
+	      SIMD_AVX2_CFLAGS="$SIMD_AVX2_CFLAGS -mno-avx512f -mno-avx512cd"
+            fi
+          fi
+        fi
+      fi
+
     ;;
   esac
 
@@ -18824,8 +18654,11 @@ fi
 
 
 
+
 if test "x$ax_cv_want_simd" = xno; then
   compile_level=none
+elif test "x$ax_make_avx512" = xyes; then
+  compile_level=avx512
 elif test "x$ax_make_avx2" = xyes; then
   compile_level=avx2
 elif test "x$ax_make_sse42" = xyes; then
@@ -18856,10 +18689,43 @@ $as_echo_n "checking for user-selected SIMD level... " >&6; }
 	compile_level=$answer
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $compile_level" >&5
 $as_echo "$compile_level" >&6; }
+	case $compile_level in	# validate the requested level; dotted aliases map to canonical names
+	avx512)
+	;;
+	avx2)
+	;;
+	sse42)
+	;;
+	sse4.2)
+	compile_level=sse42	# shell assignment takes no spaces around '='
+	;;
+	sse41)
+	;;
+	sse4.1)
+	compile_level=sse41	# shell assignment takes no spaces around '='
+	;;
+	ssse3)
+	;;
+	sse2)
+	;;
+	none)
+	;;
+	*)
+	as_fn_error $? "Compiler level $compile_level not recognized.  Allowed values: none, sse2, ssse3, sse4.1 (or sse41), sse4.2 (or sse42), avx2, avx512" "$LINENO" 5
+	;;
+	esac
 fi
 
 
 
+ if test "$compile_level" = avx512; then
+  MAKE_AVX512_TRUE=
+  MAKE_AVX512_FALSE='#'
+else
+  MAKE_AVX512_TRUE='#'
+  MAKE_AVX512_FALSE=
+fi
+
  if test "$compile_level" = avx2; then
   MAKE_AVX2_TRUE=
   MAKE_AVX2_FALSE='#'
@@ -19488,6 +19354,7 @@ ac_config_files="$ac_config_files Makefile"
 
 ac_config_files="$ac_config_files src/Makefile"
 
+#AC_CONFIG_FILES([lib/gmap-${LIBGMAP_API_VERSION}.pc:lib/gmap.pc.in])
 #AC_CONFIG_FILES([mpi/Makefile])
 ac_config_files="$ac_config_files util/Makefile"
 
@@ -19515,6 +19382,8 @@ ac_config_files="$ac_config_files util/ensembl_genes.pl"
 
 ac_config_files="$ac_config_files util/gtf_splicesites.pl"
 
+ac_config_files="$ac_config_files util/gtf_transcript_splicesites.pl"
+
 ac_config_files="$ac_config_files util/gtf_introns.pl"
 
 ac_config_files="$ac_config_files util/gtf_genes.pl"
@@ -19687,11 +19556,11 @@ if test -z "${MAINTAINER_TRUE}" && test -z "${MAINTAINER_FALSE}"; then
   as_fn_error $? "conditional \"MAINTAINER\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
-if test -z "${MPI_FOUND_TRUE}" && test -z "${MPI_FOUND_FALSE}"; then
-  as_fn_error $? "conditional \"MPI_FOUND\" was never defined.
+
+if test -z "${MAKE_AVX512_TRUE}" && test -z "${MAKE_AVX512_FALSE}"; then
+  as_fn_error $? "conditional \"MAKE_AVX512\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
 fi
-
 if test -z "${MAKE_AVX2_TRUE}" && test -z "${MAKE_AVX2_FALSE}"; then
   as_fn_error $? "conditional \"MAKE_AVX2\" was never defined.
 Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -20109,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by gmap $as_me 2016-11-07, which was
+This file was extended by gmap $as_me 2017-08-15, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -20175,7 +20044,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-gmap config.status 2016-11-07
+gmap config.status 2017-08-15
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
@@ -20605,6 +20474,7 @@ do
     "util/psl_genes.pl") CONFIG_FILES="$CONFIG_FILES util/psl_genes.pl" ;;
     "util/ensembl_genes.pl") CONFIG_FILES="$CONFIG_FILES util/ensembl_genes.pl" ;;
     "util/gtf_splicesites.pl") CONFIG_FILES="$CONFIG_FILES util/gtf_splicesites.pl" ;;
+    "util/gtf_transcript_splicesites.pl") CONFIG_FILES="$CONFIG_FILES util/gtf_transcript_splicesites.pl" ;;
     "util/gtf_introns.pl") CONFIG_FILES="$CONFIG_FILES util/gtf_introns.pl" ;;
     "util/gtf_genes.pl") CONFIG_FILES="$CONFIG_FILES util/gtf_genes.pl" ;;
     "util/gff3_splicesites.pl") CONFIG_FILES="$CONFIG_FILES util/gff3_splicesites.pl" ;;
@@ -22049,6 +21919,13 @@ $as_echo_n "checking compile level... " >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $compile_level" >&5
 $as_echo "$compile_level" >&6; }
 
+if test "$compile_level" = avx512; then
+   { $as_echo "$as_me:${as_lineno-$LINENO}: checking AVX512 compiler flags to be used" >&5
+$as_echo_n "checking AVX512 compiler flags to be used... " >&6; }
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SIMD_AVX512_CFLAGS" >&5
+$as_echo "$SIMD_AVX512_CFLAGS" >&6; }
+fi
+
 if test "$compile_level" = avx2; then
    { $as_echo "$as_me:${as_lineno-$LINENO}: checking AVX2 compiler flags to be used" >&5
 $as_echo_n "checking AVX2 compiler flags to be used... " >&6; }
diff --git a/configure.ac b/configure.ac
index d4061f5..624b732 100644
--- a/configure.ac
+++ b/configure.ac
@@ -73,10 +73,10 @@ else
 	AC_MSG_RESULT($CFLAGS)
 fi
 
-AC_MSG_CHECKING(MPI_CFLAGS)
-AC_ARG_VAR([MPI_CFLAGS], [Compiler flags (default: -O3)])
-AC_MSG_RESULT($MPI_CFLAGS)
-AC_SUBST(MPI_CFLAGS)
+#AC_MSG_CHECKING(MPI_CFLAGS)
+#AC_ARG_VAR([MPI_CFLAGS], [Compiler flags (default: -O3)])
+#AC_MSG_RESULT($MPI_CFLAGS)
+#AC_SUBST(MPI_CFLAGS)
 
 
 AC_CONFIG_SRCDIR([src/gmap.c])
@@ -86,6 +86,9 @@ AC_CONFIG_AUX_DIR([config])
 
 AC_CONFIG_MACRO_DIR([config])
 
+AC_SUBST([LIBGMAP_SO_VERSION], [1:0:0])
+#AC_SUBST([LIBGMAP_API_VERSION], [1.0])
+
 AC_CANONICAL_SYSTEM
 
 AC_MSG_CHECKING(for non-Intel CPU)
@@ -135,28 +138,6 @@ ACX_EXPAND(BINDIR,$bindir)
 AC_SUBST(BINDIR)
 AC_MSG_RESULT($BINDIR)
 
-# Works in conjunction with AC_PROG_LIBTOOL -- Commented out because no libraries being built
-#AC_MSG_CHECKING(whether to link statically)
-#AC_ARG_ENABLE([static-linking],
-#               AC_HELP_STRING([--enable-static-linking],
-#                              [Link binaries statically (default=no)]),
-#              [answer="$enableval"],
-#              [answer=""])
-#case x"$answer" in
-#	xyes)
-#	AC_MSG_RESULT(enabled)
-#	STATIC_LDFLAG="-all-static"
-#	;;
-#	
-#	xno)
-#	AC_MSG_RESULT(disabled)
-#	;;
-#
-#	x)
-#	AC_MSG_RESULT(not specified so disabled by default)
-#	;;
-#esac	
-#AC_SUBST(STATIC_LDFLAG)
 
 
 # Checks for programs.
@@ -165,19 +146,18 @@ ACX_PATH_PERL
 
 AC_PROG_CC
 AM_PROG_CC_C_O
+AC_PROG_LIBTOOL
 AX_COMPILER_VENDOR
 # AM_CONDITIONAL(INTEL_COMPILER,test "x$ax_cv_c_compiler_vendor" = xintel)
 
 
-AX_MPI    # Sets MPICC to use for isolated source files that need it
+# AX_MPI    # Sets MPICC to use for isolated source files that need it
 # AC_PROG_CC_MPI   # This sets CC to mpicc
+# AM_CONDITIONAL(MPI_FOUND,test "x$MPILIBS" != x)
 
-AM_CONDITIONAL(MPI_FOUND,test "x$MPILIBS" != x)
-
-AC_PROG_LIBTOOL
 
 # Checks for libraries.
-AC_CHECK_LIB(m, rint)
+AC_CHECK_LIB([m], rint)
 
 #AC_CHECK_LIB(popt, poptGetContext, [answer="yes"], [answer="no"])
 #if test x"$answer" == xyes; then
@@ -365,151 +345,8 @@ ACX_BUILTIN_POPCOUNT
 ACX_ASM_BSR
 
 
-#AC_MSG_CHECKING(whether sse2 is enabled)
-#AC_ARG_ENABLE([sse2],
-#	      AC_HELP_STRING([--enable-sse2],
-#                             [Enable sse2 simd commands if they compile and run (default=yes).]),
-#              [answer="$enableval"],
-#              [answer=""])
-#case x"$answer" in
-#     xyes)
-#     AC_MSG_RESULT(enabled)
-#     ax_cv_want_sse2_ext=yes
-#     ;;
-#
-#     xno)
-#     AC_MSG_RESULT(disabled by user)
-#     ax_cv_want_sse2_ext=no
-#     ;;
-#
-#     x)
-#     AC_MSG_RESULT([not specified so enabled by default])
-#     ax_cv_want_sse2_ext=yes
-#     ;;
-#esac	
-
-
 ACX_SSE2_SHIFT_DEFECT
 
-
-#AC_MSG_CHECKING(whether ssse3 is enabled)
-#AC_ARG_ENABLE([ssse3],
-#	      AC_HELP_STRING([--enable-ssse3],
-#                             [Enable ssse3 simd commands if they compile and run (default=yes).  Requires that sse2 be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_sse2_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled sse2])
-#   ax_cv_want_ssse3_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_ssse3_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_ssse3_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_ssse3_ext=yes
-#	 ;;
-#    esac	
-#fi
-
-
-
-#AC_MSG_CHECKING(whether sse4.1 is enabled)
-#AC_ARG_ENABLE([sse4.1],
-#	      AC_HELP_STRING([--enable-sse4.1],
-#                             [Enable sse4.1 simd commands if they compile and run (default=yes).  Requires that ssse3 be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_ssse3_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled ssse3])
-#   ax_cv_want_sse41_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_sse41_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_sse41_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_sse41_ext=yes
-#	 ;;
-#    esac	
-#fi
-
-
-#AC_MSG_CHECKING(whether sse4.2 is enabled)
-#AC_ARG_ENABLE([sse4.2],
-#	      AC_HELP_STRING([--enable-sse4.2],
-#                             [Enable sse4.2 simd commands if they compile and run (default=yes).  Requires that sse4.1 be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_sse41_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled sse4.1])
-#   ax_cv_want_sse42_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_sse42_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_sse42_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_sse42_ext=yes
-#	 ;;
-#    esac	
-#fi
-
-
-#AC_MSG_CHECKING(whether avx2 is enabled)
-#AC_ARG_ENABLE([avx2],
-#	      AC_HELP_STRING([--enable-avx2],
-#                             [Enable avx2 simd commands if they compile and run (default=yes).  Requires that avx be enabled.]),
-#              [answer="$enableval"],
-#              [answer=""])
-#if test "$ax_cv_want_avx_ext" = no; then
-#   AC_MSG_RESULT([disabled because the user disabled avx])
-#   ax_cv_want_avx2_ext=no
-#else
-#    case x"$answer" in
-#	 xyes)
-#	 AC_MSG_RESULT(enabled)
-#	 ax_cv_want_avx2_ext=yes
-#	 ;;
-#
-#	 xno)
-#	 AC_MSG_RESULT(disabled by user)
-#	 ax_cv_want_avx2_ext=no
-#	 ;;
-#
-#	 x)
-#	 AC_MSG_RESULT([not specified so enabled by default])
-#	 ax_cv_want_avx2_ext=yes
-#	 ;;
-#    esac	
-#fi
-
-
-
 AC_MSG_CHECKING(for whether simd is enabled)
 AC_ARG_ENABLE([simd],
 	      AC_HELP_STRING([--enable-simd],
@@ -543,6 +380,8 @@ fi
 AX_EXT
 if test "x$ax_cv_want_simd" = xno; then
   compile_level=none
+elif test "x$ax_make_avx512" = xyes; then
+  compile_level=avx512
 elif test "x$ax_make_avx2" = xyes; then
   compile_level=avx2
 elif test "x$ax_make_sse42" = xyes; then
@@ -561,17 +400,43 @@ fi
 # User-selected compile level
 AC_ARG_WITH([simd-level],
             AC_HELP_STRING([--with-simd-level=STRING],
-                           [User-selected SIMD level (sse2, ssse3, sse41, sse42, avx2)]),
+                           [User-selected SIMD level (none, sse2, ssse3, sse41/sse4.1, sse42/sse4.2, avx2, avx512)]),
             [answer="$withval"],
             [answer=""])
 if test x"$answer" != x; then
 	AC_MSG_CHECKING(for user-selected SIMD level)
 	compile_level=$answer
 	AC_MSG_RESULT($compile_level)
+	case $compile_level in	# validate the requested level and map dotted aliases to canonical names
+	avx512)
+	;;
+	avx2)
+	;;
+	sse42)
+	;;
+	sse4.2)
+	compile_level=sse42	# no spaces around = in a shell assignment
+	;;
+	sse41)
+	;;
+	sse4.1)
+	compile_level=sse41	# no spaces around = in a shell assignment
+	;;
+	ssse3)
+	;;
+	sse2)
+	;;
+	none)
+	;;
+	*)
+	AC_MSG_ERROR([Compiler level $compile_level not recognized.  Allowed values: none, sse2, ssse3, sse4.1 (or sse41), sse4.2 (or sse42), avx2, avx512])
+	;;
+	esac
 fi
 
 
 
+AM_CONDITIONAL(MAKE_AVX512,[test "$compile_level" = avx512])
 AM_CONDITIONAL(MAKE_AVX2,[test "$compile_level" = avx2])
 AM_CONDITIONAL(MAKE_SSE4_2,[test "$compile_level" = sse42])
 AM_CONDITIONAL(MAKE_SSE4_1,[test "$compile_level" = sse41])
@@ -697,6 +562,7 @@ AC_SUBST(BZLIB_LIBS)
 
 AC_CONFIG_FILES([Makefile])
 AC_CONFIG_FILES([src/Makefile])
+#AC_CONFIG_FILES([lib/gmap-${LIBGMAP_API_VERSION}.pc:lib/gmap.pc.in])
 #AC_CONFIG_FILES([mpi/Makefile])
 AC_CONFIG_FILES([util/Makefile])
 AC_CONFIG_FILES([util/gmap_compress.pl])
@@ -711,6 +577,7 @@ AC_CONFIG_FILES([util/psl_introns.pl])
 AC_CONFIG_FILES([util/psl_genes.pl])
 AC_CONFIG_FILES([util/ensembl_genes.pl])
 AC_CONFIG_FILES([util/gtf_splicesites.pl])
+AC_CONFIG_FILES([util/gtf_transcript_splicesites.pl])
 AC_CONFIG_FILES([util/gtf_introns.pl])
 AC_CONFIG_FILES([util/gtf_genes.pl])
 AC_CONFIG_FILES([util/gff3_splicesites.pl])
@@ -820,6 +687,11 @@ AC_MSG_RESULT($POPCNT_CFLAGS)
 AC_MSG_CHECKING(compile level)
 AC_MSG_RESULT($compile_level)
 
+if test "$compile_level" = avx512; then
+   AC_MSG_CHECKING(AVX512 compiler flags to be used)
+   AC_MSG_RESULT($SIMD_AVX512_CFLAGS)
+fi
+
 if test "$compile_level" = avx2; then
    AC_MSG_CHECKING(AVX2 compiler flags to be used)
    AC_MSG_RESULT($SIMD_AVX2_CFLAGS)
diff --git a/src/Makefile.am b/src/Makefile.am
index c567e17..f73ce68 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -8,6 +8,32 @@ EXTRA_DIST = mpidebug.c mpidebug.h master.c master.h
 # get-genome so they can handle both small and large genomes at run
 # time
 
+#lib_LTLIBRARIES = libgmap-@LIBGMAP_API_VERSION@.la
+lib_LTLIBRARIES = libgmap.la
+
+
+
+include_HEADERS = fopen.h bool.h types.h separator.h comp.h \
+ except.h assert.h mem.h \
+ intlistdef.h intlist.h listdef.h list.h \
+ doublelist.h \
+ littleendian.h bigendian.h \
+ interval.h uintlist.h uint8list.h \
+ iitdef.h iit-read.h iit-write.h parserange.h \
+ univinterval.h iit-read-univ.h \
+ table.h tableuint.h uinttable.h \
+ stopwatch.h semaphore.h access.h \
+ chrom.h filestring.h \
+ md5.h complement.h bzip2.h sequence.h \
+ genomicpos.h \
+ bitpack64-read.h bitpack64-readtwo.h \
+ maxent_hr.h \
+ popcount.h genome128_hr.h \
+ compress.h bytecoding.h sarray-read.h \
+ mode.h chrnum.h genome.h samflags.h \
+ datadir.h
+
+
 bin_PROGRAMS = cpuid gmap gmapl get-genome gmapindex iit_store iit_get iit_dump \
                gsnap gsnapl uniqscan uniqscanl snpindex cmetindex atoiindex \
                sam_sort
@@ -45,6 +71,40 @@ if MAKE_AVX2
   bin_PROGRAMS += gsnap.avx2
   bin_PROGRAMS += gsnapl.avx2
 endif
+if MAKE_AVX512
+  bin_PROGRAMS += gmap.avx512
+  bin_PROGRAMS += gmapl.avx512
+  bin_PROGRAMS += gsnap.avx512
+  bin_PROGRAMS += gsnapl.avx512
+endif
+
+
+LIBGMAP_LA_FILES = fopen.h bool.h types.h separator.h comp.h \
+ except.c except.h assert.c assert.h mem.c mem.h \
+ intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
+ doublelist.c doublelist.h \
+ littleendian.c littleendian.h bigendian.c bigendian.h \
+ interval.c interval.h uintlist.c uintlist.h uint8list.c uint8list.h \
+ iit-read.c iit-read.h iit-write.c iit-write.h parserange.c parserange.h \
+ univinterval.c univinterval.h iit-read-univ.c iit-read-univ.h \
+ stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
+ table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h \
+ chrom.c chrom.h filestring.c filestring.h \
+ md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
+ genomicpos.c genomicpos.h \
+ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ maxent_hr.c maxent_hr.h \
+ popcount.c popcount.h genome128_hr.c genome128_hr.h \
+ compress.c compress.h bytecoding.c bytecoding.h sarray-read.c sarray-read.h \
+ mode.h chrnum.c chrnum.h genome.c genome.h \
+ datadir.c datadir.h
+
+libgmap_CC = $(PTHREAD_CC)
+libgmap_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) -fPIC -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\"
+libgmap_la_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) -version-info $(LIBGMAP_SO_VERSION)
+libgmap_la_LIBADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_libgmap_la_SOURCES = $(LIBGMAP_LA_FILES)
+
 
 
 CPUID_FILES = bool.h cpuid.c cpuid.h
@@ -98,6 +158,7 @@ GMAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  genome-write.c genome-write.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h block.c block.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -160,6 +221,12 @@ gmap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gmap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gmap_avx2_SOURCES = $(GMAP_FILES)
 
+gmap_avx512_CC = $(PTHREAD_CC)
+gmap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gmap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gmap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gmap_avx512_SOURCES = $(GMAP_FILES)
+
 
 
 GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
@@ -176,6 +243,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  genome-write.c genome-write.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h block.c block.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -238,6 +306,12 @@ gmapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gmapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gmapl_avx2_SOURCES = $(GMAPL_FILES)
 
+gmapl_avx512_CC = $(PTHREAD_CC)
+gmapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gmapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gmapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gmapl_avx512_SOURCES = $(GMAPL_FILES)
+
 
 GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  except.c except.h assert.c assert.h mem.c mem.h \
@@ -253,11 +327,12 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
  chrnum.c chrnum.h \
- maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
+ maxent_hr.c maxent_hr.h cigar.c cigar.h samflags.h samprint.c samprint.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
@@ -272,7 +347,8 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
  bytecoding.c bytecoding.h \
  univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
- stage1hr.c stage1hr.h \
+ sarray-search.c sarray-search.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h \
  request.c request.h resulthr.c resulthr.h output.c output.h \
  inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
@@ -318,6 +394,12 @@ gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gsnap_avx2_SOURCES = $(GSNAP_FILES)
 
+gsnap_avx512_CC = $(PTHREAD_CC)
+gsnap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gsnap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gsnap_avx512_SOURCES = $(GSNAP_FILES)
+
 
 
 
@@ -335,11 +417,12 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
  chrnum.c chrnum.h \
- maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
+ maxent_hr.c maxent_hr.h cigar.c cigar.h samflags.h samprint.c samprint.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
@@ -352,7 +435,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
  splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- stage1hr.c stage1hr.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h \
  request.c request.h resulthr.c resulthr.h output.c output.h \
  inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
@@ -397,6 +480,12 @@ gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES)
 
+gsnapl_avx512_CC = $(PTHREAD_CC)
+gsnapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gsnapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gsnapl_avx512_SOURCES = $(GSNAPL_FILES)
+
 
 # Build as a non-SIMD program
 UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
@@ -412,6 +501,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -419,6 +509,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
  maxent_hr.c maxent_hr.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
+ cigar.c cigar.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
  orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
  intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
@@ -431,7 +522,8 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
  bytecoding.c bytecoding.h \
  univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
- stage1hr.c stage1hr.h resulthr.c resulthr.h \
+ sarray-search.c sarray-search.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
  getopt.c getopt1.c getopt.h uniqscan.c
 
@@ -456,6 +548,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -463,6 +556,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
  maxent_hr.c maxent_hr.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
+ cigar.c cigar.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
  orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
  intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
@@ -473,7 +567,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
  chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
  splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- stage1hr.c stage1hr.h resulthr.c resulthr.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
  getopt.c getopt1.c getopt.h uniqscan.c
 
@@ -675,6 +769,7 @@ iit_dump_LDADD = $(PTHREAD_LIBS)
 dist_iit_dump_SOURCES = $(IIT_DUMP_FILES)
 
 
+
 SAM_SORT_FILES = bool.h types.h \
  except.c except.h assert.c assert.h mem.c mem.h \
  littleendian.c littleendian.h bigendian.c bigendian.h \
@@ -709,7 +804,7 @@ dist_sam_sort_SOURCES = $(SAM_SORT_FILES)
 # genome.c genome.h \
 # genomicpos.c genomicpos.h \
 # chrnum.c chrnum.h \
-# maxent.c maxent.h \
+# maxent.c maxent.h maxent_hr.c maxent_hr.h \
 # branchpoint.c branchpoint.h \
 # parserange.c parserange.h datadir.c datadir.h getopt.c getopt1.c getopt.h splicing-score.c
 #
diff --git a/src/Makefile.in b/src/Makefile.in
index a2e957d..2b2e097 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -14,6 +14,8 @@
 
 @SET_MAKE@
 
+
+
 VPATH = @srcdir@
 am__is_gnu_make = { \
   if test -z '$(MAKELEVEL)'; then \
@@ -97,7 +99,7 @@ bin_PROGRAMS = cpuid$(EXEEXT) gmap$(EXEEXT) gmapl$(EXEEXT) \
 	sam_sort$(EXEEXT) gmap.nosimd$(EXEEXT) gmapl.nosimd$(EXEEXT) \
 	gsnap.nosimd$(EXEEXT) gsnapl.nosimd$(EXEEXT) $(am__EXEEXT_1) \
 	$(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \
-	$(am__EXEEXT_5)
+	$(am__EXEEXT_5) $(am__EXEEXT_6)
 @MAKE_SSE2_TRUE@am__append_1 = gmap.sse2 gmapl.sse2 gsnap.sse2 \
 @MAKE_SSE2_TRUE@	gsnapl.sse2
 @MAKE_SSSE3_TRUE@am__append_2 = gmap.ssse3 gmapl.ssse3 gsnap.ssse3 \
@@ -108,6 +110,8 @@ bin_PROGRAMS = cpuid$(EXEEXT) gmap$(EXEEXT) gmapl$(EXEEXT) \
 @MAKE_SSE4_2_TRUE@	gsnapl.sse42
 @MAKE_AVX2_TRUE@am__append_5 = gmap.avx2 gmapl.avx2 gsnap.avx2 \
 @MAKE_AVX2_TRUE@	gsnapl.avx2
+@MAKE_AVX512_TRUE@am__append_6 = gmap.avx512 gmapl.avx512 gsnap.avx512 \
+@MAKE_AVX512_TRUE@	gsnapl.avx512
 subdir = src
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
@@ -135,11 +139,73 @@ am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
 	$(top_srcdir)/config/ax_ext.m4 $(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
-DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+DIST_COMMON = $(srcdir)/Makefile.am $(include_HEADERS) \
+	$(am__DIST_COMMON)
 mkinstalldirs = $(install_sh) -d
 CONFIG_HEADER = config.h
 CONFIG_CLEAN_FILES =
 CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+    $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+    *) f=$$p;; \
+  esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+  srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+  for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+  for p in $$list; do echo "$$p $$p"; done | \
+  sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+  $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+    if (++n[$$2] == $(am__install_max)) \
+      { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+    END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+  sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+  sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+  test -z "$$files" \
+    || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+    || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+         $(am__cd) "$$dir" && rm -f $$files; }; \
+  }
+am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \
+	"$(DESTDIR)$(includedir)"
+LTLIBRARIES = $(lib_LTLIBRARIES)
+am__DEPENDENCIES_1 =
+libgmap_la_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_1)
+am__objects_1 = libgmap_la-except.lo libgmap_la-assert.lo \
+	libgmap_la-mem.lo libgmap_la-intlist.lo libgmap_la-list.lo \
+	libgmap_la-doublelist.lo libgmap_la-littleendian.lo \
+	libgmap_la-bigendian.lo libgmap_la-interval.lo \
+	libgmap_la-uintlist.lo libgmap_la-uint8list.lo \
+	libgmap_la-iit-read.lo libgmap_la-iit-write.lo \
+	libgmap_la-parserange.lo libgmap_la-univinterval.lo \
+	libgmap_la-iit-read-univ.lo libgmap_la-stopwatch.lo \
+	libgmap_la-semaphore.lo libgmap_la-access.lo \
+	libgmap_la-table.lo libgmap_la-tableuint.lo \
+	libgmap_la-uinttable.lo libgmap_la-chrom.lo \
+	libgmap_la-filestring.lo libgmap_la-md5.lo libgmap_la-bzip2.lo \
+	libgmap_la-sequence.lo libgmap_la-genomicpos.lo \
+	libgmap_la-bitpack64-read.lo libgmap_la-bitpack64-readtwo.lo \
+	libgmap_la-maxent_hr.lo libgmap_la-popcount.lo \
+	libgmap_la-genome128_hr.lo libgmap_la-compress.lo \
+	libgmap_la-bytecoding.lo libgmap_la-sarray-read.lo \
+	libgmap_la-chrnum.lo libgmap_la-genome.lo \
+	libgmap_la-datadir.lo
+dist_libgmap_la_OBJECTS = $(am__objects_1)
+libgmap_la_OBJECTS = $(dist_libgmap_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 = 
+libgmap_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(libgmap_la_CFLAGS) \
+	$(CFLAGS) $(libgmap_la_LDFLAGS) $(LDFLAGS) -o $@
 @MAKE_SSE2_TRUE@am__EXEEXT_1 = gmap.sse2$(EXEEXT) gmapl.sse2$(EXEEXT) \
 @MAKE_SSE2_TRUE@	gsnap.sse2$(EXEEXT) gsnapl.sse2$(EXEEXT)
 @MAKE_SSSE3_TRUE@am__EXEEXT_2 = gmap.ssse3$(EXEEXT) \
@@ -153,9 +219,11 @@ CONFIG_CLEAN_VPATH_FILES =
 @MAKE_SSE4_2_TRUE@	gsnapl.sse42$(EXEEXT)
 @MAKE_AVX2_TRUE@am__EXEEXT_5 = gmap.avx2$(EXEEXT) gmapl.avx2$(EXEEXT) \
 @MAKE_AVX2_TRUE@	gsnap.avx2$(EXEEXT) gsnapl.avx2$(EXEEXT)
-am__installdirs = "$(DESTDIR)$(bindir)"
+@MAKE_AVX512_TRUE@am__EXEEXT_6 = gmap.avx512$(EXEEXT) \
+@MAKE_AVX512_TRUE@	gmapl.avx512$(EXEEXT) gsnap.avx512$(EXEEXT) \
+@MAKE_AVX512_TRUE@	gsnapl.avx512$(EXEEXT)
 PROGRAMS = $(bin_PROGRAMS)
-am__objects_1 = atoiindex-except.$(OBJEXT) atoiindex-assert.$(OBJEXT) \
+am__objects_2 = atoiindex-except.$(OBJEXT) atoiindex-assert.$(OBJEXT) \
 	atoiindex-mem.$(OBJEXT) atoiindex-littleendian.$(OBJEXT) \
 	atoiindex-bigendian.$(OBJEXT) atoiindex-genomicpos.$(OBJEXT) \
 	atoiindex-stopwatch.$(OBJEXT) atoiindex-semaphore.$(OBJEXT) \
@@ -180,19 +248,14 @@ am__objects_1 = atoiindex-except.$(OBJEXT) atoiindex-assert.$(OBJEXT) \
 	atoiindex-datadir.$(OBJEXT) atoiindex-parserange.$(OBJEXT) \
 	atoiindex-getopt.$(OBJEXT) atoiindex-getopt1.$(OBJEXT) \
 	atoiindex-atoiindex.$(OBJEXT)
-dist_atoiindex_OBJECTS = $(am__objects_1)
+dist_atoiindex_OBJECTS = $(am__objects_2)
 atoiindex_OBJECTS = $(dist_atoiindex_OBJECTS)
-am__DEPENDENCIES_1 =
 atoiindex_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
-AM_V_lt = $(am__v_lt_@AM_V@)
-am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
-am__v_lt_0 = --silent
-am__v_lt_1 = 
 atoiindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(atoiindex_CFLAGS) \
 	$(CFLAGS) $(atoiindex_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_2 = cmetindex-except.$(OBJEXT) cmetindex-assert.$(OBJEXT) \
+am__objects_3 = cmetindex-except.$(OBJEXT) cmetindex-assert.$(OBJEXT) \
 	cmetindex-mem.$(OBJEXT) cmetindex-littleendian.$(OBJEXT) \
 	cmetindex-bigendian.$(OBJEXT) cmetindex-genomicpos.$(OBJEXT) \
 	cmetindex-stopwatch.$(OBJEXT) cmetindex-semaphore.$(OBJEXT) \
@@ -217,21 +280,21 @@ am__objects_2 = cmetindex-except.$(OBJEXT) cmetindex-assert.$(OBJEXT) \
 	cmetindex-datadir.$(OBJEXT) cmetindex-parserange.$(OBJEXT) \
 	cmetindex-getopt.$(OBJEXT) cmetindex-getopt1.$(OBJEXT) \
 	cmetindex-cmetindex.$(OBJEXT)
-dist_cmetindex_OBJECTS = $(am__objects_2)
+dist_cmetindex_OBJECTS = $(am__objects_3)
 cmetindex_OBJECTS = $(dist_cmetindex_OBJECTS)
 cmetindex_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 cmetindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(cmetindex_CFLAGS) \
 	$(CFLAGS) $(cmetindex_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_3 = cpuid-cpuid.$(OBJEXT)
-dist_cpuid_OBJECTS = $(am__objects_3)
+am__objects_4 = cpuid-cpuid.$(OBJEXT)
+dist_cpuid_OBJECTS = $(am__objects_4)
 cpuid_OBJECTS = $(dist_cpuid_OBJECTS)
 cpuid_DEPENDENCIES =
 cpuid_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(cpuid_CFLAGS) $(CFLAGS) \
 	$(cpuid_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_4 = get_genome-except.$(OBJEXT) \
+am__objects_5 = get_genome-except.$(OBJEXT) \
 	get_genome-assert.$(OBJEXT) get_genome-mem.$(OBJEXT) \
 	get_genome-intlist.$(OBJEXT) get_genome-list.$(OBJEXT) \
 	get_genome-littleendian.$(OBJEXT) \
@@ -248,21 +311,21 @@ am__objects_4 = get_genome-except.$(OBJEXT) \
 	get_genome-datadir.$(OBJEXT) get_genome-parserange.$(OBJEXT) \
 	get_genome-getopt.$(OBJEXT) get_genome-getopt1.$(OBJEXT) \
 	get_genome-get-genome.$(OBJEXT)
-dist_get_genome_OBJECTS = $(am__objects_4)
+dist_get_genome_OBJECTS = $(am__objects_5)
 get_genome_OBJECTS = $(dist_get_genome_OBJECTS)
 get_genome_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 get_genome_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(get_genome_CFLAGS) \
 	$(CFLAGS) $(get_genome_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_5 = gmap-cpuid.$(OBJEXT) gmap-gmap_select.$(OBJEXT)
-dist_gmap_OBJECTS = $(am__objects_5)
+am__objects_6 = gmap-cpuid.$(OBJEXT) gmap-gmap_select.$(OBJEXT)
+dist_gmap_OBJECTS = $(am__objects_6)
 gmap_OBJECTS = $(dist_gmap_OBJECTS)
 gmap_DEPENDENCIES =
 gmap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_CFLAGS) $(CFLAGS) \
 	$(gmap_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_6 = gmap_avx2-except.$(OBJEXT) gmap_avx2-assert.$(OBJEXT) \
+am__objects_7 = gmap_avx2-except.$(OBJEXT) gmap_avx2-assert.$(OBJEXT) \
 	gmap_avx2-mem.$(OBJEXT) gmap_avx2-intlist.$(OBJEXT) \
 	gmap_avx2-list.$(OBJEXT) gmap_avx2-littleendian.$(OBJEXT) \
 	gmap_avx2-bigendian.$(OBJEXT) gmap_avx2-univinterval.$(OBJEXT) \
@@ -280,15 +343,16 @@ am__objects_6 = gmap_avx2-except.$(OBJEXT) gmap_avx2-assert.$(OBJEXT) \
 	gmap_avx2-genome-write.$(OBJEXT) \
 	gmap_avx2-bitpack64-read.$(OBJEXT) \
 	gmap_avx2-bitpack64-readtwo.$(OBJEXT) \
-	gmap_avx2-indexdb.$(OBJEXT) gmap_avx2-indexdb_hr.$(OBJEXT) \
-	gmap_avx2-oligo.$(OBJEXT) gmap_avx2-block.$(OBJEXT) \
-	gmap_avx2-chrom.$(OBJEXT) gmap_avx2-segmentpos.$(OBJEXT) \
-	gmap_avx2-chrnum.$(OBJEXT) gmap_avx2-uinttable.$(OBJEXT) \
-	gmap_avx2-gregion.$(OBJEXT) gmap_avx2-match.$(OBJEXT) \
-	gmap_avx2-matchpool.$(OBJEXT) gmap_avx2-diagnostic.$(OBJEXT) \
-	gmap_avx2-stage1.$(OBJEXT) gmap_avx2-diag.$(OBJEXT) \
-	gmap_avx2-diagpool.$(OBJEXT) gmap_avx2-cmet.$(OBJEXT) \
-	gmap_avx2-atoi.$(OBJEXT) gmap_avx2-orderstat.$(OBJEXT) \
+	gmap_avx2-merge.$(OBJEXT) gmap_avx2-indexdb.$(OBJEXT) \
+	gmap_avx2-indexdb_hr.$(OBJEXT) gmap_avx2-oligo.$(OBJEXT) \
+	gmap_avx2-block.$(OBJEXT) gmap_avx2-chrom.$(OBJEXT) \
+	gmap_avx2-segmentpos.$(OBJEXT) gmap_avx2-chrnum.$(OBJEXT) \
+	gmap_avx2-uinttable.$(OBJEXT) gmap_avx2-gregion.$(OBJEXT) \
+	gmap_avx2-match.$(OBJEXT) gmap_avx2-matchpool.$(OBJEXT) \
+	gmap_avx2-diagnostic.$(OBJEXT) gmap_avx2-stage1.$(OBJEXT) \
+	gmap_avx2-diag.$(OBJEXT) gmap_avx2-diagpool.$(OBJEXT) \
+	gmap_avx2-cmet.$(OBJEXT) gmap_avx2-atoi.$(OBJEXT) \
+	gmap_avx2-orderstat.$(OBJEXT) \
 	gmap_avx2-oligoindex_hr.$(OBJEXT) gmap_avx2-intron.$(OBJEXT) \
 	gmap_avx2-maxent.$(OBJEXT) gmap_avx2-maxent_hr.$(OBJEXT) \
 	gmap_avx2-pair.$(OBJEXT) gmap_avx2-pairpool.$(OBJEXT) \
@@ -310,14 +374,79 @@ am__objects_6 = gmap_avx2-except.$(OBJEXT) gmap_avx2-assert.$(OBJEXT) \
 	gmap_avx2-chimera.$(OBJEXT) gmap_avx2-datadir.$(OBJEXT) \
 	gmap_avx2-parserange.$(OBJEXT) gmap_avx2-getopt.$(OBJEXT) \
 	gmap_avx2-getopt1.$(OBJEXT) gmap_avx2-gmap.$(OBJEXT)
-dist_gmap_avx2_OBJECTS = $(am__objects_6)
+dist_gmap_avx2_OBJECTS = $(am__objects_7)
 gmap_avx2_OBJECTS = $(dist_gmap_avx2_OBJECTS)
 gmap_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmap_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_avx2_CFLAGS) \
 	$(CFLAGS) $(gmap_avx2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_7 = gmap_nosimd-except.$(OBJEXT) \
+am__objects_8 = gmap_avx512-except.$(OBJEXT) \
+	gmap_avx512-assert.$(OBJEXT) gmap_avx512-mem.$(OBJEXT) \
+	gmap_avx512-intlist.$(OBJEXT) gmap_avx512-list.$(OBJEXT) \
+	gmap_avx512-littleendian.$(OBJEXT) \
+	gmap_avx512-bigendian.$(OBJEXT) \
+	gmap_avx512-univinterval.$(OBJEXT) \
+	gmap_avx512-interval.$(OBJEXT) gmap_avx512-uintlist.$(OBJEXT) \
+	gmap_avx512-stopwatch.$(OBJEXT) \
+	gmap_avx512-semaphore.$(OBJEXT) gmap_avx512-access.$(OBJEXT) \
+	gmap_avx512-filestring.$(OBJEXT) \
+	gmap_avx512-iit-read-univ.$(OBJEXT) \
+	gmap_avx512-iit-read.$(OBJEXT) gmap_avx512-md5.$(OBJEXT) \
+	gmap_avx512-bzip2.$(OBJEXT) gmap_avx512-sequence.$(OBJEXT) \
+	gmap_avx512-reader.$(OBJEXT) gmap_avx512-genomicpos.$(OBJEXT) \
+	gmap_avx512-compress.$(OBJEXT) \
+	gmap_avx512-compress-write.$(OBJEXT) \
+	gmap_avx512-gbuffer.$(OBJEXT) gmap_avx512-genome.$(OBJEXT) \
+	gmap_avx512-popcount.$(OBJEXT) \
+	gmap_avx512-genome128_hr.$(OBJEXT) \
+	gmap_avx512-genome_sites.$(OBJEXT) \
+	gmap_avx512-genome-write.$(OBJEXT) \
+	gmap_avx512-bitpack64-read.$(OBJEXT) \
+	gmap_avx512-bitpack64-readtwo.$(OBJEXT) \
+	gmap_avx512-merge.$(OBJEXT) gmap_avx512-indexdb.$(OBJEXT) \
+	gmap_avx512-indexdb_hr.$(OBJEXT) gmap_avx512-oligo.$(OBJEXT) \
+	gmap_avx512-block.$(OBJEXT) gmap_avx512-chrom.$(OBJEXT) \
+	gmap_avx512-segmentpos.$(OBJEXT) gmap_avx512-chrnum.$(OBJEXT) \
+	gmap_avx512-uinttable.$(OBJEXT) gmap_avx512-gregion.$(OBJEXT) \
+	gmap_avx512-match.$(OBJEXT) gmap_avx512-matchpool.$(OBJEXT) \
+	gmap_avx512-diagnostic.$(OBJEXT) gmap_avx512-stage1.$(OBJEXT) \
+	gmap_avx512-diag.$(OBJEXT) gmap_avx512-diagpool.$(OBJEXT) \
+	gmap_avx512-cmet.$(OBJEXT) gmap_avx512-atoi.$(OBJEXT) \
+	gmap_avx512-orderstat.$(OBJEXT) \
+	gmap_avx512-oligoindex_hr.$(OBJEXT) \
+	gmap_avx512-intron.$(OBJEXT) gmap_avx512-maxent.$(OBJEXT) \
+	gmap_avx512-maxent_hr.$(OBJEXT) gmap_avx512-pair.$(OBJEXT) \
+	gmap_avx512-pairpool.$(OBJEXT) gmap_avx512-cellpool.$(OBJEXT) \
+	gmap_avx512-stage2.$(OBJEXT) gmap_avx512-doublelist.$(OBJEXT) \
+	gmap_avx512-smooth.$(OBJEXT) \
+	gmap_avx512-splicestringpool.$(OBJEXT) \
+	gmap_avx512-splicetrie_build.$(OBJEXT) \
+	gmap_avx512-splicetrie.$(OBJEXT) \
+	gmap_avx512-boyer-moore.$(OBJEXT) \
+	gmap_avx512-dynprog.$(OBJEXT) \
+	gmap_avx512-dynprog_simd.$(OBJEXT) \
+	gmap_avx512-dynprog_single.$(OBJEXT) \
+	gmap_avx512-dynprog_genome.$(OBJEXT) \
+	gmap_avx512-dynprog_cdna.$(OBJEXT) \
+	gmap_avx512-dynprog_end.$(OBJEXT) \
+	gmap_avx512-translation.$(OBJEXT) gmap_avx512-pbinom.$(OBJEXT) \
+	gmap_avx512-changepoint.$(OBJEXT) gmap_avx512-stage3.$(OBJEXT) \
+	gmap_avx512-request.$(OBJEXT) gmap_avx512-result.$(OBJEXT) \
+	gmap_avx512-output.$(OBJEXT) gmap_avx512-inbuffer.$(OBJEXT) \
+	gmap_avx512-samheader.$(OBJEXT) \
+	gmap_avx512-outbuffer.$(OBJEXT) gmap_avx512-chimera.$(OBJEXT) \
+	gmap_avx512-datadir.$(OBJEXT) gmap_avx512-parserange.$(OBJEXT) \
+	gmap_avx512-getopt.$(OBJEXT) gmap_avx512-getopt1.$(OBJEXT) \
+	gmap_avx512-gmap.$(OBJEXT)
+dist_gmap_avx512_OBJECTS = $(am__objects_8)
+gmap_avx512_OBJECTS = $(dist_gmap_avx512_OBJECTS)
+gmap_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_1)
+gmap_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_avx512_CFLAGS) \
+	$(CFLAGS) $(gmap_avx512_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_9 = gmap_nosimd-except.$(OBJEXT) \
 	gmap_nosimd-assert.$(OBJEXT) gmap_nosimd-mem.$(OBJEXT) \
 	gmap_nosimd-intlist.$(OBJEXT) gmap_nosimd-list.$(OBJEXT) \
 	gmap_nosimd-littleendian.$(OBJEXT) \
@@ -340,12 +469,12 @@ am__objects_7 = gmap_nosimd-except.$(OBJEXT) \
 	gmap_nosimd-genome-write.$(OBJEXT) \
 	gmap_nosimd-bitpack64-read.$(OBJEXT) \
 	gmap_nosimd-bitpack64-readtwo.$(OBJEXT) \
-	gmap_nosimd-indexdb.$(OBJEXT) gmap_nosimd-indexdb_hr.$(OBJEXT) \
-	gmap_nosimd-oligo.$(OBJEXT) gmap_nosimd-block.$(OBJEXT) \
-	gmap_nosimd-chrom.$(OBJEXT) gmap_nosimd-segmentpos.$(OBJEXT) \
-	gmap_nosimd-chrnum.$(OBJEXT) gmap_nosimd-uinttable.$(OBJEXT) \
-	gmap_nosimd-gregion.$(OBJEXT) gmap_nosimd-match.$(OBJEXT) \
-	gmap_nosimd-matchpool.$(OBJEXT) \
+	gmap_nosimd-merge.$(OBJEXT) gmap_nosimd-indexdb.$(OBJEXT) \
+	gmap_nosimd-indexdb_hr.$(OBJEXT) gmap_nosimd-oligo.$(OBJEXT) \
+	gmap_nosimd-block.$(OBJEXT) gmap_nosimd-chrom.$(OBJEXT) \
+	gmap_nosimd-segmentpos.$(OBJEXT) gmap_nosimd-chrnum.$(OBJEXT) \
+	gmap_nosimd-uinttable.$(OBJEXT) gmap_nosimd-gregion.$(OBJEXT) \
+	gmap_nosimd-match.$(OBJEXT) gmap_nosimd-matchpool.$(OBJEXT) \
 	gmap_nosimd-diagnostic.$(OBJEXT) gmap_nosimd-stage1.$(OBJEXT) \
 	gmap_nosimd-diag.$(OBJEXT) gmap_nosimd-diagpool.$(OBJEXT) \
 	gmap_nosimd-cmet.$(OBJEXT) gmap_nosimd-atoi.$(OBJEXT) \
@@ -375,14 +504,14 @@ am__objects_7 = gmap_nosimd-except.$(OBJEXT) \
 	gmap_nosimd-datadir.$(OBJEXT) gmap_nosimd-parserange.$(OBJEXT) \
 	gmap_nosimd-getopt.$(OBJEXT) gmap_nosimd-getopt1.$(OBJEXT) \
 	gmap_nosimd-gmap.$(OBJEXT)
-dist_gmap_nosimd_OBJECTS = $(am__objects_7)
+dist_gmap_nosimd_OBJECTS = $(am__objects_9)
 gmap_nosimd_OBJECTS = $(dist_gmap_nosimd_OBJECTS)
 gmap_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmap_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_nosimd_CFLAGS) \
 	$(CFLAGS) $(gmap_nosimd_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_8 = gmap_sse2-except.$(OBJEXT) gmap_sse2-assert.$(OBJEXT) \
+am__objects_10 = gmap_sse2-except.$(OBJEXT) gmap_sse2-assert.$(OBJEXT) \
 	gmap_sse2-mem.$(OBJEXT) gmap_sse2-intlist.$(OBJEXT) \
 	gmap_sse2-list.$(OBJEXT) gmap_sse2-littleendian.$(OBJEXT) \
 	gmap_sse2-bigendian.$(OBJEXT) gmap_sse2-univinterval.$(OBJEXT) \
@@ -400,15 +529,16 @@ am__objects_8 = gmap_sse2-except.$(OBJEXT) gmap_sse2-assert.$(OBJEXT) \
 	gmap_sse2-genome-write.$(OBJEXT) \
 	gmap_sse2-bitpack64-read.$(OBJEXT) \
 	gmap_sse2-bitpack64-readtwo.$(OBJEXT) \
-	gmap_sse2-indexdb.$(OBJEXT) gmap_sse2-indexdb_hr.$(OBJEXT) \
-	gmap_sse2-oligo.$(OBJEXT) gmap_sse2-block.$(OBJEXT) \
-	gmap_sse2-chrom.$(OBJEXT) gmap_sse2-segmentpos.$(OBJEXT) \
-	gmap_sse2-chrnum.$(OBJEXT) gmap_sse2-uinttable.$(OBJEXT) \
-	gmap_sse2-gregion.$(OBJEXT) gmap_sse2-match.$(OBJEXT) \
-	gmap_sse2-matchpool.$(OBJEXT) gmap_sse2-diagnostic.$(OBJEXT) \
-	gmap_sse2-stage1.$(OBJEXT) gmap_sse2-diag.$(OBJEXT) \
-	gmap_sse2-diagpool.$(OBJEXT) gmap_sse2-cmet.$(OBJEXT) \
-	gmap_sse2-atoi.$(OBJEXT) gmap_sse2-orderstat.$(OBJEXT) \
+	gmap_sse2-merge.$(OBJEXT) gmap_sse2-indexdb.$(OBJEXT) \
+	gmap_sse2-indexdb_hr.$(OBJEXT) gmap_sse2-oligo.$(OBJEXT) \
+	gmap_sse2-block.$(OBJEXT) gmap_sse2-chrom.$(OBJEXT) \
+	gmap_sse2-segmentpos.$(OBJEXT) gmap_sse2-chrnum.$(OBJEXT) \
+	gmap_sse2-uinttable.$(OBJEXT) gmap_sse2-gregion.$(OBJEXT) \
+	gmap_sse2-match.$(OBJEXT) gmap_sse2-matchpool.$(OBJEXT) \
+	gmap_sse2-diagnostic.$(OBJEXT) gmap_sse2-stage1.$(OBJEXT) \
+	gmap_sse2-diag.$(OBJEXT) gmap_sse2-diagpool.$(OBJEXT) \
+	gmap_sse2-cmet.$(OBJEXT) gmap_sse2-atoi.$(OBJEXT) \
+	gmap_sse2-orderstat.$(OBJEXT) \
 	gmap_sse2-oligoindex_hr.$(OBJEXT) gmap_sse2-intron.$(OBJEXT) \
 	gmap_sse2-maxent.$(OBJEXT) gmap_sse2-maxent_hr.$(OBJEXT) \
 	gmap_sse2-pair.$(OBJEXT) gmap_sse2-pairpool.$(OBJEXT) \
@@ -430,14 +560,14 @@ am__objects_8 = gmap_sse2-except.$(OBJEXT) gmap_sse2-assert.$(OBJEXT) \
 	gmap_sse2-chimera.$(OBJEXT) gmap_sse2-datadir.$(OBJEXT) \
 	gmap_sse2-parserange.$(OBJEXT) gmap_sse2-getopt.$(OBJEXT) \
 	gmap_sse2-getopt1.$(OBJEXT) gmap_sse2-gmap.$(OBJEXT)
-dist_gmap_sse2_OBJECTS = $(am__objects_8)
+dist_gmap_sse2_OBJECTS = $(am__objects_10)
 gmap_sse2_OBJECTS = $(dist_gmap_sse2_OBJECTS)
 gmap_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmap_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_sse2_CFLAGS) \
 	$(CFLAGS) $(gmap_sse2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_9 = gmap_sse41-except.$(OBJEXT) \
+am__objects_11 = gmap_sse41-except.$(OBJEXT) \
 	gmap_sse41-assert.$(OBJEXT) gmap_sse41-mem.$(OBJEXT) \
 	gmap_sse41-intlist.$(OBJEXT) gmap_sse41-list.$(OBJEXT) \
 	gmap_sse41-littleendian.$(OBJEXT) \
@@ -459,15 +589,16 @@ am__objects_9 = gmap_sse41-except.$(OBJEXT) \
 	gmap_sse41-genome-write.$(OBJEXT) \
 	gmap_sse41-bitpack64-read.$(OBJEXT) \
 	gmap_sse41-bitpack64-readtwo.$(OBJEXT) \
-	gmap_sse41-indexdb.$(OBJEXT) gmap_sse41-indexdb_hr.$(OBJEXT) \
-	gmap_sse41-oligo.$(OBJEXT) gmap_sse41-block.$(OBJEXT) \
-	gmap_sse41-chrom.$(OBJEXT) gmap_sse41-segmentpos.$(OBJEXT) \
-	gmap_sse41-chrnum.$(OBJEXT) gmap_sse41-uinttable.$(OBJEXT) \
-	gmap_sse41-gregion.$(OBJEXT) gmap_sse41-match.$(OBJEXT) \
-	gmap_sse41-matchpool.$(OBJEXT) gmap_sse41-diagnostic.$(OBJEXT) \
-	gmap_sse41-stage1.$(OBJEXT) gmap_sse41-diag.$(OBJEXT) \
-	gmap_sse41-diagpool.$(OBJEXT) gmap_sse41-cmet.$(OBJEXT) \
-	gmap_sse41-atoi.$(OBJEXT) gmap_sse41-orderstat.$(OBJEXT) \
+	gmap_sse41-merge.$(OBJEXT) gmap_sse41-indexdb.$(OBJEXT) \
+	gmap_sse41-indexdb_hr.$(OBJEXT) gmap_sse41-oligo.$(OBJEXT) \
+	gmap_sse41-block.$(OBJEXT) gmap_sse41-chrom.$(OBJEXT) \
+	gmap_sse41-segmentpos.$(OBJEXT) gmap_sse41-chrnum.$(OBJEXT) \
+	gmap_sse41-uinttable.$(OBJEXT) gmap_sse41-gregion.$(OBJEXT) \
+	gmap_sse41-match.$(OBJEXT) gmap_sse41-matchpool.$(OBJEXT) \
+	gmap_sse41-diagnostic.$(OBJEXT) gmap_sse41-stage1.$(OBJEXT) \
+	gmap_sse41-diag.$(OBJEXT) gmap_sse41-diagpool.$(OBJEXT) \
+	gmap_sse41-cmet.$(OBJEXT) gmap_sse41-atoi.$(OBJEXT) \
+	gmap_sse41-orderstat.$(OBJEXT) \
 	gmap_sse41-oligoindex_hr.$(OBJEXT) gmap_sse41-intron.$(OBJEXT) \
 	gmap_sse41-maxent.$(OBJEXT) gmap_sse41-maxent_hr.$(OBJEXT) \
 	gmap_sse41-pair.$(OBJEXT) gmap_sse41-pairpool.$(OBJEXT) \
@@ -490,14 +621,14 @@ am__objects_9 = gmap_sse41-except.$(OBJEXT) \
 	gmap_sse41-chimera.$(OBJEXT) gmap_sse41-datadir.$(OBJEXT) \
 	gmap_sse41-parserange.$(OBJEXT) gmap_sse41-getopt.$(OBJEXT) \
 	gmap_sse41-getopt1.$(OBJEXT) gmap_sse41-gmap.$(OBJEXT)
-dist_gmap_sse41_OBJECTS = $(am__objects_9)
+dist_gmap_sse41_OBJECTS = $(am__objects_11)
 gmap_sse41_OBJECTS = $(dist_gmap_sse41_OBJECTS)
 gmap_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmap_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_sse41_CFLAGS) \
 	$(CFLAGS) $(gmap_sse41_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_10 = gmap_sse42-except.$(OBJEXT) \
+am__objects_12 = gmap_sse42-except.$(OBJEXT) \
 	gmap_sse42-assert.$(OBJEXT) gmap_sse42-mem.$(OBJEXT) \
 	gmap_sse42-intlist.$(OBJEXT) gmap_sse42-list.$(OBJEXT) \
 	gmap_sse42-littleendian.$(OBJEXT) \
@@ -519,15 +650,16 @@ am__objects_10 = gmap_sse42-except.$(OBJEXT) \
 	gmap_sse42-genome-write.$(OBJEXT) \
 	gmap_sse42-bitpack64-read.$(OBJEXT) \
 	gmap_sse42-bitpack64-readtwo.$(OBJEXT) \
-	gmap_sse42-indexdb.$(OBJEXT) gmap_sse42-indexdb_hr.$(OBJEXT) \
-	gmap_sse42-oligo.$(OBJEXT) gmap_sse42-block.$(OBJEXT) \
-	gmap_sse42-chrom.$(OBJEXT) gmap_sse42-segmentpos.$(OBJEXT) \
-	gmap_sse42-chrnum.$(OBJEXT) gmap_sse42-uinttable.$(OBJEXT) \
-	gmap_sse42-gregion.$(OBJEXT) gmap_sse42-match.$(OBJEXT) \
-	gmap_sse42-matchpool.$(OBJEXT) gmap_sse42-diagnostic.$(OBJEXT) \
-	gmap_sse42-stage1.$(OBJEXT) gmap_sse42-diag.$(OBJEXT) \
-	gmap_sse42-diagpool.$(OBJEXT) gmap_sse42-cmet.$(OBJEXT) \
-	gmap_sse42-atoi.$(OBJEXT) gmap_sse42-orderstat.$(OBJEXT) \
+	gmap_sse42-merge.$(OBJEXT) gmap_sse42-indexdb.$(OBJEXT) \
+	gmap_sse42-indexdb_hr.$(OBJEXT) gmap_sse42-oligo.$(OBJEXT) \
+	gmap_sse42-block.$(OBJEXT) gmap_sse42-chrom.$(OBJEXT) \
+	gmap_sse42-segmentpos.$(OBJEXT) gmap_sse42-chrnum.$(OBJEXT) \
+	gmap_sse42-uinttable.$(OBJEXT) gmap_sse42-gregion.$(OBJEXT) \
+	gmap_sse42-match.$(OBJEXT) gmap_sse42-matchpool.$(OBJEXT) \
+	gmap_sse42-diagnostic.$(OBJEXT) gmap_sse42-stage1.$(OBJEXT) \
+	gmap_sse42-diag.$(OBJEXT) gmap_sse42-diagpool.$(OBJEXT) \
+	gmap_sse42-cmet.$(OBJEXT) gmap_sse42-atoi.$(OBJEXT) \
+	gmap_sse42-orderstat.$(OBJEXT) \
 	gmap_sse42-oligoindex_hr.$(OBJEXT) gmap_sse42-intron.$(OBJEXT) \
 	gmap_sse42-maxent.$(OBJEXT) gmap_sse42-maxent_hr.$(OBJEXT) \
 	gmap_sse42-pair.$(OBJEXT) gmap_sse42-pairpool.$(OBJEXT) \
@@ -550,14 +682,14 @@ am__objects_10 = gmap_sse42-except.$(OBJEXT) \
 	gmap_sse42-chimera.$(OBJEXT) gmap_sse42-datadir.$(OBJEXT) \
 	gmap_sse42-parserange.$(OBJEXT) gmap_sse42-getopt.$(OBJEXT) \
 	gmap_sse42-getopt1.$(OBJEXT) gmap_sse42-gmap.$(OBJEXT)
-dist_gmap_sse42_OBJECTS = $(am__objects_10)
+dist_gmap_sse42_OBJECTS = $(am__objects_12)
 gmap_sse42_OBJECTS = $(dist_gmap_sse42_OBJECTS)
 gmap_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmap_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_sse42_CFLAGS) \
 	$(CFLAGS) $(gmap_sse42_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_11 = gmap_ssse3-except.$(OBJEXT) \
+am__objects_13 = gmap_ssse3-except.$(OBJEXT) \
 	gmap_ssse3-assert.$(OBJEXT) gmap_ssse3-mem.$(OBJEXT) \
 	gmap_ssse3-intlist.$(OBJEXT) gmap_ssse3-list.$(OBJEXT) \
 	gmap_ssse3-littleendian.$(OBJEXT) \
@@ -579,15 +711,16 @@ am__objects_11 = gmap_ssse3-except.$(OBJEXT) \
 	gmap_ssse3-genome-write.$(OBJEXT) \
 	gmap_ssse3-bitpack64-read.$(OBJEXT) \
 	gmap_ssse3-bitpack64-readtwo.$(OBJEXT) \
-	gmap_ssse3-indexdb.$(OBJEXT) gmap_ssse3-indexdb_hr.$(OBJEXT) \
-	gmap_ssse3-oligo.$(OBJEXT) gmap_ssse3-block.$(OBJEXT) \
-	gmap_ssse3-chrom.$(OBJEXT) gmap_ssse3-segmentpos.$(OBJEXT) \
-	gmap_ssse3-chrnum.$(OBJEXT) gmap_ssse3-uinttable.$(OBJEXT) \
-	gmap_ssse3-gregion.$(OBJEXT) gmap_ssse3-match.$(OBJEXT) \
-	gmap_ssse3-matchpool.$(OBJEXT) gmap_ssse3-diagnostic.$(OBJEXT) \
-	gmap_ssse3-stage1.$(OBJEXT) gmap_ssse3-diag.$(OBJEXT) \
-	gmap_ssse3-diagpool.$(OBJEXT) gmap_ssse3-cmet.$(OBJEXT) \
-	gmap_ssse3-atoi.$(OBJEXT) gmap_ssse3-orderstat.$(OBJEXT) \
+	gmap_ssse3-merge.$(OBJEXT) gmap_ssse3-indexdb.$(OBJEXT) \
+	gmap_ssse3-indexdb_hr.$(OBJEXT) gmap_ssse3-oligo.$(OBJEXT) \
+	gmap_ssse3-block.$(OBJEXT) gmap_ssse3-chrom.$(OBJEXT) \
+	gmap_ssse3-segmentpos.$(OBJEXT) gmap_ssse3-chrnum.$(OBJEXT) \
+	gmap_ssse3-uinttable.$(OBJEXT) gmap_ssse3-gregion.$(OBJEXT) \
+	gmap_ssse3-match.$(OBJEXT) gmap_ssse3-matchpool.$(OBJEXT) \
+	gmap_ssse3-diagnostic.$(OBJEXT) gmap_ssse3-stage1.$(OBJEXT) \
+	gmap_ssse3-diag.$(OBJEXT) gmap_ssse3-diagpool.$(OBJEXT) \
+	gmap_ssse3-cmet.$(OBJEXT) gmap_ssse3-atoi.$(OBJEXT) \
+	gmap_ssse3-orderstat.$(OBJEXT) \
 	gmap_ssse3-oligoindex_hr.$(OBJEXT) gmap_ssse3-intron.$(OBJEXT) \
 	gmap_ssse3-maxent.$(OBJEXT) gmap_ssse3-maxent_hr.$(OBJEXT) \
 	gmap_ssse3-pair.$(OBJEXT) gmap_ssse3-pairpool.$(OBJEXT) \
@@ -610,14 +743,14 @@ am__objects_11 = gmap_ssse3-except.$(OBJEXT) \
 	gmap_ssse3-chimera.$(OBJEXT) gmap_ssse3-datadir.$(OBJEXT) \
 	gmap_ssse3-parserange.$(OBJEXT) gmap_ssse3-getopt.$(OBJEXT) \
 	gmap_ssse3-getopt1.$(OBJEXT) gmap_ssse3-gmap.$(OBJEXT)
-dist_gmap_ssse3_OBJECTS = $(am__objects_11)
+dist_gmap_ssse3_OBJECTS = $(am__objects_13)
 gmap_ssse3_OBJECTS = $(dist_gmap_ssse3_OBJECTS)
 gmap_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmap_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_ssse3_CFLAGS) \
 	$(CFLAGS) $(gmap_ssse3_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_12 = gmapindex-except.$(OBJEXT) gmapindex-assert.$(OBJEXT) \
+am__objects_14 = gmapindex-except.$(OBJEXT) gmapindex-assert.$(OBJEXT) \
 	gmapindex-mem.$(OBJEXT) gmapindex-intlist.$(OBJEXT) \
 	gmapindex-list.$(OBJEXT) gmapindex-littleendian.$(OBJEXT) \
 	gmapindex-bigendian.$(OBJEXT) gmapindex-univinterval.$(OBJEXT) \
@@ -645,21 +778,21 @@ am__objects_12 = gmapindex-except.$(OBJEXT) gmapindex-assert.$(OBJEXT) \
 	gmapindex-bytecoding.$(OBJEXT) \
 	gmapindex-sarray-write.$(OBJEXT) \
 	gmapindex-parserange.$(OBJEXT) gmapindex-gmapindex.$(OBJEXT)
-dist_gmapindex_OBJECTS = $(am__objects_12)
+dist_gmapindex_OBJECTS = $(am__objects_14)
 gmapindex_OBJECTS = $(dist_gmapindex_OBJECTS)
 gmapindex_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmapindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapindex_CFLAGS) \
 	$(CFLAGS) $(gmapindex_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_13 = gmapl-cpuid.$(OBJEXT) gmapl-gmapl_select.$(OBJEXT)
-dist_gmapl_OBJECTS = $(am__objects_13)
+am__objects_15 = gmapl-cpuid.$(OBJEXT) gmapl-gmapl_select.$(OBJEXT)
+dist_gmapl_OBJECTS = $(am__objects_15)
 gmapl_OBJECTS = $(dist_gmapl_OBJECTS)
 gmapl_DEPENDENCIES =
 gmapl_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_CFLAGS) $(CFLAGS) \
 	$(gmapl_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_14 = gmapl_avx2-except.$(OBJEXT) \
+am__objects_16 = gmapl_avx2-except.$(OBJEXT) \
 	gmapl_avx2-assert.$(OBJEXT) gmapl_avx2-mem.$(OBJEXT) \
 	gmapl_avx2-intlist.$(OBJEXT) gmapl_avx2-list.$(OBJEXT) \
 	gmapl_avx2-littleendian.$(OBJEXT) \
@@ -682,15 +815,16 @@ am__objects_14 = gmapl_avx2-except.$(OBJEXT) \
 	gmapl_avx2-genome-write.$(OBJEXT) \
 	gmapl_avx2-bitpack64-read.$(OBJEXT) \
 	gmapl_avx2-bitpack64-readtwo.$(OBJEXT) \
-	gmapl_avx2-indexdb.$(OBJEXT) gmapl_avx2-indexdb_hr.$(OBJEXT) \
-	gmapl_avx2-oligo.$(OBJEXT) gmapl_avx2-block.$(OBJEXT) \
-	gmapl_avx2-chrom.$(OBJEXT) gmapl_avx2-segmentpos.$(OBJEXT) \
-	gmapl_avx2-chrnum.$(OBJEXT) gmapl_avx2-uinttable.$(OBJEXT) \
-	gmapl_avx2-gregion.$(OBJEXT) gmapl_avx2-match.$(OBJEXT) \
-	gmapl_avx2-matchpool.$(OBJEXT) gmapl_avx2-diagnostic.$(OBJEXT) \
-	gmapl_avx2-stage1.$(OBJEXT) gmapl_avx2-diag.$(OBJEXT) \
-	gmapl_avx2-diagpool.$(OBJEXT) gmapl_avx2-cmet.$(OBJEXT) \
-	gmapl_avx2-atoi.$(OBJEXT) gmapl_avx2-orderstat.$(OBJEXT) \
+	gmapl_avx2-merge.$(OBJEXT) gmapl_avx2-indexdb.$(OBJEXT) \
+	gmapl_avx2-indexdb_hr.$(OBJEXT) gmapl_avx2-oligo.$(OBJEXT) \
+	gmapl_avx2-block.$(OBJEXT) gmapl_avx2-chrom.$(OBJEXT) \
+	gmapl_avx2-segmentpos.$(OBJEXT) gmapl_avx2-chrnum.$(OBJEXT) \
+	gmapl_avx2-uinttable.$(OBJEXT) gmapl_avx2-gregion.$(OBJEXT) \
+	gmapl_avx2-match.$(OBJEXT) gmapl_avx2-matchpool.$(OBJEXT) \
+	gmapl_avx2-diagnostic.$(OBJEXT) gmapl_avx2-stage1.$(OBJEXT) \
+	gmapl_avx2-diag.$(OBJEXT) gmapl_avx2-diagpool.$(OBJEXT) \
+	gmapl_avx2-cmet.$(OBJEXT) gmapl_avx2-atoi.$(OBJEXT) \
+	gmapl_avx2-orderstat.$(OBJEXT) \
 	gmapl_avx2-oligoindex_hr.$(OBJEXT) gmapl_avx2-intron.$(OBJEXT) \
 	gmapl_avx2-maxent.$(OBJEXT) gmapl_avx2-maxent_hr.$(OBJEXT) \
 	gmapl_avx2-pair.$(OBJEXT) gmapl_avx2-pairpool.$(OBJEXT) \
@@ -713,14 +847,87 @@ am__objects_14 = gmapl_avx2-except.$(OBJEXT) \
 	gmapl_avx2-chimera.$(OBJEXT) gmapl_avx2-datadir.$(OBJEXT) \
 	gmapl_avx2-parserange.$(OBJEXT) gmapl_avx2-getopt.$(OBJEXT) \
 	gmapl_avx2-getopt1.$(OBJEXT) gmapl_avx2-gmap.$(OBJEXT)
-dist_gmapl_avx2_OBJECTS = $(am__objects_14)
+dist_gmapl_avx2_OBJECTS = $(am__objects_16)
 gmapl_avx2_OBJECTS = $(dist_gmapl_avx2_OBJECTS)
 gmapl_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmapl_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_avx2_CFLAGS) \
 	$(CFLAGS) $(gmapl_avx2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_15 = gmapl_nosimd-except.$(OBJEXT) \
+am__objects_17 = gmapl_avx512-except.$(OBJEXT) \
+	gmapl_avx512-assert.$(OBJEXT) gmapl_avx512-mem.$(OBJEXT) \
+	gmapl_avx512-intlist.$(OBJEXT) gmapl_avx512-list.$(OBJEXT) \
+	gmapl_avx512-littleendian.$(OBJEXT) \
+	gmapl_avx512-bigendian.$(OBJEXT) \
+	gmapl_avx512-univinterval.$(OBJEXT) \
+	gmapl_avx512-interval.$(OBJEXT) \
+	gmapl_avx512-uintlist.$(OBJEXT) \
+	gmapl_avx512-uint8list.$(OBJEXT) \
+	gmapl_avx512-stopwatch.$(OBJEXT) \
+	gmapl_avx512-semaphore.$(OBJEXT) gmapl_avx512-access.$(OBJEXT) \
+	gmapl_avx512-filestring.$(OBJEXT) \
+	gmapl_avx512-iit-read-univ.$(OBJEXT) \
+	gmapl_avx512-iit-read.$(OBJEXT) gmapl_avx512-md5.$(OBJEXT) \
+	gmapl_avx512-bzip2.$(OBJEXT) gmapl_avx512-sequence.$(OBJEXT) \
+	gmapl_avx512-reader.$(OBJEXT) \
+	gmapl_avx512-genomicpos.$(OBJEXT) \
+	gmapl_avx512-compress.$(OBJEXT) \
+	gmapl_avx512-compress-write.$(OBJEXT) \
+	gmapl_avx512-gbuffer.$(OBJEXT) gmapl_avx512-genome.$(OBJEXT) \
+	gmapl_avx512-popcount.$(OBJEXT) \
+	gmapl_avx512-genome128_hr.$(OBJEXT) \
+	gmapl_avx512-genome_sites.$(OBJEXT) \
+	gmapl_avx512-genome-write.$(OBJEXT) \
+	gmapl_avx512-bitpack64-read.$(OBJEXT) \
+	gmapl_avx512-bitpack64-readtwo.$(OBJEXT) \
+	gmapl_avx512-merge.$(OBJEXT) gmapl_avx512-indexdb.$(OBJEXT) \
+	gmapl_avx512-indexdb_hr.$(OBJEXT) gmapl_avx512-oligo.$(OBJEXT) \
+	gmapl_avx512-block.$(OBJEXT) gmapl_avx512-chrom.$(OBJEXT) \
+	gmapl_avx512-segmentpos.$(OBJEXT) \
+	gmapl_avx512-chrnum.$(OBJEXT) gmapl_avx512-uinttable.$(OBJEXT) \
+	gmapl_avx512-gregion.$(OBJEXT) gmapl_avx512-match.$(OBJEXT) \
+	gmapl_avx512-matchpool.$(OBJEXT) \
+	gmapl_avx512-diagnostic.$(OBJEXT) \
+	gmapl_avx512-stage1.$(OBJEXT) gmapl_avx512-diag.$(OBJEXT) \
+	gmapl_avx512-diagpool.$(OBJEXT) gmapl_avx512-cmet.$(OBJEXT) \
+	gmapl_avx512-atoi.$(OBJEXT) gmapl_avx512-orderstat.$(OBJEXT) \
+	gmapl_avx512-oligoindex_hr.$(OBJEXT) \
+	gmapl_avx512-intron.$(OBJEXT) gmapl_avx512-maxent.$(OBJEXT) \
+	gmapl_avx512-maxent_hr.$(OBJEXT) gmapl_avx512-pair.$(OBJEXT) \
+	gmapl_avx512-pairpool.$(OBJEXT) \
+	gmapl_avx512-cellpool.$(OBJEXT) gmapl_avx512-stage2.$(OBJEXT) \
+	gmapl_avx512-doublelist.$(OBJEXT) \
+	gmapl_avx512-smooth.$(OBJEXT) \
+	gmapl_avx512-splicestringpool.$(OBJEXT) \
+	gmapl_avx512-splicetrie_build.$(OBJEXT) \
+	gmapl_avx512-splicetrie.$(OBJEXT) \
+	gmapl_avx512-boyer-moore.$(OBJEXT) \
+	gmapl_avx512-dynprog.$(OBJEXT) \
+	gmapl_avx512-dynprog_simd.$(OBJEXT) \
+	gmapl_avx512-dynprog_single.$(OBJEXT) \
+	gmapl_avx512-dynprog_genome.$(OBJEXT) \
+	gmapl_avx512-dynprog_cdna.$(OBJEXT) \
+	gmapl_avx512-dynprog_end.$(OBJEXT) \
+	gmapl_avx512-translation.$(OBJEXT) \
+	gmapl_avx512-pbinom.$(OBJEXT) \
+	gmapl_avx512-changepoint.$(OBJEXT) \
+	gmapl_avx512-stage3.$(OBJEXT) gmapl_avx512-request.$(OBJEXT) \
+	gmapl_avx512-result.$(OBJEXT) gmapl_avx512-output.$(OBJEXT) \
+	gmapl_avx512-inbuffer.$(OBJEXT) \
+	gmapl_avx512-samheader.$(OBJEXT) \
+	gmapl_avx512-outbuffer.$(OBJEXT) \
+	gmapl_avx512-chimera.$(OBJEXT) gmapl_avx512-datadir.$(OBJEXT) \
+	gmapl_avx512-parserange.$(OBJEXT) \
+	gmapl_avx512-getopt.$(OBJEXT) gmapl_avx512-getopt1.$(OBJEXT) \
+	gmapl_avx512-gmap.$(OBJEXT)
+dist_gmapl_avx512_OBJECTS = $(am__objects_17)
+gmapl_avx512_OBJECTS = $(dist_gmapl_avx512_OBJECTS)
+gmapl_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+gmapl_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_avx512_CFLAGS) \
+	$(CFLAGS) $(gmapl_avx512_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_18 = gmapl_nosimd-except.$(OBJEXT) \
 	gmapl_nosimd-assert.$(OBJEXT) gmapl_nosimd-mem.$(OBJEXT) \
 	gmapl_nosimd-intlist.$(OBJEXT) gmapl_nosimd-list.$(OBJEXT) \
 	gmapl_nosimd-littleendian.$(OBJEXT) \
@@ -746,7 +953,7 @@ am__objects_15 = gmapl_nosimd-except.$(OBJEXT) \
 	gmapl_nosimd-genome-write.$(OBJEXT) \
 	gmapl_nosimd-bitpack64-read.$(OBJEXT) \
 	gmapl_nosimd-bitpack64-readtwo.$(OBJEXT) \
-	gmapl_nosimd-indexdb.$(OBJEXT) \
+	gmapl_nosimd-merge.$(OBJEXT) gmapl_nosimd-indexdb.$(OBJEXT) \
 	gmapl_nosimd-indexdb_hr.$(OBJEXT) gmapl_nosimd-oligo.$(OBJEXT) \
 	gmapl_nosimd-block.$(OBJEXT) gmapl_nosimd-chrom.$(OBJEXT) \
 	gmapl_nosimd-segmentpos.$(OBJEXT) \
@@ -786,14 +993,14 @@ am__objects_15 = gmapl_nosimd-except.$(OBJEXT) \
 	gmapl_nosimd-parserange.$(OBJEXT) \
 	gmapl_nosimd-getopt.$(OBJEXT) gmapl_nosimd-getopt1.$(OBJEXT) \
 	gmapl_nosimd-gmap.$(OBJEXT)
-dist_gmapl_nosimd_OBJECTS = $(am__objects_15)
+dist_gmapl_nosimd_OBJECTS = $(am__objects_18)
 gmapl_nosimd_OBJECTS = $(dist_gmapl_nosimd_OBJECTS)
 gmapl_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 gmapl_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_nosimd_CFLAGS) \
 	$(CFLAGS) $(gmapl_nosimd_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_16 = gmapl_sse2-except.$(OBJEXT) \
+am__objects_19 = gmapl_sse2-except.$(OBJEXT) \
 	gmapl_sse2-assert.$(OBJEXT) gmapl_sse2-mem.$(OBJEXT) \
 	gmapl_sse2-intlist.$(OBJEXT) gmapl_sse2-list.$(OBJEXT) \
 	gmapl_sse2-littleendian.$(OBJEXT) \
@@ -816,15 +1023,16 @@ am__objects_16 = gmapl_sse2-except.$(OBJEXT) \
 	gmapl_sse2-genome-write.$(OBJEXT) \
 	gmapl_sse2-bitpack64-read.$(OBJEXT) \
 	gmapl_sse2-bitpack64-readtwo.$(OBJEXT) \
-	gmapl_sse2-indexdb.$(OBJEXT) gmapl_sse2-indexdb_hr.$(OBJEXT) \
-	gmapl_sse2-oligo.$(OBJEXT) gmapl_sse2-block.$(OBJEXT) \
-	gmapl_sse2-chrom.$(OBJEXT) gmapl_sse2-segmentpos.$(OBJEXT) \
-	gmapl_sse2-chrnum.$(OBJEXT) gmapl_sse2-uinttable.$(OBJEXT) \
-	gmapl_sse2-gregion.$(OBJEXT) gmapl_sse2-match.$(OBJEXT) \
-	gmapl_sse2-matchpool.$(OBJEXT) gmapl_sse2-diagnostic.$(OBJEXT) \
-	gmapl_sse2-stage1.$(OBJEXT) gmapl_sse2-diag.$(OBJEXT) \
-	gmapl_sse2-diagpool.$(OBJEXT) gmapl_sse2-cmet.$(OBJEXT) \
-	gmapl_sse2-atoi.$(OBJEXT) gmapl_sse2-orderstat.$(OBJEXT) \
+	gmapl_sse2-merge.$(OBJEXT) gmapl_sse2-indexdb.$(OBJEXT) \
+	gmapl_sse2-indexdb_hr.$(OBJEXT) gmapl_sse2-oligo.$(OBJEXT) \
+	gmapl_sse2-block.$(OBJEXT) gmapl_sse2-chrom.$(OBJEXT) \
+	gmapl_sse2-segmentpos.$(OBJEXT) gmapl_sse2-chrnum.$(OBJEXT) \
+	gmapl_sse2-uinttable.$(OBJEXT) gmapl_sse2-gregion.$(OBJEXT) \
+	gmapl_sse2-match.$(OBJEXT) gmapl_sse2-matchpool.$(OBJEXT) \
+	gmapl_sse2-diagnostic.$(OBJEXT) gmapl_sse2-stage1.$(OBJEXT) \
+	gmapl_sse2-diag.$(OBJEXT) gmapl_sse2-diagpool.$(OBJEXT) \
+	gmapl_sse2-cmet.$(OBJEXT) gmapl_sse2-atoi.$(OBJEXT) \
+	gmapl_sse2-orderstat.$(OBJEXT) \
 	gmapl_sse2-oligoindex_hr.$(OBJEXT) gmapl_sse2-intron.$(OBJEXT) \
 	gmapl_sse2-maxent.$(OBJEXT) gmapl_sse2-maxent_hr.$(OBJEXT) \
 	gmapl_sse2-pair.$(OBJEXT) gmapl_sse2-pairpool.$(OBJEXT) \
@@ -847,14 +1055,14 @@ am__objects_16 = gmapl_sse2-except.$(OBJEXT) \
 	gmapl_sse2-chimera.$(OBJEXT) gmapl_sse2-datadir.$(OBJEXT) \
 	gmapl_sse2-parserange.$(OBJEXT) gmapl_sse2-getopt.$(OBJEXT) \
 	gmapl_sse2-getopt1.$(OBJEXT) gmapl_sse2-gmap.$(OBJEXT)
-dist_gmapl_sse2_OBJECTS = $(am__objects_16)
+dist_gmapl_sse2_OBJECTS = $(am__objects_19)
 gmapl_sse2_OBJECTS = $(dist_gmapl_sse2_OBJECTS)
 gmapl_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmapl_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_sse2_CFLAGS) \
 	$(CFLAGS) $(gmapl_sse2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_17 = gmapl_sse41-except.$(OBJEXT) \
+am__objects_20 = gmapl_sse41-except.$(OBJEXT) \
 	gmapl_sse41-assert.$(OBJEXT) gmapl_sse41-mem.$(OBJEXT) \
 	gmapl_sse41-intlist.$(OBJEXT) gmapl_sse41-list.$(OBJEXT) \
 	gmapl_sse41-littleendian.$(OBJEXT) \
@@ -878,12 +1086,12 @@ am__objects_17 = gmapl_sse41-except.$(OBJEXT) \
 	gmapl_sse41-genome-write.$(OBJEXT) \
 	gmapl_sse41-bitpack64-read.$(OBJEXT) \
 	gmapl_sse41-bitpack64-readtwo.$(OBJEXT) \
-	gmapl_sse41-indexdb.$(OBJEXT) gmapl_sse41-indexdb_hr.$(OBJEXT) \
-	gmapl_sse41-oligo.$(OBJEXT) gmapl_sse41-block.$(OBJEXT) \
-	gmapl_sse41-chrom.$(OBJEXT) gmapl_sse41-segmentpos.$(OBJEXT) \
-	gmapl_sse41-chrnum.$(OBJEXT) gmapl_sse41-uinttable.$(OBJEXT) \
-	gmapl_sse41-gregion.$(OBJEXT) gmapl_sse41-match.$(OBJEXT) \
-	gmapl_sse41-matchpool.$(OBJEXT) \
+	gmapl_sse41-merge.$(OBJEXT) gmapl_sse41-indexdb.$(OBJEXT) \
+	gmapl_sse41-indexdb_hr.$(OBJEXT) gmapl_sse41-oligo.$(OBJEXT) \
+	gmapl_sse41-block.$(OBJEXT) gmapl_sse41-chrom.$(OBJEXT) \
+	gmapl_sse41-segmentpos.$(OBJEXT) gmapl_sse41-chrnum.$(OBJEXT) \
+	gmapl_sse41-uinttable.$(OBJEXT) gmapl_sse41-gregion.$(OBJEXT) \
+	gmapl_sse41-match.$(OBJEXT) gmapl_sse41-matchpool.$(OBJEXT) \
 	gmapl_sse41-diagnostic.$(OBJEXT) gmapl_sse41-stage1.$(OBJEXT) \
 	gmapl_sse41-diag.$(OBJEXT) gmapl_sse41-diagpool.$(OBJEXT) \
 	gmapl_sse41-cmet.$(OBJEXT) gmapl_sse41-atoi.$(OBJEXT) \
@@ -913,14 +1121,14 @@ am__objects_17 = gmapl_sse41-except.$(OBJEXT) \
 	gmapl_sse41-datadir.$(OBJEXT) gmapl_sse41-parserange.$(OBJEXT) \
 	gmapl_sse41-getopt.$(OBJEXT) gmapl_sse41-getopt1.$(OBJEXT) \
 	gmapl_sse41-gmap.$(OBJEXT)
-dist_gmapl_sse41_OBJECTS = $(am__objects_17)
+dist_gmapl_sse41_OBJECTS = $(am__objects_20)
 gmapl_sse41_OBJECTS = $(dist_gmapl_sse41_OBJECTS)
 gmapl_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmapl_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_sse41_CFLAGS) \
 	$(CFLAGS) $(gmapl_sse41_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_18 = gmapl_sse42-except.$(OBJEXT) \
+am__objects_21 = gmapl_sse42-except.$(OBJEXT) \
 	gmapl_sse42-assert.$(OBJEXT) gmapl_sse42-mem.$(OBJEXT) \
 	gmapl_sse42-intlist.$(OBJEXT) gmapl_sse42-list.$(OBJEXT) \
 	gmapl_sse42-littleendian.$(OBJEXT) \
@@ -944,12 +1152,12 @@ am__objects_18 = gmapl_sse42-except.$(OBJEXT) \
 	gmapl_sse42-genome-write.$(OBJEXT) \
 	gmapl_sse42-bitpack64-read.$(OBJEXT) \
 	gmapl_sse42-bitpack64-readtwo.$(OBJEXT) \
-	gmapl_sse42-indexdb.$(OBJEXT) gmapl_sse42-indexdb_hr.$(OBJEXT) \
-	gmapl_sse42-oligo.$(OBJEXT) gmapl_sse42-block.$(OBJEXT) \
-	gmapl_sse42-chrom.$(OBJEXT) gmapl_sse42-segmentpos.$(OBJEXT) \
-	gmapl_sse42-chrnum.$(OBJEXT) gmapl_sse42-uinttable.$(OBJEXT) \
-	gmapl_sse42-gregion.$(OBJEXT) gmapl_sse42-match.$(OBJEXT) \
-	gmapl_sse42-matchpool.$(OBJEXT) \
+	gmapl_sse42-merge.$(OBJEXT) gmapl_sse42-indexdb.$(OBJEXT) \
+	gmapl_sse42-indexdb_hr.$(OBJEXT) gmapl_sse42-oligo.$(OBJEXT) \
+	gmapl_sse42-block.$(OBJEXT) gmapl_sse42-chrom.$(OBJEXT) \
+	gmapl_sse42-segmentpos.$(OBJEXT) gmapl_sse42-chrnum.$(OBJEXT) \
+	gmapl_sse42-uinttable.$(OBJEXT) gmapl_sse42-gregion.$(OBJEXT) \
+	gmapl_sse42-match.$(OBJEXT) gmapl_sse42-matchpool.$(OBJEXT) \
 	gmapl_sse42-diagnostic.$(OBJEXT) gmapl_sse42-stage1.$(OBJEXT) \
 	gmapl_sse42-diag.$(OBJEXT) gmapl_sse42-diagpool.$(OBJEXT) \
 	gmapl_sse42-cmet.$(OBJEXT) gmapl_sse42-atoi.$(OBJEXT) \
@@ -979,14 +1187,14 @@ am__objects_18 = gmapl_sse42-except.$(OBJEXT) \
 	gmapl_sse42-datadir.$(OBJEXT) gmapl_sse42-parserange.$(OBJEXT) \
 	gmapl_sse42-getopt.$(OBJEXT) gmapl_sse42-getopt1.$(OBJEXT) \
 	gmapl_sse42-gmap.$(OBJEXT)
-dist_gmapl_sse42_OBJECTS = $(am__objects_18)
+dist_gmapl_sse42_OBJECTS = $(am__objects_21)
 gmapl_sse42_OBJECTS = $(dist_gmapl_sse42_OBJECTS)
 gmapl_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmapl_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_sse42_CFLAGS) \
 	$(CFLAGS) $(gmapl_sse42_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_19 = gmapl_ssse3-except.$(OBJEXT) \
+am__objects_22 = gmapl_ssse3-except.$(OBJEXT) \
 	gmapl_ssse3-assert.$(OBJEXT) gmapl_ssse3-mem.$(OBJEXT) \
 	gmapl_ssse3-intlist.$(OBJEXT) gmapl_ssse3-list.$(OBJEXT) \
 	gmapl_ssse3-littleendian.$(OBJEXT) \
@@ -1010,12 +1218,12 @@ am__objects_19 = gmapl_ssse3-except.$(OBJEXT) \
 	gmapl_ssse3-genome-write.$(OBJEXT) \
 	gmapl_ssse3-bitpack64-read.$(OBJEXT) \
 	gmapl_ssse3-bitpack64-readtwo.$(OBJEXT) \
-	gmapl_ssse3-indexdb.$(OBJEXT) gmapl_ssse3-indexdb_hr.$(OBJEXT) \
-	gmapl_ssse3-oligo.$(OBJEXT) gmapl_ssse3-block.$(OBJEXT) \
-	gmapl_ssse3-chrom.$(OBJEXT) gmapl_ssse3-segmentpos.$(OBJEXT) \
-	gmapl_ssse3-chrnum.$(OBJEXT) gmapl_ssse3-uinttable.$(OBJEXT) \
-	gmapl_ssse3-gregion.$(OBJEXT) gmapl_ssse3-match.$(OBJEXT) \
-	gmapl_ssse3-matchpool.$(OBJEXT) \
+	gmapl_ssse3-merge.$(OBJEXT) gmapl_ssse3-indexdb.$(OBJEXT) \
+	gmapl_ssse3-indexdb_hr.$(OBJEXT) gmapl_ssse3-oligo.$(OBJEXT) \
+	gmapl_ssse3-block.$(OBJEXT) gmapl_ssse3-chrom.$(OBJEXT) \
+	gmapl_ssse3-segmentpos.$(OBJEXT) gmapl_ssse3-chrnum.$(OBJEXT) \
+	gmapl_ssse3-uinttable.$(OBJEXT) gmapl_ssse3-gregion.$(OBJEXT) \
+	gmapl_ssse3-match.$(OBJEXT) gmapl_ssse3-matchpool.$(OBJEXT) \
 	gmapl_ssse3-diagnostic.$(OBJEXT) gmapl_ssse3-stage1.$(OBJEXT) \
 	gmapl_ssse3-diag.$(OBJEXT) gmapl_ssse3-diagpool.$(OBJEXT) \
 	gmapl_ssse3-cmet.$(OBJEXT) gmapl_ssse3-atoi.$(OBJEXT) \
@@ -1045,21 +1253,21 @@ am__objects_19 = gmapl_ssse3-except.$(OBJEXT) \
 	gmapl_ssse3-datadir.$(OBJEXT) gmapl_ssse3-parserange.$(OBJEXT) \
 	gmapl_ssse3-getopt.$(OBJEXT) gmapl_ssse3-getopt1.$(OBJEXT) \
 	gmapl_ssse3-gmap.$(OBJEXT)
-dist_gmapl_ssse3_OBJECTS = $(am__objects_19)
+dist_gmapl_ssse3_OBJECTS = $(am__objects_22)
 gmapl_ssse3_OBJECTS = $(dist_gmapl_ssse3_OBJECTS)
 gmapl_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gmapl_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_ssse3_CFLAGS) \
 	$(CFLAGS) $(gmapl_ssse3_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_20 = gsnap-cpuid.$(OBJEXT) gsnap-gsnap_select.$(OBJEXT)
-dist_gsnap_OBJECTS = $(am__objects_20)
+am__objects_23 = gsnap-cpuid.$(OBJEXT) gsnap-gsnap_select.$(OBJEXT)
+dist_gsnap_OBJECTS = $(am__objects_23)
 gsnap_OBJECTS = $(dist_gsnap_OBJECTS)
 gsnap_DEPENDENCIES =
 gsnap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_CFLAGS) $(CFLAGS) \
 	$(gsnap_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_21 = gsnap_avx2-except.$(OBJEXT) \
+am__objects_24 = gsnap_avx2-except.$(OBJEXT) \
 	gsnap_avx2-assert.$(OBJEXT) gsnap_avx2-mem.$(OBJEXT) \
 	gsnap_avx2-intlist.$(OBJEXT) gsnap_avx2-list.$(OBJEXT) \
 	gsnap_avx2-littleendian.$(OBJEXT) \
@@ -1078,10 +1286,11 @@ am__objects_21 = gsnap_avx2-except.$(OBJEXT) \
 	gsnap_avx2-genome_sites.$(OBJEXT) \
 	gsnap_avx2-bitpack64-read.$(OBJEXT) \
 	gsnap_avx2-bitpack64-readtwo.$(OBJEXT) \
-	gsnap_avx2-indexdb.$(OBJEXT) gsnap_avx2-indexdb_hr.$(OBJEXT) \
-	gsnap_avx2-oligo.$(OBJEXT) gsnap_avx2-chrom.$(OBJEXT) \
-	gsnap_avx2-segmentpos.$(OBJEXT) gsnap_avx2-chrnum.$(OBJEXT) \
-	gsnap_avx2-maxent_hr.$(OBJEXT) gsnap_avx2-samprint.$(OBJEXT) \
+	gsnap_avx2-merge.$(OBJEXT) gsnap_avx2-indexdb.$(OBJEXT) \
+	gsnap_avx2-indexdb_hr.$(OBJEXT) gsnap_avx2-oligo.$(OBJEXT) \
+	gsnap_avx2-chrom.$(OBJEXT) gsnap_avx2-segmentpos.$(OBJEXT) \
+	gsnap_avx2-chrnum.$(OBJEXT) gsnap_avx2-maxent_hr.$(OBJEXT) \
+	gsnap_avx2-cigar.$(OBJEXT) gsnap_avx2-samprint.$(OBJEXT) \
 	gsnap_avx2-mapq.$(OBJEXT) gsnap_avx2-shortread.$(OBJEXT) \
 	gsnap_avx2-substring.$(OBJEXT) gsnap_avx2-junction.$(OBJEXT) \
 	gsnap_avx2-stage3hr.$(OBJEXT) gsnap_avx2-spanningelt.$(OBJEXT) \
@@ -1107,21 +1316,101 @@ am__objects_21 = gsnap_avx2-except.$(OBJEXT) \
 	gsnap_avx2-bitpack64-access.$(OBJEXT) \
 	gsnap_avx2-bytecoding.$(OBJEXT) gsnap_avx2-univdiag.$(OBJEXT) \
 	gsnap_avx2-sedgesort.$(OBJEXT) \
-	gsnap_avx2-sarray-read.$(OBJEXT) gsnap_avx2-stage1hr.$(OBJEXT) \
+	gsnap_avx2-sarray-read.$(OBJEXT) \
+	gsnap_avx2-sarray-search.$(OBJEXT) \
+	gsnap_avx2-merge-heap.$(OBJEXT) gsnap_avx2-stage1hr.$(OBJEXT) \
 	gsnap_avx2-request.$(OBJEXT) gsnap_avx2-resulthr.$(OBJEXT) \
 	gsnap_avx2-output.$(OBJEXT) gsnap_avx2-inbuffer.$(OBJEXT) \
 	gsnap_avx2-samheader.$(OBJEXT) gsnap_avx2-outbuffer.$(OBJEXT) \
 	gsnap_avx2-datadir.$(OBJEXT) gsnap_avx2-parserange.$(OBJEXT) \
 	gsnap_avx2-getopt.$(OBJEXT) gsnap_avx2-getopt1.$(OBJEXT) \
 	gsnap_avx2-gsnap.$(OBJEXT)
-dist_gsnap_avx2_OBJECTS = $(am__objects_21)
+dist_gsnap_avx2_OBJECTS = $(am__objects_24)
 gsnap_avx2_OBJECTS = $(dist_gsnap_avx2_OBJECTS)
 gsnap_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnap_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_avx2_CFLAGS) \
 	$(CFLAGS) $(gsnap_avx2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_22 = gsnap_nosimd-except.$(OBJEXT) \
+am__objects_25 = gsnap_avx512-except.$(OBJEXT) \
+	gsnap_avx512-assert.$(OBJEXT) gsnap_avx512-mem.$(OBJEXT) \
+	gsnap_avx512-intlist.$(OBJEXT) gsnap_avx512-list.$(OBJEXT) \
+	gsnap_avx512-littleendian.$(OBJEXT) \
+	gsnap_avx512-bigendian.$(OBJEXT) \
+	gsnap_avx512-univinterval.$(OBJEXT) \
+	gsnap_avx512-interval.$(OBJEXT) \
+	gsnap_avx512-uintlist.$(OBJEXT) \
+	gsnap_avx512-stopwatch.$(OBJEXT) \
+	gsnap_avx512-semaphore.$(OBJEXT) gsnap_avx512-access.$(OBJEXT) \
+	gsnap_avx512-filestring.$(OBJEXT) \
+	gsnap_avx512-iit-read-univ.$(OBJEXT) \
+	gsnap_avx512-iit-read.$(OBJEXT) gsnap_avx512-md5.$(OBJEXT) \
+	gsnap_avx512-bzip2.$(OBJEXT) gsnap_avx512-sequence.$(OBJEXT) \
+	gsnap_avx512-reader.$(OBJEXT) \
+	gsnap_avx512-genomicpos.$(OBJEXT) \
+	gsnap_avx512-compress.$(OBJEXT) gsnap_avx512-genome.$(OBJEXT) \
+	gsnap_avx512-popcount.$(OBJEXT) \
+	gsnap_avx512-genome128_hr.$(OBJEXT) \
+	gsnap_avx512-genome_sites.$(OBJEXT) \
+	gsnap_avx512-bitpack64-read.$(OBJEXT) \
+	gsnap_avx512-bitpack64-readtwo.$(OBJEXT) \
+	gsnap_avx512-merge.$(OBJEXT) gsnap_avx512-indexdb.$(OBJEXT) \
+	gsnap_avx512-indexdb_hr.$(OBJEXT) gsnap_avx512-oligo.$(OBJEXT) \
+	gsnap_avx512-chrom.$(OBJEXT) gsnap_avx512-segmentpos.$(OBJEXT) \
+	gsnap_avx512-chrnum.$(OBJEXT) gsnap_avx512-maxent_hr.$(OBJEXT) \
+	gsnap_avx512-cigar.$(OBJEXT) gsnap_avx512-samprint.$(OBJEXT) \
+	gsnap_avx512-mapq.$(OBJEXT) gsnap_avx512-shortread.$(OBJEXT) \
+	gsnap_avx512-substring.$(OBJEXT) \
+	gsnap_avx512-junction.$(OBJEXT) \
+	gsnap_avx512-stage3hr.$(OBJEXT) \
+	gsnap_avx512-spanningelt.$(OBJEXT) gsnap_avx512-cmet.$(OBJEXT) \
+	gsnap_avx512-atoi.$(OBJEXT) gsnap_avx512-maxent.$(OBJEXT) \
+	gsnap_avx512-pair.$(OBJEXT) gsnap_avx512-pairpool.$(OBJEXT) \
+	gsnap_avx512-diag.$(OBJEXT) gsnap_avx512-diagpool.$(OBJEXT) \
+	gsnap_avx512-orderstat.$(OBJEXT) \
+	gsnap_avx512-oligoindex_hr.$(OBJEXT) \
+	gsnap_avx512-cellpool.$(OBJEXT) gsnap_avx512-stage2.$(OBJEXT) \
+	gsnap_avx512-intron.$(OBJEXT) \
+	gsnap_avx512-boyer-moore.$(OBJEXT) \
+	gsnap_avx512-changepoint.$(OBJEXT) \
+	gsnap_avx512-pbinom.$(OBJEXT) gsnap_avx512-dynprog.$(OBJEXT) \
+	gsnap_avx512-dynprog_simd.$(OBJEXT) \
+	gsnap_avx512-dynprog_single.$(OBJEXT) \
+	gsnap_avx512-dynprog_genome.$(OBJEXT) \
+	gsnap_avx512-dynprog_cdna.$(OBJEXT) \
+	gsnap_avx512-dynprog_end.$(OBJEXT) \
+	gsnap_avx512-gbuffer.$(OBJEXT) \
+	gsnap_avx512-doublelist.$(OBJEXT) \
+	gsnap_avx512-smooth.$(OBJEXT) gsnap_avx512-chimera.$(OBJEXT) \
+	gsnap_avx512-stage3.$(OBJEXT) \
+	gsnap_avx512-splicestringpool.$(OBJEXT) \
+	gsnap_avx512-splicetrie_build.$(OBJEXT) \
+	gsnap_avx512-splicetrie.$(OBJEXT) \
+	gsnap_avx512-splice.$(OBJEXT) gsnap_avx512-indel.$(OBJEXT) \
+	gsnap_avx512-bitpack64-access.$(OBJEXT) \
+	gsnap_avx512-bytecoding.$(OBJEXT) \
+	gsnap_avx512-univdiag.$(OBJEXT) \
+	gsnap_avx512-sedgesort.$(OBJEXT) \
+	gsnap_avx512-sarray-read.$(OBJEXT) \
+	gsnap_avx512-sarray-search.$(OBJEXT) \
+	gsnap_avx512-merge-heap.$(OBJEXT) \
+	gsnap_avx512-stage1hr.$(OBJEXT) gsnap_avx512-request.$(OBJEXT) \
+	gsnap_avx512-resulthr.$(OBJEXT) gsnap_avx512-output.$(OBJEXT) \
+	gsnap_avx512-inbuffer.$(OBJEXT) \
+	gsnap_avx512-samheader.$(OBJEXT) \
+	gsnap_avx512-outbuffer.$(OBJEXT) \
+	gsnap_avx512-datadir.$(OBJEXT) \
+	gsnap_avx512-parserange.$(OBJEXT) \
+	gsnap_avx512-getopt.$(OBJEXT) gsnap_avx512-getopt1.$(OBJEXT) \
+	gsnap_avx512-gsnap.$(OBJEXT)
+dist_gsnap_avx512_OBJECTS = $(am__objects_25)
+gsnap_avx512_OBJECTS = $(dist_gsnap_avx512_OBJECTS)
+gsnap_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+gsnap_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_avx512_CFLAGS) \
+	$(CFLAGS) $(gsnap_avx512_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_26 = gsnap_nosimd-except.$(OBJEXT) \
 	gsnap_nosimd-assert.$(OBJEXT) gsnap_nosimd-mem.$(OBJEXT) \
 	gsnap_nosimd-intlist.$(OBJEXT) gsnap_nosimd-list.$(OBJEXT) \
 	gsnap_nosimd-littleendian.$(OBJEXT) \
@@ -1143,12 +1432,12 @@ am__objects_22 = gsnap_nosimd-except.$(OBJEXT) \
 	gsnap_nosimd-genome_sites.$(OBJEXT) \
 	gsnap_nosimd-bitpack64-read.$(OBJEXT) \
 	gsnap_nosimd-bitpack64-readtwo.$(OBJEXT) \
-	gsnap_nosimd-indexdb.$(OBJEXT) \
+	gsnap_nosimd-merge.$(OBJEXT) gsnap_nosimd-indexdb.$(OBJEXT) \
 	gsnap_nosimd-indexdb_hr.$(OBJEXT) gsnap_nosimd-oligo.$(OBJEXT) \
 	gsnap_nosimd-chrom.$(OBJEXT) gsnap_nosimd-segmentpos.$(OBJEXT) \
 	gsnap_nosimd-chrnum.$(OBJEXT) gsnap_nosimd-maxent_hr.$(OBJEXT) \
-	gsnap_nosimd-samprint.$(OBJEXT) gsnap_nosimd-mapq.$(OBJEXT) \
-	gsnap_nosimd-shortread.$(OBJEXT) \
+	gsnap_nosimd-cigar.$(OBJEXT) gsnap_nosimd-samprint.$(OBJEXT) \
+	gsnap_nosimd-mapq.$(OBJEXT) gsnap_nosimd-shortread.$(OBJEXT) \
 	gsnap_nosimd-substring.$(OBJEXT) \
 	gsnap_nosimd-junction.$(OBJEXT) \
 	gsnap_nosimd-stage3hr.$(OBJEXT) \
@@ -1181,6 +1470,8 @@ am__objects_22 = gsnap_nosimd-except.$(OBJEXT) \
 	gsnap_nosimd-univdiag.$(OBJEXT) \
 	gsnap_nosimd-sedgesort.$(OBJEXT) \
 	gsnap_nosimd-sarray-read.$(OBJEXT) \
+	gsnap_nosimd-sarray-search.$(OBJEXT) \
+	gsnap_nosimd-merge-heap.$(OBJEXT) \
 	gsnap_nosimd-stage1hr.$(OBJEXT) gsnap_nosimd-request.$(OBJEXT) \
 	gsnap_nosimd-resulthr.$(OBJEXT) gsnap_nosimd-output.$(OBJEXT) \
 	gsnap_nosimd-inbuffer.$(OBJEXT) \
@@ -1190,14 +1481,14 @@ am__objects_22 = gsnap_nosimd-except.$(OBJEXT) \
 	gsnap_nosimd-parserange.$(OBJEXT) \
 	gsnap_nosimd-getopt.$(OBJEXT) gsnap_nosimd-getopt1.$(OBJEXT) \
 	gsnap_nosimd-gsnap.$(OBJEXT)
-dist_gsnap_nosimd_OBJECTS = $(am__objects_22)
+dist_gsnap_nosimd_OBJECTS = $(am__objects_26)
 gsnap_nosimd_OBJECTS = $(dist_gsnap_nosimd_OBJECTS)
 gsnap_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 gsnap_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_nosimd_CFLAGS) \
 	$(CFLAGS) $(gsnap_nosimd_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_23 = gsnap_sse2-except.$(OBJEXT) \
+am__objects_27 = gsnap_sse2-except.$(OBJEXT) \
 	gsnap_sse2-assert.$(OBJEXT) gsnap_sse2-mem.$(OBJEXT) \
 	gsnap_sse2-intlist.$(OBJEXT) gsnap_sse2-list.$(OBJEXT) \
 	gsnap_sse2-littleendian.$(OBJEXT) \
@@ -1216,10 +1507,11 @@ am__objects_23 = gsnap_sse2-except.$(OBJEXT) \
 	gsnap_sse2-genome_sites.$(OBJEXT) \
 	gsnap_sse2-bitpack64-read.$(OBJEXT) \
 	gsnap_sse2-bitpack64-readtwo.$(OBJEXT) \
-	gsnap_sse2-indexdb.$(OBJEXT) gsnap_sse2-indexdb_hr.$(OBJEXT) \
-	gsnap_sse2-oligo.$(OBJEXT) gsnap_sse2-chrom.$(OBJEXT) \
-	gsnap_sse2-segmentpos.$(OBJEXT) gsnap_sse2-chrnum.$(OBJEXT) \
-	gsnap_sse2-maxent_hr.$(OBJEXT) gsnap_sse2-samprint.$(OBJEXT) \
+	gsnap_sse2-merge.$(OBJEXT) gsnap_sse2-indexdb.$(OBJEXT) \
+	gsnap_sse2-indexdb_hr.$(OBJEXT) gsnap_sse2-oligo.$(OBJEXT) \
+	gsnap_sse2-chrom.$(OBJEXT) gsnap_sse2-segmentpos.$(OBJEXT) \
+	gsnap_sse2-chrnum.$(OBJEXT) gsnap_sse2-maxent_hr.$(OBJEXT) \
+	gsnap_sse2-cigar.$(OBJEXT) gsnap_sse2-samprint.$(OBJEXT) \
 	gsnap_sse2-mapq.$(OBJEXT) gsnap_sse2-shortread.$(OBJEXT) \
 	gsnap_sse2-substring.$(OBJEXT) gsnap_sse2-junction.$(OBJEXT) \
 	gsnap_sse2-stage3hr.$(OBJEXT) gsnap_sse2-spanningelt.$(OBJEXT) \
@@ -1245,21 +1537,23 @@ am__objects_23 = gsnap_sse2-except.$(OBJEXT) \
 	gsnap_sse2-bitpack64-access.$(OBJEXT) \
 	gsnap_sse2-bytecoding.$(OBJEXT) gsnap_sse2-univdiag.$(OBJEXT) \
 	gsnap_sse2-sedgesort.$(OBJEXT) \
-	gsnap_sse2-sarray-read.$(OBJEXT) gsnap_sse2-stage1hr.$(OBJEXT) \
+	gsnap_sse2-sarray-read.$(OBJEXT) \
+	gsnap_sse2-sarray-search.$(OBJEXT) \
+	gsnap_sse2-merge-heap.$(OBJEXT) gsnap_sse2-stage1hr.$(OBJEXT) \
 	gsnap_sse2-request.$(OBJEXT) gsnap_sse2-resulthr.$(OBJEXT) \
 	gsnap_sse2-output.$(OBJEXT) gsnap_sse2-inbuffer.$(OBJEXT) \
 	gsnap_sse2-samheader.$(OBJEXT) gsnap_sse2-outbuffer.$(OBJEXT) \
 	gsnap_sse2-datadir.$(OBJEXT) gsnap_sse2-parserange.$(OBJEXT) \
 	gsnap_sse2-getopt.$(OBJEXT) gsnap_sse2-getopt1.$(OBJEXT) \
 	gsnap_sse2-gsnap.$(OBJEXT)
-dist_gsnap_sse2_OBJECTS = $(am__objects_23)
+dist_gsnap_sse2_OBJECTS = $(am__objects_27)
 gsnap_sse2_OBJECTS = $(dist_gsnap_sse2_OBJECTS)
 gsnap_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnap_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_sse2_CFLAGS) \
 	$(CFLAGS) $(gsnap_sse2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_24 = gsnap_sse41-except.$(OBJEXT) \
+am__objects_28 = gsnap_sse41-except.$(OBJEXT) \
 	gsnap_sse41-assert.$(OBJEXT) gsnap_sse41-mem.$(OBJEXT) \
 	gsnap_sse41-intlist.$(OBJEXT) gsnap_sse41-list.$(OBJEXT) \
 	gsnap_sse41-littleendian.$(OBJEXT) \
@@ -1279,10 +1573,11 @@ am__objects_24 = gsnap_sse41-except.$(OBJEXT) \
 	gsnap_sse41-genome_sites.$(OBJEXT) \
 	gsnap_sse41-bitpack64-read.$(OBJEXT) \
 	gsnap_sse41-bitpack64-readtwo.$(OBJEXT) \
-	gsnap_sse41-indexdb.$(OBJEXT) gsnap_sse41-indexdb_hr.$(OBJEXT) \
-	gsnap_sse41-oligo.$(OBJEXT) gsnap_sse41-chrom.$(OBJEXT) \
-	gsnap_sse41-segmentpos.$(OBJEXT) gsnap_sse41-chrnum.$(OBJEXT) \
-	gsnap_sse41-maxent_hr.$(OBJEXT) gsnap_sse41-samprint.$(OBJEXT) \
+	gsnap_sse41-merge.$(OBJEXT) gsnap_sse41-indexdb.$(OBJEXT) \
+	gsnap_sse41-indexdb_hr.$(OBJEXT) gsnap_sse41-oligo.$(OBJEXT) \
+	gsnap_sse41-chrom.$(OBJEXT) gsnap_sse41-segmentpos.$(OBJEXT) \
+	gsnap_sse41-chrnum.$(OBJEXT) gsnap_sse41-maxent_hr.$(OBJEXT) \
+	gsnap_sse41-cigar.$(OBJEXT) gsnap_sse41-samprint.$(OBJEXT) \
 	gsnap_sse41-mapq.$(OBJEXT) gsnap_sse41-shortread.$(OBJEXT) \
 	gsnap_sse41-substring.$(OBJEXT) gsnap_sse41-junction.$(OBJEXT) \
 	gsnap_sse41-stage3hr.$(OBJEXT) \
@@ -1312,20 +1607,22 @@ am__objects_24 = gsnap_sse41-except.$(OBJEXT) \
 	gsnap_sse41-bytecoding.$(OBJEXT) \
 	gsnap_sse41-univdiag.$(OBJEXT) gsnap_sse41-sedgesort.$(OBJEXT) \
 	gsnap_sse41-sarray-read.$(OBJEXT) \
+	gsnap_sse41-sarray-search.$(OBJEXT) \
+	gsnap_sse41-merge-heap.$(OBJEXT) \
 	gsnap_sse41-stage1hr.$(OBJEXT) gsnap_sse41-request.$(OBJEXT) \
 	gsnap_sse41-resulthr.$(OBJEXT) gsnap_sse41-output.$(OBJEXT) \
 	gsnap_sse41-inbuffer.$(OBJEXT) gsnap_sse41-samheader.$(OBJEXT) \
 	gsnap_sse41-outbuffer.$(OBJEXT) gsnap_sse41-datadir.$(OBJEXT) \
 	gsnap_sse41-parserange.$(OBJEXT) gsnap_sse41-getopt.$(OBJEXT) \
 	gsnap_sse41-getopt1.$(OBJEXT) gsnap_sse41-gsnap.$(OBJEXT)
-dist_gsnap_sse41_OBJECTS = $(am__objects_24)
+dist_gsnap_sse41_OBJECTS = $(am__objects_28)
 gsnap_sse41_OBJECTS = $(dist_gsnap_sse41_OBJECTS)
 gsnap_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnap_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_sse41_CFLAGS) \
 	$(CFLAGS) $(gsnap_sse41_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_25 = gsnap_sse42-except.$(OBJEXT) \
+am__objects_29 = gsnap_sse42-except.$(OBJEXT) \
 	gsnap_sse42-assert.$(OBJEXT) gsnap_sse42-mem.$(OBJEXT) \
 	gsnap_sse42-intlist.$(OBJEXT) gsnap_sse42-list.$(OBJEXT) \
 	gsnap_sse42-littleendian.$(OBJEXT) \
@@ -1345,10 +1642,11 @@ am__objects_25 = gsnap_sse42-except.$(OBJEXT) \
 	gsnap_sse42-genome_sites.$(OBJEXT) \
 	gsnap_sse42-bitpack64-read.$(OBJEXT) \
 	gsnap_sse42-bitpack64-readtwo.$(OBJEXT) \
-	gsnap_sse42-indexdb.$(OBJEXT) gsnap_sse42-indexdb_hr.$(OBJEXT) \
-	gsnap_sse42-oligo.$(OBJEXT) gsnap_sse42-chrom.$(OBJEXT) \
-	gsnap_sse42-segmentpos.$(OBJEXT) gsnap_sse42-chrnum.$(OBJEXT) \
-	gsnap_sse42-maxent_hr.$(OBJEXT) gsnap_sse42-samprint.$(OBJEXT) \
+	gsnap_sse42-merge.$(OBJEXT) gsnap_sse42-indexdb.$(OBJEXT) \
+	gsnap_sse42-indexdb_hr.$(OBJEXT) gsnap_sse42-oligo.$(OBJEXT) \
+	gsnap_sse42-chrom.$(OBJEXT) gsnap_sse42-segmentpos.$(OBJEXT) \
+	gsnap_sse42-chrnum.$(OBJEXT) gsnap_sse42-maxent_hr.$(OBJEXT) \
+	gsnap_sse42-cigar.$(OBJEXT) gsnap_sse42-samprint.$(OBJEXT) \
 	gsnap_sse42-mapq.$(OBJEXT) gsnap_sse42-shortread.$(OBJEXT) \
 	gsnap_sse42-substring.$(OBJEXT) gsnap_sse42-junction.$(OBJEXT) \
 	gsnap_sse42-stage3hr.$(OBJEXT) \
@@ -1378,20 +1676,22 @@ am__objects_25 = gsnap_sse42-except.$(OBJEXT) \
 	gsnap_sse42-bytecoding.$(OBJEXT) \
 	gsnap_sse42-univdiag.$(OBJEXT) gsnap_sse42-sedgesort.$(OBJEXT) \
 	gsnap_sse42-sarray-read.$(OBJEXT) \
+	gsnap_sse42-sarray-search.$(OBJEXT) \
+	gsnap_sse42-merge-heap.$(OBJEXT) \
 	gsnap_sse42-stage1hr.$(OBJEXT) gsnap_sse42-request.$(OBJEXT) \
 	gsnap_sse42-resulthr.$(OBJEXT) gsnap_sse42-output.$(OBJEXT) \
 	gsnap_sse42-inbuffer.$(OBJEXT) gsnap_sse42-samheader.$(OBJEXT) \
 	gsnap_sse42-outbuffer.$(OBJEXT) gsnap_sse42-datadir.$(OBJEXT) \
 	gsnap_sse42-parserange.$(OBJEXT) gsnap_sse42-getopt.$(OBJEXT) \
 	gsnap_sse42-getopt1.$(OBJEXT) gsnap_sse42-gsnap.$(OBJEXT)
-dist_gsnap_sse42_OBJECTS = $(am__objects_25)
+dist_gsnap_sse42_OBJECTS = $(am__objects_29)
 gsnap_sse42_OBJECTS = $(dist_gsnap_sse42_OBJECTS)
 gsnap_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnap_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_sse42_CFLAGS) \
 	$(CFLAGS) $(gsnap_sse42_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_26 = gsnap_ssse3-except.$(OBJEXT) \
+am__objects_30 = gsnap_ssse3-except.$(OBJEXT) \
 	gsnap_ssse3-assert.$(OBJEXT) gsnap_ssse3-mem.$(OBJEXT) \
 	gsnap_ssse3-intlist.$(OBJEXT) gsnap_ssse3-list.$(OBJEXT) \
 	gsnap_ssse3-littleendian.$(OBJEXT) \
@@ -1411,10 +1711,11 @@ am__objects_26 = gsnap_ssse3-except.$(OBJEXT) \
 	gsnap_ssse3-genome_sites.$(OBJEXT) \
 	gsnap_ssse3-bitpack64-read.$(OBJEXT) \
 	gsnap_ssse3-bitpack64-readtwo.$(OBJEXT) \
-	gsnap_ssse3-indexdb.$(OBJEXT) gsnap_ssse3-indexdb_hr.$(OBJEXT) \
-	gsnap_ssse3-oligo.$(OBJEXT) gsnap_ssse3-chrom.$(OBJEXT) \
-	gsnap_ssse3-segmentpos.$(OBJEXT) gsnap_ssse3-chrnum.$(OBJEXT) \
-	gsnap_ssse3-maxent_hr.$(OBJEXT) gsnap_ssse3-samprint.$(OBJEXT) \
+	gsnap_ssse3-merge.$(OBJEXT) gsnap_ssse3-indexdb.$(OBJEXT) \
+	gsnap_ssse3-indexdb_hr.$(OBJEXT) gsnap_ssse3-oligo.$(OBJEXT) \
+	gsnap_ssse3-chrom.$(OBJEXT) gsnap_ssse3-segmentpos.$(OBJEXT) \
+	gsnap_ssse3-chrnum.$(OBJEXT) gsnap_ssse3-maxent_hr.$(OBJEXT) \
+	gsnap_ssse3-cigar.$(OBJEXT) gsnap_ssse3-samprint.$(OBJEXT) \
 	gsnap_ssse3-mapq.$(OBJEXT) gsnap_ssse3-shortread.$(OBJEXT) \
 	gsnap_ssse3-substring.$(OBJEXT) gsnap_ssse3-junction.$(OBJEXT) \
 	gsnap_ssse3-stage3hr.$(OBJEXT) \
@@ -1444,27 +1745,29 @@ am__objects_26 = gsnap_ssse3-except.$(OBJEXT) \
 	gsnap_ssse3-bytecoding.$(OBJEXT) \
 	gsnap_ssse3-univdiag.$(OBJEXT) gsnap_ssse3-sedgesort.$(OBJEXT) \
 	gsnap_ssse3-sarray-read.$(OBJEXT) \
+	gsnap_ssse3-sarray-search.$(OBJEXT) \
+	gsnap_ssse3-merge-heap.$(OBJEXT) \
 	gsnap_ssse3-stage1hr.$(OBJEXT) gsnap_ssse3-request.$(OBJEXT) \
 	gsnap_ssse3-resulthr.$(OBJEXT) gsnap_ssse3-output.$(OBJEXT) \
 	gsnap_ssse3-inbuffer.$(OBJEXT) gsnap_ssse3-samheader.$(OBJEXT) \
 	gsnap_ssse3-outbuffer.$(OBJEXT) gsnap_ssse3-datadir.$(OBJEXT) \
 	gsnap_ssse3-parserange.$(OBJEXT) gsnap_ssse3-getopt.$(OBJEXT) \
 	gsnap_ssse3-getopt1.$(OBJEXT) gsnap_ssse3-gsnap.$(OBJEXT)
-dist_gsnap_ssse3_OBJECTS = $(am__objects_26)
+dist_gsnap_ssse3_OBJECTS = $(am__objects_30)
 gsnap_ssse3_OBJECTS = $(dist_gsnap_ssse3_OBJECTS)
 gsnap_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnap_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_ssse3_CFLAGS) \
 	$(CFLAGS) $(gsnap_ssse3_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_27 = gsnapl-cpuid.$(OBJEXT) gsnapl-gsnapl_select.$(OBJEXT)
-dist_gsnapl_OBJECTS = $(am__objects_27)
+am__objects_31 = gsnapl-cpuid.$(OBJEXT) gsnapl-gsnapl_select.$(OBJEXT)
+dist_gsnapl_OBJECTS = $(am__objects_31)
 gsnapl_OBJECTS = $(dist_gsnapl_OBJECTS)
 gsnapl_DEPENDENCIES =
 gsnapl_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_CFLAGS) $(CFLAGS) \
 	$(gsnapl_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_28 = gsnapl_avx2-except.$(OBJEXT) \
+am__objects_32 = gsnapl_avx2-except.$(OBJEXT) \
 	gsnapl_avx2-assert.$(OBJEXT) gsnapl_avx2-mem.$(OBJEXT) \
 	gsnapl_avx2-intlist.$(OBJEXT) gsnapl_avx2-list.$(OBJEXT) \
 	gsnapl_avx2-littleendian.$(OBJEXT) \
@@ -1485,10 +1788,11 @@ am__objects_28 = gsnapl_avx2-except.$(OBJEXT) \
 	gsnapl_avx2-genome_sites.$(OBJEXT) \
 	gsnapl_avx2-bitpack64-read.$(OBJEXT) \
 	gsnapl_avx2-bitpack64-readtwo.$(OBJEXT) \
-	gsnapl_avx2-indexdb.$(OBJEXT) gsnapl_avx2-indexdb_hr.$(OBJEXT) \
-	gsnapl_avx2-oligo.$(OBJEXT) gsnapl_avx2-chrom.$(OBJEXT) \
-	gsnapl_avx2-segmentpos.$(OBJEXT) gsnapl_avx2-chrnum.$(OBJEXT) \
-	gsnapl_avx2-maxent_hr.$(OBJEXT) gsnapl_avx2-samprint.$(OBJEXT) \
+	gsnapl_avx2-merge.$(OBJEXT) gsnapl_avx2-indexdb.$(OBJEXT) \
+	gsnapl_avx2-indexdb_hr.$(OBJEXT) gsnapl_avx2-oligo.$(OBJEXT) \
+	gsnapl_avx2-chrom.$(OBJEXT) gsnapl_avx2-segmentpos.$(OBJEXT) \
+	gsnapl_avx2-chrnum.$(OBJEXT) gsnapl_avx2-maxent_hr.$(OBJEXT) \
+	gsnapl_avx2-cigar.$(OBJEXT) gsnapl_avx2-samprint.$(OBJEXT) \
 	gsnapl_avx2-mapq.$(OBJEXT) gsnapl_avx2-shortread.$(OBJEXT) \
 	gsnapl_avx2-substring.$(OBJEXT) gsnapl_avx2-junction.$(OBJEXT) \
 	gsnapl_avx2-stage3hr.$(OBJEXT) \
@@ -1515,20 +1819,102 @@ am__objects_28 = gsnapl_avx2-except.$(OBJEXT) \
 	gsnapl_avx2-splicetrie.$(OBJEXT) gsnapl_avx2-splice.$(OBJEXT) \
 	gsnapl_avx2-indel.$(OBJEXT) \
 	gsnapl_avx2-bitpack64-access.$(OBJEXT) \
+	gsnapl_avx2-merge-heap.$(OBJEXT) \
 	gsnapl_avx2-stage1hr.$(OBJEXT) gsnapl_avx2-request.$(OBJEXT) \
 	gsnapl_avx2-resulthr.$(OBJEXT) gsnapl_avx2-output.$(OBJEXT) \
 	gsnapl_avx2-inbuffer.$(OBJEXT) gsnapl_avx2-samheader.$(OBJEXT) \
 	gsnapl_avx2-outbuffer.$(OBJEXT) gsnapl_avx2-datadir.$(OBJEXT) \
 	gsnapl_avx2-parserange.$(OBJEXT) gsnapl_avx2-getopt.$(OBJEXT) \
 	gsnapl_avx2-getopt1.$(OBJEXT) gsnapl_avx2-gsnap.$(OBJEXT)
-dist_gsnapl_avx2_OBJECTS = $(am__objects_28)
+dist_gsnapl_avx2_OBJECTS = $(am__objects_32)
 gsnapl_avx2_OBJECTS = $(dist_gsnapl_avx2_OBJECTS)
 gsnapl_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnapl_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_avx2_CFLAGS) \
 	$(CFLAGS) $(gsnapl_avx2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_29 = gsnapl_nosimd-except.$(OBJEXT) \
+am__objects_33 = gsnapl_avx512-except.$(OBJEXT) \
+	gsnapl_avx512-assert.$(OBJEXT) gsnapl_avx512-mem.$(OBJEXT) \
+	gsnapl_avx512-intlist.$(OBJEXT) gsnapl_avx512-list.$(OBJEXT) \
+	gsnapl_avx512-littleendian.$(OBJEXT) \
+	gsnapl_avx512-bigendian.$(OBJEXT) \
+	gsnapl_avx512-univinterval.$(OBJEXT) \
+	gsnapl_avx512-interval.$(OBJEXT) \
+	gsnapl_avx512-uintlist.$(OBJEXT) \
+	gsnapl_avx512-uint8list.$(OBJEXT) \
+	gsnapl_avx512-stopwatch.$(OBJEXT) \
+	gsnapl_avx512-semaphore.$(OBJEXT) \
+	gsnapl_avx512-access.$(OBJEXT) \
+	gsnapl_avx512-filestring.$(OBJEXT) \
+	gsnapl_avx512-iit-read-univ.$(OBJEXT) \
+	gsnapl_avx512-iit-read.$(OBJEXT) gsnapl_avx512-md5.$(OBJEXT) \
+	gsnapl_avx512-bzip2.$(OBJEXT) gsnapl_avx512-sequence.$(OBJEXT) \
+	gsnapl_avx512-reader.$(OBJEXT) \
+	gsnapl_avx512-genomicpos.$(OBJEXT) \
+	gsnapl_avx512-compress.$(OBJEXT) \
+	gsnapl_avx512-genome.$(OBJEXT) \
+	gsnapl_avx512-popcount.$(OBJEXT) \
+	gsnapl_avx512-genome128_hr.$(OBJEXT) \
+	gsnapl_avx512-genome_sites.$(OBJEXT) \
+	gsnapl_avx512-bitpack64-read.$(OBJEXT) \
+	gsnapl_avx512-bitpack64-readtwo.$(OBJEXT) \
+	gsnapl_avx512-merge.$(OBJEXT) gsnapl_avx512-indexdb.$(OBJEXT) \
+	gsnapl_avx512-indexdb_hr.$(OBJEXT) \
+	gsnapl_avx512-oligo.$(OBJEXT) gsnapl_avx512-chrom.$(OBJEXT) \
+	gsnapl_avx512-segmentpos.$(OBJEXT) \
+	gsnapl_avx512-chrnum.$(OBJEXT) \
+	gsnapl_avx512-maxent_hr.$(OBJEXT) \
+	gsnapl_avx512-cigar.$(OBJEXT) gsnapl_avx512-samprint.$(OBJEXT) \
+	gsnapl_avx512-mapq.$(OBJEXT) gsnapl_avx512-shortread.$(OBJEXT) \
+	gsnapl_avx512-substring.$(OBJEXT) \
+	gsnapl_avx512-junction.$(OBJEXT) \
+	gsnapl_avx512-stage3hr.$(OBJEXT) \
+	gsnapl_avx512-spanningelt.$(OBJEXT) \
+	gsnapl_avx512-cmet.$(OBJEXT) gsnapl_avx512-atoi.$(OBJEXT) \
+	gsnapl_avx512-maxent.$(OBJEXT) gsnapl_avx512-pair.$(OBJEXT) \
+	gsnapl_avx512-pairpool.$(OBJEXT) gsnapl_avx512-diag.$(OBJEXT) \
+	gsnapl_avx512-diagpool.$(OBJEXT) \
+	gsnapl_avx512-orderstat.$(OBJEXT) \
+	gsnapl_avx512-oligoindex_hr.$(OBJEXT) \
+	gsnapl_avx512-cellpool.$(OBJEXT) \
+	gsnapl_avx512-stage2.$(OBJEXT) gsnapl_avx512-intron.$(OBJEXT) \
+	gsnapl_avx512-boyer-moore.$(OBJEXT) \
+	gsnapl_avx512-changepoint.$(OBJEXT) \
+	gsnapl_avx512-pbinom.$(OBJEXT) gsnapl_avx512-dynprog.$(OBJEXT) \
+	gsnapl_avx512-dynprog_simd.$(OBJEXT) \
+	gsnapl_avx512-dynprog_single.$(OBJEXT) \
+	gsnapl_avx512-dynprog_genome.$(OBJEXT) \
+	gsnapl_avx512-dynprog_cdna.$(OBJEXT) \
+	gsnapl_avx512-dynprog_end.$(OBJEXT) \
+	gsnapl_avx512-gbuffer.$(OBJEXT) \
+	gsnapl_avx512-doublelist.$(OBJEXT) \
+	gsnapl_avx512-smooth.$(OBJEXT) gsnapl_avx512-chimera.$(OBJEXT) \
+	gsnapl_avx512-stage3.$(OBJEXT) \
+	gsnapl_avx512-splicestringpool.$(OBJEXT) \
+	gsnapl_avx512-splicetrie_build.$(OBJEXT) \
+	gsnapl_avx512-splicetrie.$(OBJEXT) \
+	gsnapl_avx512-splice.$(OBJEXT) gsnapl_avx512-indel.$(OBJEXT) \
+	gsnapl_avx512-bitpack64-access.$(OBJEXT) \
+	gsnapl_avx512-merge-heap.$(OBJEXT) \
+	gsnapl_avx512-stage1hr.$(OBJEXT) \
+	gsnapl_avx512-request.$(OBJEXT) \
+	gsnapl_avx512-resulthr.$(OBJEXT) \
+	gsnapl_avx512-output.$(OBJEXT) \
+	gsnapl_avx512-inbuffer.$(OBJEXT) \
+	gsnapl_avx512-samheader.$(OBJEXT) \
+	gsnapl_avx512-outbuffer.$(OBJEXT) \
+	gsnapl_avx512-datadir.$(OBJEXT) \
+	gsnapl_avx512-parserange.$(OBJEXT) \
+	gsnapl_avx512-getopt.$(OBJEXT) gsnapl_avx512-getopt1.$(OBJEXT) \
+	gsnapl_avx512-gsnap.$(OBJEXT)
+dist_gsnapl_avx512_OBJECTS = $(am__objects_33)
+gsnapl_avx512_OBJECTS = $(dist_gsnapl_avx512_OBJECTS)
+gsnapl_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
+gsnapl_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_avx512_CFLAGS) \
+	$(CFLAGS) $(gsnapl_avx512_LDFLAGS) $(LDFLAGS) -o $@
+am__objects_34 = gsnapl_nosimd-except.$(OBJEXT) \
 	gsnapl_nosimd-assert.$(OBJEXT) gsnapl_nosimd-mem.$(OBJEXT) \
 	gsnapl_nosimd-intlist.$(OBJEXT) gsnapl_nosimd-list.$(OBJEXT) \
 	gsnapl_nosimd-littleendian.$(OBJEXT) \
@@ -1553,14 +1939,14 @@ am__objects_29 = gsnapl_nosimd-except.$(OBJEXT) \
 	gsnapl_nosimd-genome_sites.$(OBJEXT) \
 	gsnapl_nosimd-bitpack64-read.$(OBJEXT) \
 	gsnapl_nosimd-bitpack64-readtwo.$(OBJEXT) \
-	gsnapl_nosimd-indexdb.$(OBJEXT) \
+	gsnapl_nosimd-merge.$(OBJEXT) gsnapl_nosimd-indexdb.$(OBJEXT) \
 	gsnapl_nosimd-indexdb_hr.$(OBJEXT) \
 	gsnapl_nosimd-oligo.$(OBJEXT) gsnapl_nosimd-chrom.$(OBJEXT) \
 	gsnapl_nosimd-segmentpos.$(OBJEXT) \
 	gsnapl_nosimd-chrnum.$(OBJEXT) \
 	gsnapl_nosimd-maxent_hr.$(OBJEXT) \
-	gsnapl_nosimd-samprint.$(OBJEXT) gsnapl_nosimd-mapq.$(OBJEXT) \
-	gsnapl_nosimd-shortread.$(OBJEXT) \
+	gsnapl_nosimd-cigar.$(OBJEXT) gsnapl_nosimd-samprint.$(OBJEXT) \
+	gsnapl_nosimd-mapq.$(OBJEXT) gsnapl_nosimd-shortread.$(OBJEXT) \
 	gsnapl_nosimd-substring.$(OBJEXT) \
 	gsnapl_nosimd-junction.$(OBJEXT) \
 	gsnapl_nosimd-stage3hr.$(OBJEXT) \
@@ -1590,6 +1976,7 @@ am__objects_29 = gsnapl_nosimd-except.$(OBJEXT) \
 	gsnapl_nosimd-splicetrie.$(OBJEXT) \
 	gsnapl_nosimd-splice.$(OBJEXT) gsnapl_nosimd-indel.$(OBJEXT) \
 	gsnapl_nosimd-bitpack64-access.$(OBJEXT) \
+	gsnapl_nosimd-merge-heap.$(OBJEXT) \
 	gsnapl_nosimd-stage1hr.$(OBJEXT) \
 	gsnapl_nosimd-request.$(OBJEXT) \
 	gsnapl_nosimd-resulthr.$(OBJEXT) \
@@ -1601,14 +1988,14 @@ am__objects_29 = gsnapl_nosimd-except.$(OBJEXT) \
 	gsnapl_nosimd-parserange.$(OBJEXT) \
 	gsnapl_nosimd-getopt.$(OBJEXT) gsnapl_nosimd-getopt1.$(OBJEXT) \
 	gsnapl_nosimd-gsnap.$(OBJEXT)
-dist_gsnapl_nosimd_OBJECTS = $(am__objects_29)
+dist_gsnapl_nosimd_OBJECTS = $(am__objects_34)
 gsnapl_nosimd_OBJECTS = $(dist_gsnapl_nosimd_OBJECTS)
 gsnapl_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 gsnapl_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_nosimd_CFLAGS) \
 	$(CFLAGS) $(gsnapl_nosimd_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_30 = gsnapl_sse2-except.$(OBJEXT) \
+am__objects_35 = gsnapl_sse2-except.$(OBJEXT) \
 	gsnapl_sse2-assert.$(OBJEXT) gsnapl_sse2-mem.$(OBJEXT) \
 	gsnapl_sse2-intlist.$(OBJEXT) gsnapl_sse2-list.$(OBJEXT) \
 	gsnapl_sse2-littleendian.$(OBJEXT) \
@@ -1629,10 +2016,11 @@ am__objects_30 = gsnapl_sse2-except.$(OBJEXT) \
 	gsnapl_sse2-genome_sites.$(OBJEXT) \
 	gsnapl_sse2-bitpack64-read.$(OBJEXT) \
 	gsnapl_sse2-bitpack64-readtwo.$(OBJEXT) \
-	gsnapl_sse2-indexdb.$(OBJEXT) gsnapl_sse2-indexdb_hr.$(OBJEXT) \
-	gsnapl_sse2-oligo.$(OBJEXT) gsnapl_sse2-chrom.$(OBJEXT) \
-	gsnapl_sse2-segmentpos.$(OBJEXT) gsnapl_sse2-chrnum.$(OBJEXT) \
-	gsnapl_sse2-maxent_hr.$(OBJEXT) gsnapl_sse2-samprint.$(OBJEXT) \
+	gsnapl_sse2-merge.$(OBJEXT) gsnapl_sse2-indexdb.$(OBJEXT) \
+	gsnapl_sse2-indexdb_hr.$(OBJEXT) gsnapl_sse2-oligo.$(OBJEXT) \
+	gsnapl_sse2-chrom.$(OBJEXT) gsnapl_sse2-segmentpos.$(OBJEXT) \
+	gsnapl_sse2-chrnum.$(OBJEXT) gsnapl_sse2-maxent_hr.$(OBJEXT) \
+	gsnapl_sse2-cigar.$(OBJEXT) gsnapl_sse2-samprint.$(OBJEXT) \
 	gsnapl_sse2-mapq.$(OBJEXT) gsnapl_sse2-shortread.$(OBJEXT) \
 	gsnapl_sse2-substring.$(OBJEXT) gsnapl_sse2-junction.$(OBJEXT) \
 	gsnapl_sse2-stage3hr.$(OBJEXT) \
@@ -1659,20 +2047,21 @@ am__objects_30 = gsnapl_sse2-except.$(OBJEXT) \
 	gsnapl_sse2-splicetrie.$(OBJEXT) gsnapl_sse2-splice.$(OBJEXT) \
 	gsnapl_sse2-indel.$(OBJEXT) \
 	gsnapl_sse2-bitpack64-access.$(OBJEXT) \
+	gsnapl_sse2-merge-heap.$(OBJEXT) \
 	gsnapl_sse2-stage1hr.$(OBJEXT) gsnapl_sse2-request.$(OBJEXT) \
 	gsnapl_sse2-resulthr.$(OBJEXT) gsnapl_sse2-output.$(OBJEXT) \
 	gsnapl_sse2-inbuffer.$(OBJEXT) gsnapl_sse2-samheader.$(OBJEXT) \
 	gsnapl_sse2-outbuffer.$(OBJEXT) gsnapl_sse2-datadir.$(OBJEXT) \
 	gsnapl_sse2-parserange.$(OBJEXT) gsnapl_sse2-getopt.$(OBJEXT) \
 	gsnapl_sse2-getopt1.$(OBJEXT) gsnapl_sse2-gsnap.$(OBJEXT)
-dist_gsnapl_sse2_OBJECTS = $(am__objects_30)
+dist_gsnapl_sse2_OBJECTS = $(am__objects_35)
 gsnapl_sse2_OBJECTS = $(dist_gsnapl_sse2_OBJECTS)
 gsnapl_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 gsnapl_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_sse2_CFLAGS) \
 	$(CFLAGS) $(gsnapl_sse2_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_31 = gsnapl_sse41-except.$(OBJEXT) \
+am__objects_36 = gsnapl_sse41-except.$(OBJEXT) \
 	gsnapl_sse41-assert.$(OBJEXT) gsnapl_sse41-mem.$(OBJEXT) \
 	gsnapl_sse41-intlist.$(OBJEXT) gsnapl_sse41-list.$(OBJEXT) \
 	gsnapl_sse41-littleendian.$(OBJEXT) \
@@ -1695,12 +2084,12 @@ am__objects_31 = gsnapl_sse41-except.$(OBJEXT) \
 	gsnapl_sse41-genome_sites.$(OBJEXT) \
 	gsnapl_sse41-bitpack64-read.$(OBJEXT) \
 	gsnapl_sse41-bitpack64-readtwo.$(OBJEXT) \
-	gsnapl_sse41-indexdb.$(OBJEXT) \
+	gsnapl_sse41-merge.$(OBJEXT) gsnapl_sse41-indexdb.$(OBJEXT) \
 	gsnapl_sse41-indexdb_hr.$(OBJEXT) gsnapl_sse41-oligo.$(OBJEXT) \
 	gsnapl_sse41-chrom.$(OBJEXT) gsnapl_sse41-segmentpos.$(OBJEXT) \
 	gsnapl_sse41-chrnum.$(OBJEXT) gsnapl_sse41-maxent_hr.$(OBJEXT) \
-	gsnapl_sse41-samprint.$(OBJEXT) gsnapl_sse41-mapq.$(OBJEXT) \
-	gsnapl_sse41-shortread.$(OBJEXT) \
+	gsnapl_sse41-cigar.$(OBJEXT) gsnapl_sse41-samprint.$(OBJEXT) \
+	gsnapl_sse41-mapq.$(OBJEXT) gsnapl_sse41-shortread.$(OBJEXT) \
 	gsnapl_sse41-substring.$(OBJEXT) \
 	gsnapl_sse41-junction.$(OBJEXT) \
 	gsnapl_sse41-stage3hr.$(OBJEXT) \
@@ -1729,6 +2118,7 @@ am__objects_31 = gsnapl_sse41-except.$(OBJEXT) \
 	gsnapl_sse41-splicetrie.$(OBJEXT) \
 	gsnapl_sse41-splice.$(OBJEXT) gsnapl_sse41-indel.$(OBJEXT) \
 	gsnapl_sse41-bitpack64-access.$(OBJEXT) \
+	gsnapl_sse41-merge-heap.$(OBJEXT) \
 	gsnapl_sse41-stage1hr.$(OBJEXT) gsnapl_sse41-request.$(OBJEXT) \
 	gsnapl_sse41-resulthr.$(OBJEXT) gsnapl_sse41-output.$(OBJEXT) \
 	gsnapl_sse41-inbuffer.$(OBJEXT) \
@@ -1738,14 +2128,14 @@ am__objects_31 = gsnapl_sse41-except.$(OBJEXT) \
 	gsnapl_sse41-parserange.$(OBJEXT) \
 	gsnapl_sse41-getopt.$(OBJEXT) gsnapl_sse41-getopt1.$(OBJEXT) \
 	gsnapl_sse41-gsnap.$(OBJEXT)
-dist_gsnapl_sse41_OBJECTS = $(am__objects_31)
+dist_gsnapl_sse41_OBJECTS = $(am__objects_36)
 gsnapl_sse41_OBJECTS = $(dist_gsnapl_sse41_OBJECTS)
 gsnapl_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 gsnapl_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_sse41_CFLAGS) \
 	$(CFLAGS) $(gsnapl_sse41_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_32 = gsnapl_sse42-except.$(OBJEXT) \
+am__objects_37 = gsnapl_sse42-except.$(OBJEXT) \
 	gsnapl_sse42-assert.$(OBJEXT) gsnapl_sse42-mem.$(OBJEXT) \
 	gsnapl_sse42-intlist.$(OBJEXT) gsnapl_sse42-list.$(OBJEXT) \
 	gsnapl_sse42-littleendian.$(OBJEXT) \
@@ -1768,12 +2158,12 @@ am__objects_32 = gsnapl_sse42-except.$(OBJEXT) \
 	gsnapl_sse42-genome_sites.$(OBJEXT) \
 	gsnapl_sse42-bitpack64-read.$(OBJEXT) \
 	gsnapl_sse42-bitpack64-readtwo.$(OBJEXT) \
-	gsnapl_sse42-indexdb.$(OBJEXT) \
+	gsnapl_sse42-merge.$(OBJEXT) gsnapl_sse42-indexdb.$(OBJEXT) \
 	gsnapl_sse42-indexdb_hr.$(OBJEXT) gsnapl_sse42-oligo.$(OBJEXT) \
 	gsnapl_sse42-chrom.$(OBJEXT) gsnapl_sse42-segmentpos.$(OBJEXT) \
 	gsnapl_sse42-chrnum.$(OBJEXT) gsnapl_sse42-maxent_hr.$(OBJEXT) \
-	gsnapl_sse42-samprint.$(OBJEXT) gsnapl_sse42-mapq.$(OBJEXT) \
-	gsnapl_sse42-shortread.$(OBJEXT) \
+	gsnapl_sse42-cigar.$(OBJEXT) gsnapl_sse42-samprint.$(OBJEXT) \
+	gsnapl_sse42-mapq.$(OBJEXT) gsnapl_sse42-shortread.$(OBJEXT) \
 	gsnapl_sse42-substring.$(OBJEXT) \
 	gsnapl_sse42-junction.$(OBJEXT) \
 	gsnapl_sse42-stage3hr.$(OBJEXT) \
@@ -1802,6 +2192,7 @@ am__objects_32 = gsnapl_sse42-except.$(OBJEXT) \
 	gsnapl_sse42-splicetrie.$(OBJEXT) \
 	gsnapl_sse42-splice.$(OBJEXT) gsnapl_sse42-indel.$(OBJEXT) \
 	gsnapl_sse42-bitpack64-access.$(OBJEXT) \
+	gsnapl_sse42-merge-heap.$(OBJEXT) \
 	gsnapl_sse42-stage1hr.$(OBJEXT) gsnapl_sse42-request.$(OBJEXT) \
 	gsnapl_sse42-resulthr.$(OBJEXT) gsnapl_sse42-output.$(OBJEXT) \
 	gsnapl_sse42-inbuffer.$(OBJEXT) \
@@ -1811,14 +2202,14 @@ am__objects_32 = gsnapl_sse42-except.$(OBJEXT) \
 	gsnapl_sse42-parserange.$(OBJEXT) \
 	gsnapl_sse42-getopt.$(OBJEXT) gsnapl_sse42-getopt1.$(OBJEXT) \
 	gsnapl_sse42-gsnap.$(OBJEXT)
-dist_gsnapl_sse42_OBJECTS = $(am__objects_32)
+dist_gsnapl_sse42_OBJECTS = $(am__objects_37)
 gsnapl_sse42_OBJECTS = $(dist_gsnapl_sse42_OBJECTS)
 gsnapl_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 gsnapl_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_sse42_CFLAGS) \
 	$(CFLAGS) $(gsnapl_sse42_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_33 = gsnapl_ssse3-except.$(OBJEXT) \
+am__objects_38 = gsnapl_ssse3-except.$(OBJEXT) \
 	gsnapl_ssse3-assert.$(OBJEXT) gsnapl_ssse3-mem.$(OBJEXT) \
 	gsnapl_ssse3-intlist.$(OBJEXT) gsnapl_ssse3-list.$(OBJEXT) \
 	gsnapl_ssse3-littleendian.$(OBJEXT) \
@@ -1841,12 +2232,12 @@ am__objects_33 = gsnapl_ssse3-except.$(OBJEXT) \
 	gsnapl_ssse3-genome_sites.$(OBJEXT) \
 	gsnapl_ssse3-bitpack64-read.$(OBJEXT) \
 	gsnapl_ssse3-bitpack64-readtwo.$(OBJEXT) \
-	gsnapl_ssse3-indexdb.$(OBJEXT) \
+	gsnapl_ssse3-merge.$(OBJEXT) gsnapl_ssse3-indexdb.$(OBJEXT) \
 	gsnapl_ssse3-indexdb_hr.$(OBJEXT) gsnapl_ssse3-oligo.$(OBJEXT) \
 	gsnapl_ssse3-chrom.$(OBJEXT) gsnapl_ssse3-segmentpos.$(OBJEXT) \
 	gsnapl_ssse3-chrnum.$(OBJEXT) gsnapl_ssse3-maxent_hr.$(OBJEXT) \
-	gsnapl_ssse3-samprint.$(OBJEXT) gsnapl_ssse3-mapq.$(OBJEXT) \
-	gsnapl_ssse3-shortread.$(OBJEXT) \
+	gsnapl_ssse3-cigar.$(OBJEXT) gsnapl_ssse3-samprint.$(OBJEXT) \
+	gsnapl_ssse3-mapq.$(OBJEXT) gsnapl_ssse3-shortread.$(OBJEXT) \
 	gsnapl_ssse3-substring.$(OBJEXT) \
 	gsnapl_ssse3-junction.$(OBJEXT) \
 	gsnapl_ssse3-stage3hr.$(OBJEXT) \
@@ -1875,6 +2266,7 @@ am__objects_33 = gsnapl_ssse3-except.$(OBJEXT) \
 	gsnapl_ssse3-splicetrie.$(OBJEXT) \
 	gsnapl_ssse3-splice.$(OBJEXT) gsnapl_ssse3-indel.$(OBJEXT) \
 	gsnapl_ssse3-bitpack64-access.$(OBJEXT) \
+	gsnapl_ssse3-merge-heap.$(OBJEXT) \
 	gsnapl_ssse3-stage1hr.$(OBJEXT) gsnapl_ssse3-request.$(OBJEXT) \
 	gsnapl_ssse3-resulthr.$(OBJEXT) gsnapl_ssse3-output.$(OBJEXT) \
 	gsnapl_ssse3-inbuffer.$(OBJEXT) \
@@ -1884,14 +2276,14 @@ am__objects_33 = gsnapl_ssse3-except.$(OBJEXT) \
 	gsnapl_ssse3-parserange.$(OBJEXT) \
 	gsnapl_ssse3-getopt.$(OBJEXT) gsnapl_ssse3-getopt1.$(OBJEXT) \
 	gsnapl_ssse3-gsnap.$(OBJEXT)
-dist_gsnapl_ssse3_OBJECTS = $(am__objects_33)
+dist_gsnapl_ssse3_OBJECTS = $(am__objects_38)
 gsnapl_ssse3_OBJECTS = $(dist_gsnapl_ssse3_OBJECTS)
 gsnapl_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1)
 gsnapl_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_ssse3_CFLAGS) \
 	$(CFLAGS) $(gsnapl_ssse3_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_34 = iit_dump-except.$(OBJEXT) iit_dump-assert.$(OBJEXT) \
+am__objects_39 = iit_dump-except.$(OBJEXT) iit_dump-assert.$(OBJEXT) \
 	iit_dump-mem.$(OBJEXT) iit_dump-littleendian.$(OBJEXT) \
 	iit_dump-bigendian.$(OBJEXT) iit_dump-intlist.$(OBJEXT) \
 	iit_dump-list.$(OBJEXT) iit_dump-univinterval.$(OBJEXT) \
@@ -1901,13 +2293,13 @@ am__objects_34 = iit_dump-except.$(OBJEXT) iit_dump-assert.$(OBJEXT) \
 	iit_dump-iit-read-univ.$(OBJEXT) iit_dump-iit-read.$(OBJEXT) \
 	iit_dump-parserange.$(OBJEXT) iit_dump-getopt.$(OBJEXT) \
 	iit_dump-getopt1.$(OBJEXT) iit_dump-iit_dump.$(OBJEXT)
-dist_iit_dump_OBJECTS = $(am__objects_34)
+dist_iit_dump_OBJECTS = $(am__objects_39)
 iit_dump_OBJECTS = $(dist_iit_dump_OBJECTS)
 iit_dump_DEPENDENCIES = $(am__DEPENDENCIES_1)
 iit_dump_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(iit_dump_CFLAGS) \
 	$(CFLAGS) $(iit_dump_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_35 = iit_get-except.$(OBJEXT) iit_get-assert.$(OBJEXT) \
+am__objects_40 = iit_get-except.$(OBJEXT) iit_get-assert.$(OBJEXT) \
 	iit_get-mem.$(OBJEXT) iit_get-intlist.$(OBJEXT) \
 	iit_get-list.$(OBJEXT) iit_get-littleendian.$(OBJEXT) \
 	iit_get-bigendian.$(OBJEXT) iit_get-univinterval.$(OBJEXT) \
@@ -1917,13 +2309,13 @@ am__objects_35 = iit_get-except.$(OBJEXT) iit_get-assert.$(OBJEXT) \
 	iit_get-iit-read-univ.$(OBJEXT) iit_get-iit-read.$(OBJEXT) \
 	iit_get-parserange.$(OBJEXT) iit_get-getopt.$(OBJEXT) \
 	iit_get-getopt1.$(OBJEXT) iit_get-iit_get.$(OBJEXT)
-dist_iit_get_OBJECTS = $(am__objects_35)
+dist_iit_get_OBJECTS = $(am__objects_40)
 iit_get_OBJECTS = $(dist_iit_get_OBJECTS)
 iit_get_DEPENDENCIES = $(am__DEPENDENCIES_1)
 iit_get_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(iit_get_CFLAGS) \
 	$(CFLAGS) $(iit_get_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_36 = iit_store-except.$(OBJEXT) iit_store-assert.$(OBJEXT) \
+am__objects_41 = iit_store-except.$(OBJEXT) iit_store-assert.$(OBJEXT) \
 	iit_store-mem.$(OBJEXT) iit_store-intlist.$(OBJEXT) \
 	iit_store-list.$(OBJEXT) iit_store-littleendian.$(OBJEXT) \
 	iit_store-bigendian.$(OBJEXT) iit_store-univinterval.$(OBJEXT) \
@@ -1935,13 +2327,13 @@ am__objects_36 = iit_store-except.$(OBJEXT) iit_store-assert.$(OBJEXT) \
 	iit_store-table.$(OBJEXT) iit_store-chrom.$(OBJEXT) \
 	iit_store-getopt.$(OBJEXT) iit_store-getopt1.$(OBJEXT) \
 	iit_store-iit_store.$(OBJEXT)
-dist_iit_store_OBJECTS = $(am__objects_36)
+dist_iit_store_OBJECTS = $(am__objects_41)
 iit_store_OBJECTS = $(dist_iit_store_OBJECTS)
 iit_store_DEPENDENCIES = $(am__DEPENDENCIES_1)
 iit_store_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(iit_store_CFLAGS) \
 	$(CFLAGS) $(iit_store_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_37 = sam_sort-except.$(OBJEXT) sam_sort-assert.$(OBJEXT) \
+am__objects_42 = sam_sort-except.$(OBJEXT) sam_sort-assert.$(OBJEXT) \
 	sam_sort-mem.$(OBJEXT) sam_sort-littleendian.$(OBJEXT) \
 	sam_sort-bigendian.$(OBJEXT) sam_sort-intlist.$(OBJEXT) \
 	sam_sort-list.$(OBJEXT) sam_sort-stopwatch.$(OBJEXT) \
@@ -1953,13 +2345,13 @@ am__objects_37 = sam_sort-except.$(OBJEXT) sam_sort-assert.$(OBJEXT) \
 	sam_sort-samread.$(OBJEXT) sam_sort-datadir.$(OBJEXT) \
 	sam_sort-getopt.$(OBJEXT) sam_sort-getopt1.$(OBJEXT) \
 	sam_sort-sam_sort.$(OBJEXT)
-dist_sam_sort_OBJECTS = $(am__objects_37)
+dist_sam_sort_OBJECTS = $(am__objects_42)
 sam_sort_OBJECTS = $(dist_sam_sort_OBJECTS)
 sam_sort_DEPENDENCIES = $(am__DEPENDENCIES_1)
 sam_sort_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(sam_sort_CFLAGS) \
 	$(CFLAGS) $(sam_sort_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_38 = snpindex-except.$(OBJEXT) snpindex-assert.$(OBJEXT) \
+am__objects_43 = snpindex-except.$(OBJEXT) snpindex-assert.$(OBJEXT) \
 	snpindex-mem.$(OBJEXT) snpindex-intlist.$(OBJEXT) \
 	snpindex-list.$(OBJEXT) snpindex-littleendian.$(OBJEXT) \
 	snpindex-bigendian.$(OBJEXT) snpindex-univinterval.$(OBJEXT) \
@@ -1981,14 +2373,14 @@ am__objects_38 = snpindex-except.$(OBJEXT) snpindex-assert.$(OBJEXT) \
 	snpindex-datadir.$(OBJEXT) snpindex-parserange.$(OBJEXT) \
 	snpindex-getopt.$(OBJEXT) snpindex-getopt1.$(OBJEXT) \
 	snpindex-snpindex.$(OBJEXT)
-dist_snpindex_OBJECTS = $(am__objects_38)
+dist_snpindex_OBJECTS = $(am__objects_43)
 snpindex_OBJECTS = $(dist_snpindex_OBJECTS)
 snpindex_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 snpindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(snpindex_CFLAGS) \
 	$(CFLAGS) $(snpindex_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_39 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
+am__objects_44 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
 	uniqscan-mem.$(OBJEXT) uniqscan-intlist.$(OBJEXT) \
 	uniqscan-list.$(OBJEXT) uniqscan-littleendian.$(OBJEXT) \
 	uniqscan-bigendian.$(OBJEXT) uniqscan-univinterval.$(OBJEXT) \
@@ -2003,7 +2395,7 @@ am__objects_39 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
 	uniqscan-genome128_hr.$(OBJEXT) \
 	uniqscan-genome_sites.$(OBJEXT) \
 	uniqscan-bitpack64-read.$(OBJEXT) \
-	uniqscan-bitpack64-readtwo.$(OBJEXT) \
+	uniqscan-bitpack64-readtwo.$(OBJEXT) uniqscan-merge.$(OBJEXT) \
 	uniqscan-indexdb.$(OBJEXT) uniqscan-indexdb_hr.$(OBJEXT) \
 	uniqscan-oligo.$(OBJEXT) uniqscan-chrom.$(OBJEXT) \
 	uniqscan-segmentpos.$(OBJEXT) uniqscan-chrnum.$(OBJEXT) \
@@ -2011,14 +2403,15 @@ am__objects_39 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
 	uniqscan-shortread.$(OBJEXT) uniqscan-substring.$(OBJEXT) \
 	uniqscan-junction.$(OBJEXT) uniqscan-stage3hr.$(OBJEXT) \
 	uniqscan-spanningelt.$(OBJEXT) uniqscan-cmet.$(OBJEXT) \
-	uniqscan-atoi.$(OBJEXT) uniqscan-maxent.$(OBJEXT) \
-	uniqscan-pair.$(OBJEXT) uniqscan-pairpool.$(OBJEXT) \
-	uniqscan-diag.$(OBJEXT) uniqscan-diagpool.$(OBJEXT) \
-	uniqscan-orderstat.$(OBJEXT) uniqscan-oligoindex_hr.$(OBJEXT) \
-	uniqscan-cellpool.$(OBJEXT) uniqscan-stage2.$(OBJEXT) \
-	uniqscan-intron.$(OBJEXT) uniqscan-boyer-moore.$(OBJEXT) \
-	uniqscan-changepoint.$(OBJEXT) uniqscan-pbinom.$(OBJEXT) \
-	uniqscan-dynprog.$(OBJEXT) uniqscan-dynprog_simd.$(OBJEXT) \
+	uniqscan-atoi.$(OBJEXT) uniqscan-cigar.$(OBJEXT) \
+	uniqscan-maxent.$(OBJEXT) uniqscan-pair.$(OBJEXT) \
+	uniqscan-pairpool.$(OBJEXT) uniqscan-diag.$(OBJEXT) \
+	uniqscan-diagpool.$(OBJEXT) uniqscan-orderstat.$(OBJEXT) \
+	uniqscan-oligoindex_hr.$(OBJEXT) uniqscan-cellpool.$(OBJEXT) \
+	uniqscan-stage2.$(OBJEXT) uniqscan-intron.$(OBJEXT) \
+	uniqscan-boyer-moore.$(OBJEXT) uniqscan-changepoint.$(OBJEXT) \
+	uniqscan-pbinom.$(OBJEXT) uniqscan-dynprog.$(OBJEXT) \
+	uniqscan-dynprog_simd.$(OBJEXT) \
 	uniqscan-dynprog_single.$(OBJEXT) \
 	uniqscan-dynprog_genome.$(OBJEXT) \
 	uniqscan-dynprog_cdna.$(OBJEXT) uniqscan-dynprog_end.$(OBJEXT) \
@@ -2030,18 +2423,19 @@ am__objects_39 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \
 	uniqscan-indel.$(OBJEXT) uniqscan-bitpack64-access.$(OBJEXT) \
 	uniqscan-bytecoding.$(OBJEXT) uniqscan-univdiag.$(OBJEXT) \
 	uniqscan-sedgesort.$(OBJEXT) uniqscan-sarray-read.$(OBJEXT) \
+	uniqscan-sarray-search.$(OBJEXT) uniqscan-merge-heap.$(OBJEXT) \
 	uniqscan-stage1hr.$(OBJEXT) uniqscan-resulthr.$(OBJEXT) \
 	uniqscan-datadir.$(OBJEXT) uniqscan-parserange.$(OBJEXT) \
 	uniqscan-getopt.$(OBJEXT) uniqscan-getopt1.$(OBJEXT) \
 	uniqscan-uniqscan.$(OBJEXT)
-dist_uniqscan_OBJECTS = $(am__objects_39)
+dist_uniqscan_OBJECTS = $(am__objects_44)
 uniqscan_OBJECTS = $(dist_uniqscan_OBJECTS)
 uniqscan_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
 uniqscan_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(uniqscan_CFLAGS) \
 	$(CFLAGS) $(uniqscan_LDFLAGS) $(LDFLAGS) -o $@
-am__objects_40 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \
+am__objects_45 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \
 	uniqscanl-mem.$(OBJEXT) uniqscanl-intlist.$(OBJEXT) \
 	uniqscanl-list.$(OBJEXT) uniqscanl-littleendian.$(OBJEXT) \
 	uniqscanl-bigendian.$(OBJEXT) uniqscanl-univinterval.$(OBJEXT) \
@@ -2058,14 +2452,15 @@ am__objects_40 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \
 	uniqscanl-genome_sites.$(OBJEXT) \
 	uniqscanl-bitpack64-read.$(OBJEXT) \
 	uniqscanl-bitpack64-readtwo.$(OBJEXT) \
-	uniqscanl-indexdb.$(OBJEXT) uniqscanl-indexdb_hr.$(OBJEXT) \
-	uniqscanl-oligo.$(OBJEXT) uniqscanl-chrom.$(OBJEXT) \
-	uniqscanl-segmentpos.$(OBJEXT) uniqscanl-chrnum.$(OBJEXT) \
-	uniqscanl-maxent_hr.$(OBJEXT) uniqscanl-mapq.$(OBJEXT) \
-	uniqscanl-shortread.$(OBJEXT) uniqscanl-substring.$(OBJEXT) \
-	uniqscanl-junction.$(OBJEXT) uniqscanl-stage3hr.$(OBJEXT) \
-	uniqscanl-spanningelt.$(OBJEXT) uniqscanl-cmet.$(OBJEXT) \
-	uniqscanl-atoi.$(OBJEXT) uniqscanl-maxent.$(OBJEXT) \
+	uniqscanl-merge.$(OBJEXT) uniqscanl-indexdb.$(OBJEXT) \
+	uniqscanl-indexdb_hr.$(OBJEXT) uniqscanl-oligo.$(OBJEXT) \
+	uniqscanl-chrom.$(OBJEXT) uniqscanl-segmentpos.$(OBJEXT) \
+	uniqscanl-chrnum.$(OBJEXT) uniqscanl-maxent_hr.$(OBJEXT) \
+	uniqscanl-mapq.$(OBJEXT) uniqscanl-shortread.$(OBJEXT) \
+	uniqscanl-substring.$(OBJEXT) uniqscanl-junction.$(OBJEXT) \
+	uniqscanl-stage3hr.$(OBJEXT) uniqscanl-spanningelt.$(OBJEXT) \
+	uniqscanl-cmet.$(OBJEXT) uniqscanl-atoi.$(OBJEXT) \
+	uniqscanl-cigar.$(OBJEXT) uniqscanl-maxent.$(OBJEXT) \
 	uniqscanl-pair.$(OBJEXT) uniqscanl-pairpool.$(OBJEXT) \
 	uniqscanl-diag.$(OBJEXT) uniqscanl-diagpool.$(OBJEXT) \
 	uniqscanl-orderstat.$(OBJEXT) \
@@ -2085,11 +2480,11 @@ am__objects_40 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \
 	uniqscanl-splicetrie_build.$(OBJEXT) \
 	uniqscanl-splicetrie.$(OBJEXT) uniqscanl-splice.$(OBJEXT) \
 	uniqscanl-indel.$(OBJEXT) uniqscanl-bitpack64-access.$(OBJEXT) \
-	uniqscanl-stage1hr.$(OBJEXT) uniqscanl-resulthr.$(OBJEXT) \
-	uniqscanl-datadir.$(OBJEXT) uniqscanl-parserange.$(OBJEXT) \
-	uniqscanl-getopt.$(OBJEXT) uniqscanl-getopt1.$(OBJEXT) \
-	uniqscanl-uniqscan.$(OBJEXT)
-dist_uniqscanl_OBJECTS = $(am__objects_40)
+	uniqscanl-merge-heap.$(OBJEXT) uniqscanl-stage1hr.$(OBJEXT) \
+	uniqscanl-resulthr.$(OBJEXT) uniqscanl-datadir.$(OBJEXT) \
+	uniqscanl-parserange.$(OBJEXT) uniqscanl-getopt.$(OBJEXT) \
+	uniqscanl-getopt1.$(OBJEXT) uniqscanl-uniqscan.$(OBJEXT)
+dist_uniqscanl_OBJECTS = $(am__objects_45)
 uniqscanl_OBJECTS = $(dist_uniqscanl_OBJECTS)
 uniqscanl_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 	$(am__DEPENDENCIES_1)
@@ -2130,51 +2525,58 @@ AM_V_CCLD = $(am__v_CCLD_@AM_V@)
 am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
 am__v_CCLD_0 = @echo "  CCLD    " $@;
 am__v_CCLD_1 = 
-SOURCES = $(dist_atoiindex_SOURCES) $(dist_cmetindex_SOURCES) \
-	$(dist_cpuid_SOURCES) $(dist_get_genome_SOURCES) \
-	$(dist_gmap_SOURCES) $(dist_gmap_avx2_SOURCES) \
+SOURCES = $(dist_libgmap_la_SOURCES) $(dist_atoiindex_SOURCES) \
+	$(dist_cmetindex_SOURCES) $(dist_cpuid_SOURCES) \
+	$(dist_get_genome_SOURCES) $(dist_gmap_SOURCES) \
+	$(dist_gmap_avx2_SOURCES) $(dist_gmap_avx512_SOURCES) \
 	$(dist_gmap_nosimd_SOURCES) $(dist_gmap_sse2_SOURCES) \
 	$(dist_gmap_sse41_SOURCES) $(dist_gmap_sse42_SOURCES) \
 	$(dist_gmap_ssse3_SOURCES) $(dist_gmapindex_SOURCES) \
 	$(dist_gmapl_SOURCES) $(dist_gmapl_avx2_SOURCES) \
-	$(dist_gmapl_nosimd_SOURCES) $(dist_gmapl_sse2_SOURCES) \
-	$(dist_gmapl_sse41_SOURCES) $(dist_gmapl_sse42_SOURCES) \
-	$(dist_gmapl_ssse3_SOURCES) $(dist_gsnap_SOURCES) \
-	$(dist_gsnap_avx2_SOURCES) $(dist_gsnap_nosimd_SOURCES) \
+	$(dist_gmapl_avx512_SOURCES) $(dist_gmapl_nosimd_SOURCES) \
+	$(dist_gmapl_sse2_SOURCES) $(dist_gmapl_sse41_SOURCES) \
+	$(dist_gmapl_sse42_SOURCES) $(dist_gmapl_ssse3_SOURCES) \
+	$(dist_gsnap_SOURCES) $(dist_gsnap_avx2_SOURCES) \
+	$(dist_gsnap_avx512_SOURCES) $(dist_gsnap_nosimd_SOURCES) \
 	$(dist_gsnap_sse2_SOURCES) $(dist_gsnap_sse41_SOURCES) \
 	$(dist_gsnap_sse42_SOURCES) $(dist_gsnap_ssse3_SOURCES) \
 	$(dist_gsnapl_SOURCES) $(dist_gsnapl_avx2_SOURCES) \
-	$(dist_gsnapl_nosimd_SOURCES) $(dist_gsnapl_sse2_SOURCES) \
-	$(dist_gsnapl_sse41_SOURCES) $(dist_gsnapl_sse42_SOURCES) \
-	$(dist_gsnapl_ssse3_SOURCES) $(dist_iit_dump_SOURCES) \
-	$(dist_iit_get_SOURCES) $(dist_iit_store_SOURCES) \
-	$(dist_sam_sort_SOURCES) $(dist_snpindex_SOURCES) \
-	$(dist_uniqscan_SOURCES) $(dist_uniqscanl_SOURCES)
-DIST_SOURCES = $(dist_atoiindex_SOURCES) $(dist_cmetindex_SOURCES) \
-	$(dist_cpuid_SOURCES) $(dist_get_genome_SOURCES) \
-	$(dist_gmap_SOURCES) $(dist_gmap_avx2_SOURCES) \
+	$(dist_gsnapl_avx512_SOURCES) $(dist_gsnapl_nosimd_SOURCES) \
+	$(dist_gsnapl_sse2_SOURCES) $(dist_gsnapl_sse41_SOURCES) \
+	$(dist_gsnapl_sse42_SOURCES) $(dist_gsnapl_ssse3_SOURCES) \
+	$(dist_iit_dump_SOURCES) $(dist_iit_get_SOURCES) \
+	$(dist_iit_store_SOURCES) $(dist_sam_sort_SOURCES) \
+	$(dist_snpindex_SOURCES) $(dist_uniqscan_SOURCES) \
+	$(dist_uniqscanl_SOURCES)
+DIST_SOURCES = $(dist_libgmap_la_SOURCES) $(dist_atoiindex_SOURCES) \
+	$(dist_cmetindex_SOURCES) $(dist_cpuid_SOURCES) \
+	$(dist_get_genome_SOURCES) $(dist_gmap_SOURCES) \
+	$(dist_gmap_avx2_SOURCES) $(dist_gmap_avx512_SOURCES) \
 	$(dist_gmap_nosimd_SOURCES) $(dist_gmap_sse2_SOURCES) \
 	$(dist_gmap_sse41_SOURCES) $(dist_gmap_sse42_SOURCES) \
 	$(dist_gmap_ssse3_SOURCES) $(dist_gmapindex_SOURCES) \
 	$(dist_gmapl_SOURCES) $(dist_gmapl_avx2_SOURCES) \
-	$(dist_gmapl_nosimd_SOURCES) $(dist_gmapl_sse2_SOURCES) \
-	$(dist_gmapl_sse41_SOURCES) $(dist_gmapl_sse42_SOURCES) \
-	$(dist_gmapl_ssse3_SOURCES) $(dist_gsnap_SOURCES) \
-	$(dist_gsnap_avx2_SOURCES) $(dist_gsnap_nosimd_SOURCES) \
+	$(dist_gmapl_avx512_SOURCES) $(dist_gmapl_nosimd_SOURCES) \
+	$(dist_gmapl_sse2_SOURCES) $(dist_gmapl_sse41_SOURCES) \
+	$(dist_gmapl_sse42_SOURCES) $(dist_gmapl_ssse3_SOURCES) \
+	$(dist_gsnap_SOURCES) $(dist_gsnap_avx2_SOURCES) \
+	$(dist_gsnap_avx512_SOURCES) $(dist_gsnap_nosimd_SOURCES) \
 	$(dist_gsnap_sse2_SOURCES) $(dist_gsnap_sse41_SOURCES) \
 	$(dist_gsnap_sse42_SOURCES) $(dist_gsnap_ssse3_SOURCES) \
 	$(dist_gsnapl_SOURCES) $(dist_gsnapl_avx2_SOURCES) \
-	$(dist_gsnapl_nosimd_SOURCES) $(dist_gsnapl_sse2_SOURCES) \
-	$(dist_gsnapl_sse41_SOURCES) $(dist_gsnapl_sse42_SOURCES) \
-	$(dist_gsnapl_ssse3_SOURCES) $(dist_iit_dump_SOURCES) \
-	$(dist_iit_get_SOURCES) $(dist_iit_store_SOURCES) \
-	$(dist_sam_sort_SOURCES) $(dist_snpindex_SOURCES) \
-	$(dist_uniqscan_SOURCES) $(dist_uniqscanl_SOURCES)
+	$(dist_gsnapl_avx512_SOURCES) $(dist_gsnapl_nosimd_SOURCES) \
+	$(dist_gsnapl_sse2_SOURCES) $(dist_gsnapl_sse41_SOURCES) \
+	$(dist_gsnapl_sse42_SOURCES) $(dist_gsnapl_ssse3_SOURCES) \
+	$(dist_iit_dump_SOURCES) $(dist_iit_get_SOURCES) \
+	$(dist_iit_store_SOURCES) $(dist_sam_sort_SOURCES) \
+	$(dist_snpindex_SOURCES) $(dist_uniqscan_SOURCES) \
+	$(dist_uniqscanl_SOURCES)
 am__can_run_installinfo = \
   case $$AM_UPDATE_INFO_DIR in \
     n|no|NO) false;; \
     *) (install-info --version) >/dev/null 2>&1;; \
   esac
+HEADERS = $(include_HEADERS)
 am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
 	$(LISP)config.h.in
 # Read a list of newline-separated strings from the standard input,
@@ -2235,6 +2637,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBGMAP_SO_VERSION = @LIBGMAP_SO_VERSION@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -2247,9 +2650,6 @@ MAKEINFO = @MAKEINFO@
 MANIFEST_TOOL = @MANIFEST_TOOL@
 MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
 MKDIR_P = @MKDIR_P@
-MPICC = @MPICC@
-MPILIBS = @MPILIBS@
-MPI_CFLAGS = @MPI_CFLAGS@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -2274,6 +2674,7 @@ SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@
+SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@
 SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@
 SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@
 SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@
@@ -2344,6 +2745,58 @@ CLEANFILES = fa_coords gmap_process
 
 # Include master.c and master.h, so they get included in distribution
 EXTRA_DIST = mpidebug.c mpidebug.h master.c master.h
+
+# -DUTILITYP=1 needed for iit_store, iit_dump, iit_get, gmapindex, and
+# get-genome so they can handle both small and large genomes at run
+# time
+
+#lib_LTLIBRARIES = libgmap-@LIBGMAP_API_VERSION@.la
+lib_LTLIBRARIES = libgmap.la
+include_HEADERS = fopen.h bool.h types.h separator.h comp.h \
+ except.h assert.h mem.h \
+ intlistdef.h intlist.h listdef.h list.h \
+ doublelist.h \
+ littleendian.h bigendian.h \
+ interval.h uintlist.h uint8list.h \
+ iitdef.h iit-read.h iit-write.h parserange.h \
+ univinterval.h iit-read-univ.h \
+ table.h tableuint.h uinttable.h \
+ stopwatch.h semaphore.h access.h \
+ chrom.h filestring.h \
+ md5.h complement.h bzip2.h sequence.h \
+ genomicpos.h \
+ bitpack64-read.h bitpack64-readtwo.h \
+ maxent_hr.h \
+ popcount.h genome128_hr.h \
+ compress.h bytecoding.h sarray-read.h \
+ mode.h chrnum.h genome.h samflags.h \
+ datadir.h
+
+LIBGMAP_LA_FILES = fopen.h bool.h types.h separator.h comp.h \
+ except.c except.h assert.c assert.h mem.c mem.h \
+ intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
+ doublelist.c doublelist.h \
+ littleendian.c littleendian.h bigendian.c bigendian.h \
+ interval.c interval.h uintlist.c uintlist.h uint8list.c uint8list.h \
+ iit-read.c iit-read.h iit-write.c iit-write.h parserange.c parserange.h \
+ univinterval.c univinterval.h iit-read-univ.c iit-read-univ.h \
+ stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
+ table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h \
+ chrom.c chrom.h filestring.c filestring.h \
+ md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
+ genomicpos.c genomicpos.h \
+ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ maxent_hr.c maxent_hr.h \
+ popcount.c popcount.h genome128_hr.c genome128_hr.h \
+ compress.c compress.h bytecoding.c bytecoding.h sarray-read.c sarray-read.h \
+ mode.h chrnum.c chrnum.h genome.c genome.h \
+ datadir.c datadir.h
+
+libgmap_CC = $(PTHREAD_CC)
+libgmap_la_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) $(POPCNT_CFLAGS) -fPIC -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\"
+libgmap_la_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) -version-info $(LIBGMAP_SO_VERSION)
+libgmap_la_LIBADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_libgmap_la_SOURCES = $(LIBGMAP_LA_FILES)
 CPUID_FILES = bool.h cpuid.c cpuid.h
 cpuid_CC = $(CC)
 cpuid_CFLAGS = $(AM_CFLAGS) -DMAIN=1
@@ -2388,6 +2841,7 @@ GMAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  genome-write.c genome-write.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h block.c block.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -2443,6 +2897,11 @@ gmap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPD
 gmap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gmap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gmap_avx2_SOURCES = $(GMAP_FILES)
+gmap_avx512_CC = $(PTHREAD_CC)
+gmap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gmap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gmap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gmap_avx512_SOURCES = $(GMAP_FILES)
 GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  except.c except.h assert.c assert.h mem.c mem.h \
  intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
@@ -2457,6 +2916,7 @@ GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  genome-write.c genome-write.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h block.c block.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -2512,6 +2972,11 @@ gmapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAP
 gmapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gmapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gmapl_avx2_SOURCES = $(GMAPL_FILES)
+gmapl_avx512_CC = $(PTHREAD_CC)
+gmapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gmapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gmapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gmapl_avx512_SOURCES = $(GMAPL_FILES)
 GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  except.c except.h assert.c assert.h mem.c mem.h \
  intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
@@ -2526,11 +2991,12 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
  chrnum.c chrnum.h \
- maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
+ maxent_hr.c maxent_hr.h cigar.c cigar.h samflags.h samprint.c samprint.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
@@ -2545,7 +3011,8 @@ GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
  bytecoding.c bytecoding.h \
  univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
- stage1hr.c stage1hr.h \
+ sarray-search.c sarray-search.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h \
  request.c request.h resulthr.c resulthr.h output.c output.h \
  inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
@@ -2584,6 +3051,11 @@ gsnap_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAP
 gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gsnap_avx2_SOURCES = $(GSNAP_FILES)
+gsnap_avx512_CC = $(PTHREAD_CC)
+gsnap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gsnap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gsnap_avx512_SOURCES = $(GSNAP_FILES)
 GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  except.c except.h assert.c assert.h mem.c mem.h \
  intlistdef.h intlist.c intlist.h listdef.h list.c list.h \
@@ -2598,11 +3070,12 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
  chrnum.c chrnum.h \
- maxent_hr.c maxent_hr.h samflags.h samprint.c samprint.h \
+ maxent_hr.c maxent_hr.h cigar.c cigar.h samflags.h samprint.c samprint.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
@@ -2615,7 +3088,7 @@ GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \
  chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
  splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- stage1hr.c stage1hr.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h \
  request.c request.h resulthr.c resulthr.h output.c output.h \
  inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
@@ -2653,6 +3126,11 @@ gsnapl_avx2_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMA
 gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
 gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
 dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES)
+gsnapl_avx512_CC = $(PTHREAD_CC)
+gsnapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS)
+gsnapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG)
+gsnapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS)
+dist_gsnapl_avx512_SOURCES = $(GSNAPL_FILES)
 
 # Build as a non-SIMD program
 UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
@@ -2668,6 +3146,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -2675,6 +3154,7 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
  maxent_hr.c maxent_hr.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
+ cigar.c cigar.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
  orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
  intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
@@ -2687,7 +3167,8 @@ UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
  bytecoding.c bytecoding.h \
  univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \
- stage1hr.c stage1hr.h resulthr.c resulthr.h \
+ sarray-search.c sarray-search.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
  getopt.c getopt1.c getopt.h uniqscan.c
 
@@ -2709,6 +3190,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
  genome.c genome.h \
  popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \
  bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \
+ merge.c merge.h \
  indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \
  oligo.c oligo.h \
  chrom.c chrom.h segmentpos.c segmentpos.h \
@@ -2716,6 +3198,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
  maxent_hr.c maxent_hr.h \
  mapq.c mapq.h shortread.c shortread.h substring.c substring.h junction.c junction.h stage3hr.c stage3hr.h \
  spanningelt.c spanningelt.h cmet.c cmet.h atoi.c atoi.h \
+ cigar.c cigar.h \
  comp.h maxent.c maxent.h pairdef.h pair.c pair.h pairpool.c pairpool.h diag.c diag.h diagpool.c diagpool.h \
  orderstat.c orderstat.h oligoindex_hr.c oligoindex_hr.h cellpool.c cellpool.h stage2.c stage2.h \
  intron.c intron.h boyer-moore.c boyer-moore.h changepoint.c changepoint.h pbinom.c pbinom.h \
@@ -2726,7 +3209,7 @@ UNIQSCANL_FILES = fopen.h bool.h types.h separator.h comp.h \
  chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \
  splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \
  splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \
- stage1hr.c stage1hr.h resulthr.c resulthr.h \
+ merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \
  datadir.c datadir.h mode.h parserange.c parserange.h \
  getopt.c getopt1.c getopt.h uniqscan.c
 
@@ -2979,6 +3462,44 @@ $(srcdir)/config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
 
 distclean-hdr:
 	-rm -f config.h stamp-h1
+
+install-libLTLIBRARIES: $(lib_LTLIBRARIES)
+	@$(NORMAL_INSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	list2=; for p in $$list; do \
+	  if test -f $$p; then \
+	    list2="$$list2 $$p"; \
+	  else :; fi; \
+	done; \
+	test -z "$$list2" || { \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
+	}
+
+uninstall-libLTLIBRARIES:
+	@$(NORMAL_UNINSTALL)
+	@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
+	for p in $$list; do \
+	  $(am__strip_dir) \
+	  echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \
+	  $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \
+	done
+
+clean-libLTLIBRARIES:
+	-test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES)
+	@list='$(lib_LTLIBRARIES)'; \
+	locs=`for p in $$list; do echo $$p; done | \
+	      sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+	      sort -u`; \
+	test -z "$$locs" || { \
+	  echo rm -f $${locs}; \
+	  rm -f $${locs}; \
+	}
+
+libgmap.la: $(libgmap_la_OBJECTS) $(libgmap_la_DEPENDENCIES) $(EXTRA_libgmap_la_DEPENDENCIES) 
+	$(AM_V_CCLD)$(libgmap_la_LINK) -rpath $(libdir) $(libgmap_la_OBJECTS) $(libgmap_la_LIBADD) $(LIBS)
 install-binPROGRAMS: $(bin_PROGRAMS)
 	@$(NORMAL_INSTALL)
 	@list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
@@ -3053,6 +3574,10 @@ gmap.avx2$(EXEEXT): $(gmap_avx2_OBJECTS) $(gmap_avx2_DEPENDENCIES) $(EXTRA_gmap_
 	@rm -f gmap.avx2$(EXEEXT)
 	$(AM_V_CCLD)$(gmap_avx2_LINK) $(gmap_avx2_OBJECTS) $(gmap_avx2_LDADD) $(LIBS)
 
+gmap.avx512$(EXEEXT): $(gmap_avx512_OBJECTS) $(gmap_avx512_DEPENDENCIES) $(EXTRA_gmap_avx512_DEPENDENCIES) 
+	@rm -f gmap.avx512$(EXEEXT)
+	$(AM_V_CCLD)$(gmap_avx512_LINK) $(gmap_avx512_OBJECTS) $(gmap_avx512_LDADD) $(LIBS)
+
 gmap.nosimd$(EXEEXT): $(gmap_nosimd_OBJECTS) $(gmap_nosimd_DEPENDENCIES) $(EXTRA_gmap_nosimd_DEPENDENCIES) 
 	@rm -f gmap.nosimd$(EXEEXT)
 	$(AM_V_CCLD)$(gmap_nosimd_LINK) $(gmap_nosimd_OBJECTS) $(gmap_nosimd_LDADD) $(LIBS)
@@ -3085,6 +3610,10 @@ gmapl.avx2$(EXEEXT): $(gmapl_avx2_OBJECTS) $(gmapl_avx2_DEPENDENCIES) $(EXTRA_gm
 	@rm -f gmapl.avx2$(EXEEXT)
 	$(AM_V_CCLD)$(gmapl_avx2_LINK) $(gmapl_avx2_OBJECTS) $(gmapl_avx2_LDADD) $(LIBS)
 
+gmapl.avx512$(EXEEXT): $(gmapl_avx512_OBJECTS) $(gmapl_avx512_DEPENDENCIES) $(EXTRA_gmapl_avx512_DEPENDENCIES) 
+	@rm -f gmapl.avx512$(EXEEXT)
+	$(AM_V_CCLD)$(gmapl_avx512_LINK) $(gmapl_avx512_OBJECTS) $(gmapl_avx512_LDADD) $(LIBS)
+
 gmapl.nosimd$(EXEEXT): $(gmapl_nosimd_OBJECTS) $(gmapl_nosimd_DEPENDENCIES) $(EXTRA_gmapl_nosimd_DEPENDENCIES) 
 	@rm -f gmapl.nosimd$(EXEEXT)
 	$(AM_V_CCLD)$(gmapl_nosimd_LINK) $(gmapl_nosimd_OBJECTS) $(gmapl_nosimd_LDADD) $(LIBS)
@@ -3113,6 +3642,10 @@ gsnap.avx2$(EXEEXT): $(gsnap_avx2_OBJECTS) $(gsnap_avx2_DEPENDENCIES) $(EXTRA_gs
 	@rm -f gsnap.avx2$(EXEEXT)
 	$(AM_V_CCLD)$(gsnap_avx2_LINK) $(gsnap_avx2_OBJECTS) $(gsnap_avx2_LDADD) $(LIBS)
 
+gsnap.avx512$(EXEEXT): $(gsnap_avx512_OBJECTS) $(gsnap_avx512_DEPENDENCIES) $(EXTRA_gsnap_avx512_DEPENDENCIES) 
+	@rm -f gsnap.avx512$(EXEEXT)
+	$(AM_V_CCLD)$(gsnap_avx512_LINK) $(gsnap_avx512_OBJECTS) $(gsnap_avx512_LDADD) $(LIBS)
+
 gsnap.nosimd$(EXEEXT): $(gsnap_nosimd_OBJECTS) $(gsnap_nosimd_DEPENDENCIES) $(EXTRA_gsnap_nosimd_DEPENDENCIES) 
 	@rm -f gsnap.nosimd$(EXEEXT)
 	$(AM_V_CCLD)$(gsnap_nosimd_LINK) $(gsnap_nosimd_OBJECTS) $(gsnap_nosimd_LDADD) $(LIBS)
@@ -3141,6 +3674,10 @@ gsnapl.avx2$(EXEEXT): $(gsnapl_avx2_OBJECTS) $(gsnapl_avx2_DEPENDENCIES) $(EXTRA
 	@rm -f gsnapl.avx2$(EXEEXT)
 	$(AM_V_CCLD)$(gsnapl_avx2_LINK) $(gsnapl_avx2_OBJECTS) $(gsnapl_avx2_LDADD) $(LIBS)
 
+gsnapl.avx512$(EXEEXT): $(gsnapl_avx512_OBJECTS) $(gsnapl_avx512_DEPENDENCIES) $(EXTRA_gsnapl_avx512_DEPENDENCIES) 
+	@rm -f gsnapl.avx512$(EXEEXT)
+	$(AM_V_CCLD)$(gsnapl_avx512_LINK) $(gsnapl_avx512_OBJECTS) $(gsnapl_avx512_LDADD) $(LIBS)
+
 gsnapl.nosimd$(EXEEXT): $(gsnapl_nosimd_OBJECTS) $(gsnapl_nosimd_DEPENDENCIES) $(EXTRA_gsnapl_nosimd_DEPENDENCIES) 
 	@rm -f gsnapl.nosimd$(EXEEXT)
 	$(AM_V_CCLD)$(gsnapl_nosimd_LINK) $(gsnapl_nosimd_OBJECTS) $(gsnapl_nosimd_LDADD) $(LIBS)
@@ -3366,6 +3903,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-maxent_hr.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-md5.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-mem.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-merge.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-oligo.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-oligoindex_hr.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-orderstat.Po@am__quote@
@@ -3395,6 +3933,92 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-uintlist.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-uinttable.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-univinterval.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-access.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-assert.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-atoi.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bigendian.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bitpack64-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bitpack64-readtwo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-block.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-boyer-moore.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bzip2.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-cellpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-changepoint.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-chimera.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-chrnum.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-chrom.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-cmet.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-compress-write.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-compress.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-datadir.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-diag.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-diagnostic.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-diagpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-doublelist.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_cdna.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_end.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_genome.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_simd.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_single.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-except.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-filestring.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-gbuffer.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome-write.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome128_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome_sites.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genomicpos.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-getopt.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-getopt1.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-gmap.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-gregion.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-iit-read-univ.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-iit-read.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-inbuffer.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-indexdb.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-indexdb_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-interval.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-intlist.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-intron.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-list.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-littleendian.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-match.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-matchpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-maxent.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-maxent_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-md5.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-mem.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-merge.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-oligo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-oligoindex_hr.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-orderstat.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-outbuffer.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-output.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-pair.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-pairpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-parserange.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-pbinom.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-popcount.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-reader.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-request.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-result.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-samheader.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-segmentpos.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-semaphore.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-sequence.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-smooth.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-splicestringpool.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-splicetrie.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-splicetrie_build.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stage1.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stage2.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stage3.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stopwatch.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-translation.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-uintlist.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-uinttable.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-univinterval.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-access.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-assert.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-atoi.Po@am__quote@
@@ -3451,6 +4075,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_nosimd-orderstat.Po at am__quote@
@@ -3536,6 +4161,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse2-orderstat.Po at am__quote@
@@ -3621,6 +4247,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse41-orderstat.Po at am__quote@
@@ -3706,6 +4333,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_sse42-orderstat.Po at am__quote@
@@ -3791,6 +4419,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmap_ssse3-orderstat.Po at am__quote@
@@ -3924,6 +4553,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-orderstat.Po at am__quote@
@@ -3954,6 +4584,93 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-uintlist.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-uinttable.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx2-univinterval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-access.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-assert.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-atoi.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-bigendian.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-bitpack64-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-block.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-boyer-moore.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-bzip2.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-cellpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-changepoint.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-chimera.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-chrnum.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-cmet.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-compress-write.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-compress.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-datadir.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-diag.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-diagnostic.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-diagpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-doublelist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-dynprog.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-dynprog_cdna.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-dynprog_end.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-dynprog_genome.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-dynprog_simd.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-dynprog_single.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-filestring.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-gbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-genome-write.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-genome.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-genome128_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-genome_sites.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-genomicpos.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-getopt.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-getopt1.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-gmap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-gregion.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-iit-read-univ.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-iit-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-inbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-indexdb.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-indexdb_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-interval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-intlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-list.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-littleendian.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-match.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-matchpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-maxent.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-maxent_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-md5.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-merge.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-oligo.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-oligoindex_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-orderstat.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-output.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-pair.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-pairpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-parserange.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-pbinom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-popcount.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-reader.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-request.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-result.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-samheader.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-segmentpos.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-semaphore.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-sequence.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-smooth.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-splicestringpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-splicetrie.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-splicetrie_build.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-stage1.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-stage2.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-stage3.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-stopwatch.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-translation.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-uint8list.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-uintlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-uinttable.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_avx512-univinterval.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-access.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-assert.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-atoi.Po at am__quote@
@@ -4010,6 +4727,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_nosimd-orderstat.Po at am__quote@
@@ -4096,6 +4814,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse2-orderstat.Po at am__quote@
@@ -4182,6 +4901,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse41-orderstat.Po at am__quote@
@@ -4268,6 +4988,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_sse42-orderstat.Po at am__quote@
@@ -4354,6 +5075,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gmapl_ssse3-orderstat.Po at am__quote@
@@ -4401,6 +5123,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-datadir.Po at am__quote@
@@ -4440,6 +5163,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-orderstat.Po at am__quote@
@@ -4456,6 +5181,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-samheader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-samprint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-semaphore.Po at am__quote@
@@ -4476,6 +5202,100 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-uintlist.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-univdiag.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx2-univinterval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-access.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-assert.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-atoi.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-bigendian.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-bitpack64-access.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-bitpack64-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-boyer-moore.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-bytecoding.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-bzip2.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-cellpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-changepoint.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-chimera.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-chrnum.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-cigar.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-cmet.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-compress.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-datadir.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-diag.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-diagpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-doublelist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-dynprog.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-dynprog_cdna.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-dynprog_end.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-dynprog_genome.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-dynprog_simd.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-dynprog_single.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-filestring.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-gbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-genome.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-genome128_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-genome_sites.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-genomicpos.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-getopt.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-getopt1.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-gsnap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-iit-read-univ.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-iit-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-inbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-indel.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-indexdb.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-indexdb_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-interval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-intlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-junction.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-list.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-littleendian.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-mapq.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-maxent.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-maxent_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-md5.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-merge.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-oligo.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-oligoindex_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-orderstat.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-output.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-pair.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-pairpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-parserange.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-pbinom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-popcount.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-reader.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-request.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-resulthr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-samheader.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-samprint.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-sarray-search.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-sedgesort.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-segmentpos.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-semaphore.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-sequence.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-shortread.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-smooth.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-spanningelt.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-splice.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-splicestringpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-splicetrie.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-splicetrie_build.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-stage1hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-stage2.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-stage3.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-stage3hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-stopwatch.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-substring.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-uintlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-univdiag.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_avx512-univinterval.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-access.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-assert.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-atoi.Po at am__quote@
@@ -4491,6 +5311,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-datadir.Po at am__quote@
@@ -4530,6 +5351,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-orderstat.Po at am__quote@
@@ -4546,6 +5369,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-samheader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-samprint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_nosimd-semaphore.Po at am__quote@
@@ -4581,6 +5405,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-datadir.Po at am__quote@
@@ -4620,6 +5445,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-orderstat.Po at am__quote@
@@ -4636,6 +5463,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-samheader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-samprint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse2-semaphore.Po at am__quote@
@@ -4671,6 +5499,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-datadir.Po at am__quote@
@@ -4710,6 +5539,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-orderstat.Po at am__quote@
@@ -4726,6 +5557,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-samheader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-samprint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse41-semaphore.Po at am__quote@
@@ -4761,6 +5593,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-datadir.Po at am__quote@
@@ -4800,6 +5633,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-orderstat.Po at am__quote@
@@ -4816,6 +5651,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-samheader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-samprint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_sse42-semaphore.Po at am__quote@
@@ -4851,6 +5687,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-datadir.Po at am__quote@
@@ -4890,6 +5727,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-orderstat.Po at am__quote@
@@ -4906,6 +5745,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-samheader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-samprint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnap_ssse3-semaphore.Po at am__quote@
@@ -4942,6 +5782,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-datadir.Po at am__quote@
@@ -4981,6 +5822,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-orderstat.Po at am__quote@
@@ -5015,6 +5858,96 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-uint8list.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-uintlist.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx2-univinterval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-access.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-assert.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-atoi.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-bigendian.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-bitpack64-access.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-bitpack64-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-boyer-moore.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-bzip2.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-cellpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-changepoint.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-chimera.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-chrnum.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-cigar.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-cmet.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-compress.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-datadir.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-diag.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-diagpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-doublelist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-dynprog.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-dynprog_cdna.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-dynprog_end.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-dynprog_genome.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-dynprog_simd.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-dynprog_single.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-except.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-filestring.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-gbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-genome.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-genome128_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-genome_sites.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-genomicpos.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-getopt.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-getopt1.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-gsnap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-iit-read-univ.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-iit-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-inbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-indel.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-indexdb.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-indexdb_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-interval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-intlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-intron.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-junction.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-list.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-littleendian.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-mapq.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-maxent.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-maxent_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-md5.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-merge.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-oligo.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-oligoindex_hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-orderstat.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-outbuffer.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-output.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-pair.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-pairpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-parserange.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-pbinom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-popcount.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-reader.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-request.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-resulthr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-samheader.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-samprint.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-segmentpos.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-semaphore.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-sequence.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-shortread.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-smooth.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-spanningelt.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-splice.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-splicestringpool.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-splicetrie.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-splicetrie_build.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-stage1hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-stage2.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-stage3.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-stage3hr.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-stopwatch.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-substring.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-uint8list.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-uintlist.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_avx512-univinterval.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-access.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-assert.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-atoi.Po at am__quote@
@@ -5029,6 +5962,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-datadir.Po at am__quote@
@@ -5068,6 +6002,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_nosimd-orderstat.Po at am__quote@
@@ -5116,6 +6052,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-datadir.Po at am__quote@
@@ -5155,6 +6092,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse2-orderstat.Po at am__quote@
@@ -5203,6 +6142,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-datadir.Po at am__quote@
@@ -5242,6 +6182,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse41-orderstat.Po at am__quote@
@@ -5290,6 +6232,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-datadir.Po at am__quote@
@@ -5329,6 +6272,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_sse42-orderstat.Po at am__quote@
@@ -5377,6 +6322,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-datadir.Po at am__quote@
@@ -5416,6 +6362,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/gsnapl_ssse3-orderstat.Po at am__quote@
@@ -5512,6 +6460,45 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_store-tableint.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_store-uintlist.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/iit_store-univinterval.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-access.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-assert.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-bigendian.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-bitpack64-read.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-bitpack64-readtwo.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-bytecoding.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-bzip2.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-chrnum.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-chrom.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-compress.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-datadir.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-doublelist.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-except.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-filestring.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-genome.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-genome128_hr.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-genomicpos.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-iit-read-univ.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-iit-read.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-iit-write.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-interval.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-intlist.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-list.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-littleendian.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-maxent_hr.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-md5.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-mem.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-parserange.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-popcount.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-sarray-read.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-semaphore.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-sequence.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-stopwatch.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-table.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-tableuint.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-uint8list.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-uintlist.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-uinttable.Plo at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/libgmap_la-univinterval.Plo at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-access.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-assert.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/sam_sort-bigendian.Po at am__quote@
@@ -5588,6 +6575,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-datadir.Po at am__quote@
@@ -5624,6 +6612,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-orderstat.Po at am__quote@
@@ -5635,6 +6625,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-reader.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-resulthr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-sarray-read.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-sarray-search.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-sedgesort.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-segmentpos.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscan-semaphore.Po at am__quote@
@@ -5671,6 +6662,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-chimera.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-chrnum.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-chrom.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-cigar.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-cmet.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-compress.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-datadir.Po at am__quote@
@@ -5707,6 +6699,8 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-maxent_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-md5.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-mem.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-merge-heap.Po at am__quote@
+ at AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-merge.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-oligo.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-oligoindex_hr.Po at am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote at ./$(DEPDIR)/uniqscanl-orderstat.Po at am__quote@
@@ -5760,6 +6754,279 @@ distclean-compile:
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LTCOMPILE) -c -o $@ $<
 
+libgmap_la-except.lo: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-except.lo -MD -MP -MF $(DEPDIR)/libgmap_la-except.Tpo -c -o libgmap_la-except.lo `test -f 'except.c' || echo '$(srcdir)/'`except.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-except.Tpo $(DEPDIR)/libgmap_la-except.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='libgmap_la-except.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-except.lo `test -f 'except.c' || echo '$(srcdir)/'`except.c
+
+libgmap_la-assert.lo: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-assert.lo -MD -MP -MF $(DEPDIR)/libgmap_la-assert.Tpo -c -o libgmap_la-assert.lo `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-assert.Tpo $(DEPDIR)/libgmap_la-assert.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='libgmap_la-assert.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-assert.lo `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+
+libgmap_la-mem.lo: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-mem.lo -MD -MP -MF $(DEPDIR)/libgmap_la-mem.Tpo -c -o libgmap_la-mem.lo `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-mem.Tpo $(DEPDIR)/libgmap_la-mem.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='libgmap_la-mem.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-mem.lo `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+
+libgmap_la-intlist.lo: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-intlist.lo -MD -MP -MF $(DEPDIR)/libgmap_la-intlist.Tpo -c -o libgmap_la-intlist.lo `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-intlist.Tpo $(DEPDIR)/libgmap_la-intlist.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='libgmap_la-intlist.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-intlist.lo `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+
+libgmap_la-list.lo: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-list.lo -MD -MP -MF $(DEPDIR)/libgmap_la-list.Tpo -c -o libgmap_la-list.lo `test -f 'list.c' || echo '$(srcdir)/'`list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-list.Tpo $(DEPDIR)/libgmap_la-list.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='libgmap_la-list.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-list.lo `test -f 'list.c' || echo '$(srcdir)/'`list.c
+
+libgmap_la-doublelist.lo: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-doublelist.lo -MD -MP -MF $(DEPDIR)/libgmap_la-doublelist.Tpo -c -o libgmap_la-doublelist.lo `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-doublelist.Tpo $(DEPDIR)/libgmap_la-doublelist.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='libgmap_la-doublelist.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-doublelist.lo `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+
+libgmap_la-littleendian.lo: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-littleendian.lo -MD -MP -MF $(DEPDIR)/libgmap_la-littleendian.Tpo -c -o libgmap_la-littleendian.lo `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-littleendian.Tpo $(DEPDIR)/libgmap_la-littleendian.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='libgmap_la-littleendian.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-littleendian.lo `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+
+libgmap_la-bigendian.lo: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-bigendian.lo -MD -MP -MF $(DEPDIR)/libgmap_la-bigendian.Tpo -c -o libgmap_la-bigendian.lo `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-bigendian.Tpo $(DEPDIR)/libgmap_la-bigendian.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='libgmap_la-bigendian.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-bigendian.lo `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+
+libgmap_la-interval.lo: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-interval.lo -MD -MP -MF $(DEPDIR)/libgmap_la-interval.Tpo -c -o libgmap_la-interval.lo `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-interval.Tpo $(DEPDIR)/libgmap_la-interval.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='libgmap_la-interval.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-interval.lo `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+
+libgmap_la-uintlist.lo: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-uintlist.lo -MD -MP -MF $(DEPDIR)/libgmap_la-uintlist.Tpo -c -o libgmap_la-uintlist.lo `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-uintlist.Tpo $(DEPDIR)/libgmap_la-uintlist.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='libgmap_la-uintlist.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uintlist.lo `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+
+libgmap_la-uint8list.lo: uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-uint8list.lo -MD -MP -MF $(DEPDIR)/libgmap_la-uint8list.Tpo -c -o libgmap_la-uint8list.lo `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-uint8list.Tpo $(DEPDIR)/libgmap_la-uint8list.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uint8list.c' object='libgmap_la-uint8list.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uint8list.lo `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+libgmap_la-iit-read.lo: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-iit-read.lo -MD -MP -MF $(DEPDIR)/libgmap_la-iit-read.Tpo -c -o libgmap_la-iit-read.lo `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-iit-read.Tpo $(DEPDIR)/libgmap_la-iit-read.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='libgmap_la-iit-read.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-iit-read.lo `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+
+libgmap_la-iit-write.lo: iit-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-iit-write.lo -MD -MP -MF $(DEPDIR)/libgmap_la-iit-write.Tpo -c -o libgmap_la-iit-write.lo `test -f 'iit-write.c' || echo '$(srcdir)/'`iit-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-iit-write.Tpo $(DEPDIR)/libgmap_la-iit-write.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-write.c' object='libgmap_la-iit-write.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-iit-write.lo `test -f 'iit-write.c' || echo '$(srcdir)/'`iit-write.c
+
+libgmap_la-parserange.lo: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-parserange.lo -MD -MP -MF $(DEPDIR)/libgmap_la-parserange.Tpo -c -o libgmap_la-parserange.lo `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-parserange.Tpo $(DEPDIR)/libgmap_la-parserange.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='libgmap_la-parserange.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-parserange.lo `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+
+libgmap_la-univinterval.lo: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-univinterval.lo -MD -MP -MF $(DEPDIR)/libgmap_la-univinterval.Tpo -c -o libgmap_la-univinterval.lo `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-univinterval.Tpo $(DEPDIR)/libgmap_la-univinterval.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='libgmap_la-univinterval.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-univinterval.lo `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+
+libgmap_la-iit-read-univ.lo: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-iit-read-univ.lo -MD -MP -MF $(DEPDIR)/libgmap_la-iit-read-univ.Tpo -c -o libgmap_la-iit-read-univ.lo `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-iit-read-univ.Tpo $(DEPDIR)/libgmap_la-iit-read-univ.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='libgmap_la-iit-read-univ.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-iit-read-univ.lo `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+
+libgmap_la-stopwatch.lo: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-stopwatch.lo -MD -MP -MF $(DEPDIR)/libgmap_la-stopwatch.Tpo -c -o libgmap_la-stopwatch.lo `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-stopwatch.Tpo $(DEPDIR)/libgmap_la-stopwatch.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='libgmap_la-stopwatch.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-stopwatch.lo `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+
+libgmap_la-semaphore.lo: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-semaphore.lo -MD -MP -MF $(DEPDIR)/libgmap_la-semaphore.Tpo -c -o libgmap_la-semaphore.lo `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-semaphore.Tpo $(DEPDIR)/libgmap_la-semaphore.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='libgmap_la-semaphore.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-semaphore.lo `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+
+libgmap_la-access.lo: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-access.lo -MD -MP -MF $(DEPDIR)/libgmap_la-access.Tpo -c -o libgmap_la-access.lo `test -f 'access.c' || echo '$(srcdir)/'`access.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-access.Tpo $(DEPDIR)/libgmap_la-access.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='libgmap_la-access.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-access.lo `test -f 'access.c' || echo '$(srcdir)/'`access.c
+
+libgmap_la-table.lo: table.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-table.lo -MD -MP -MF $(DEPDIR)/libgmap_la-table.Tpo -c -o libgmap_la-table.lo `test -f 'table.c' || echo '$(srcdir)/'`table.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-table.Tpo $(DEPDIR)/libgmap_la-table.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='table.c' object='libgmap_la-table.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-table.lo `test -f 'table.c' || echo '$(srcdir)/'`table.c
+
+libgmap_la-tableuint.lo: tableuint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-tableuint.lo -MD -MP -MF $(DEPDIR)/libgmap_la-tableuint.Tpo -c -o libgmap_la-tableuint.lo `test -f 'tableuint.c' || echo '$(srcdir)/'`tableuint.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-tableuint.Tpo $(DEPDIR)/libgmap_la-tableuint.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='tableuint.c' object='libgmap_la-tableuint.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-tableuint.lo `test -f 'tableuint.c' || echo '$(srcdir)/'`tableuint.c
+
+libgmap_la-uinttable.lo: uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-uinttable.lo -MD -MP -MF $(DEPDIR)/libgmap_la-uinttable.Tpo -c -o libgmap_la-uinttable.lo `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-uinttable.Tpo $(DEPDIR)/libgmap_la-uinttable.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uinttable.c' object='libgmap_la-uinttable.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uinttable.lo `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+
+libgmap_la-chrom.lo: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-chrom.lo -MD -MP -MF $(DEPDIR)/libgmap_la-chrom.Tpo -c -o libgmap_la-chrom.lo `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-chrom.Tpo $(DEPDIR)/libgmap_la-chrom.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='libgmap_la-chrom.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-chrom.lo `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+
+libgmap_la-filestring.lo: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-filestring.lo -MD -MP -MF $(DEPDIR)/libgmap_la-filestring.Tpo -c -o libgmap_la-filestring.lo `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-filestring.Tpo $(DEPDIR)/libgmap_la-filestring.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='libgmap_la-filestring.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-filestring.lo `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+libgmap_la-md5.lo: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-md5.lo -MD -MP -MF $(DEPDIR)/libgmap_la-md5.Tpo -c -o libgmap_la-md5.lo `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-md5.Tpo $(DEPDIR)/libgmap_la-md5.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='libgmap_la-md5.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-md5.lo `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+
+libgmap_la-bzip2.lo: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-bzip2.lo -MD -MP -MF $(DEPDIR)/libgmap_la-bzip2.Tpo -c -o libgmap_la-bzip2.lo `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-bzip2.Tpo $(DEPDIR)/libgmap_la-bzip2.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='libgmap_la-bzip2.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-bzip2.lo `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+
+libgmap_la-sequence.lo: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-sequence.lo -MD -MP -MF $(DEPDIR)/libgmap_la-sequence.Tpo -c -o libgmap_la-sequence.lo `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-sequence.Tpo $(DEPDIR)/libgmap_la-sequence.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='libgmap_la-sequence.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-sequence.lo `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+
+libgmap_la-genomicpos.lo: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-genomicpos.lo -MD -MP -MF $(DEPDIR)/libgmap_la-genomicpos.Tpo -c -o libgmap_la-genomicpos.lo `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-genomicpos.Tpo $(DEPDIR)/libgmap_la-genomicpos.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='libgmap_la-genomicpos.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-genomicpos.lo `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+
+libgmap_la-bitpack64-read.lo: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-bitpack64-read.lo -MD -MP -MF $(DEPDIR)/libgmap_la-bitpack64-read.Tpo -c -o libgmap_la-bitpack64-read.lo `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-bitpack64-read.Tpo $(DEPDIR)/libgmap_la-bitpack64-read.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='libgmap_la-bitpack64-read.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-bitpack64-read.lo `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+
+libgmap_la-bitpack64-readtwo.lo: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-bitpack64-readtwo.lo -MD -MP -MF $(DEPDIR)/libgmap_la-bitpack64-readtwo.Tpo -c -o libgmap_la-bitpack64-readtwo.lo `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-bitpack64-readtwo.Tpo $(DEPDIR)/libgmap_la-bitpack64-readtwo.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='libgmap_la-bitpack64-readtwo.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-bitpack64-readtwo.lo `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+
+libgmap_la-maxent_hr.lo: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-maxent_hr.lo -MD -MP -MF $(DEPDIR)/libgmap_la-maxent_hr.Tpo -c -o libgmap_la-maxent_hr.lo `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-maxent_hr.Tpo $(DEPDIR)/libgmap_la-maxent_hr.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='libgmap_la-maxent_hr.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-maxent_hr.lo `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+
+libgmap_la-popcount.lo: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-popcount.lo -MD -MP -MF $(DEPDIR)/libgmap_la-popcount.Tpo -c -o libgmap_la-popcount.lo `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-popcount.Tpo $(DEPDIR)/libgmap_la-popcount.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='libgmap_la-popcount.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-popcount.lo `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+
+libgmap_la-genome128_hr.lo: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-genome128_hr.lo -MD -MP -MF $(DEPDIR)/libgmap_la-genome128_hr.Tpo -c -o libgmap_la-genome128_hr.lo `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-genome128_hr.Tpo $(DEPDIR)/libgmap_la-genome128_hr.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='libgmap_la-genome128_hr.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-genome128_hr.lo `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+
+libgmap_la-compress.lo: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-compress.lo -MD -MP -MF $(DEPDIR)/libgmap_la-compress.Tpo -c -o libgmap_la-compress.lo `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-compress.Tpo $(DEPDIR)/libgmap_la-compress.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='libgmap_la-compress.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-compress.lo `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+
+libgmap_la-bytecoding.lo: bytecoding.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-bytecoding.lo -MD -MP -MF $(DEPDIR)/libgmap_la-bytecoding.Tpo -c -o libgmap_la-bytecoding.lo `test -f 'bytecoding.c' || echo '$(srcdir)/'`bytecoding.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-bytecoding.Tpo $(DEPDIR)/libgmap_la-bytecoding.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bytecoding.c' object='libgmap_la-bytecoding.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-bytecoding.lo `test -f 'bytecoding.c' || echo '$(srcdir)/'`bytecoding.c
+
+libgmap_la-sarray-read.lo: sarray-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-sarray-read.lo -MD -MP -MF $(DEPDIR)/libgmap_la-sarray-read.Tpo -c -o libgmap_la-sarray-read.lo `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-sarray-read.Tpo $(DEPDIR)/libgmap_la-sarray-read.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-read.c' object='libgmap_la-sarray-read.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-sarray-read.lo `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
+
+libgmap_la-chrnum.lo: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-chrnum.lo -MD -MP -MF $(DEPDIR)/libgmap_la-chrnum.Tpo -c -o libgmap_la-chrnum.lo `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-chrnum.Tpo $(DEPDIR)/libgmap_la-chrnum.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='libgmap_la-chrnum.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-chrnum.lo `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+
+libgmap_la-genome.lo: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-genome.lo -MD -MP -MF $(DEPDIR)/libgmap_la-genome.Tpo -c -o libgmap_la-genome.lo `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-genome.Tpo $(DEPDIR)/libgmap_la-genome.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='libgmap_la-genome.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-genome.lo `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+
+libgmap_la-datadir.lo: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-datadir.lo -MD -MP -MF $(DEPDIR)/libgmap_la-datadir.Tpo -c -o libgmap_la-datadir.lo `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-datadir.Tpo $(DEPDIR)/libgmap_la-datadir.Plo
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='libgmap_la-datadir.lo' libtool=yes @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-datadir.lo `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+
 atoiindex-except.o: except.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(atoiindex_CFLAGS) $(CFLAGS) -MT atoiindex-except.o -MD -MP -MF $(DEPDIR)/atoiindex-except.Tpo -c -o atoiindex-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/atoiindex-except.Tpo $(DEPDIR)/atoiindex-except.Po
@@ -7804,6 +9071,20 @@ gmap_avx2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmap_avx2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -MT gmap_avx2-merge.o -MD -MP -MF $(DEPDIR)/gmap_avx2-merge.Tpo -c -o gmap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx2-merge.Tpo $(DEPDIR)/gmap_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_avx2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_avx2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -MT gmap_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gmap_avx2-merge.Tpo -c -o gmap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx2-merge.Tpo $(DEPDIR)/gmap_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmap_avx2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -MT gmap_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_avx2-indexdb.Tpo -c -o gmap_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx2-indexdb.Tpo $(DEPDIR)/gmap_avx2-indexdb.Po
@@ -8560,6 +9841,1210 @@ gmap_avx2-gmap.obj: gmap.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi`
 
+gmap_avx512-except.o: except.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-except.o -MD -MP -MF $(DEPDIR)/gmap_avx512-except.Tpo -c -o gmap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-except.Tpo $(DEPDIR)/gmap_avx512-except.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gmap_avx512-except.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+
+gmap_avx512-except.obj: except.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-except.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-except.Tpo -c -o gmap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-except.Tpo $(DEPDIR)/gmap_avx512-except.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gmap_avx512-except.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+
+gmap_avx512-assert.o: assert.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-assert.o -MD -MP -MF $(DEPDIR)/gmap_avx512-assert.Tpo -c -o gmap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-assert.Tpo $(DEPDIR)/gmap_avx512-assert.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gmap_avx512-assert.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+
+gmap_avx512-assert.obj: assert.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-assert.Tpo -c -o gmap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-assert.Tpo $(DEPDIR)/gmap_avx512-assert.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gmap_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+
+gmap_avx512-mem.o: mem.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-mem.o -MD -MP -MF $(DEPDIR)/gmap_avx512-mem.Tpo -c -o gmap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-mem.Tpo $(DEPDIR)/gmap_avx512-mem.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gmap_avx512-mem.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+
+gmap_avx512-mem.obj: mem.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-mem.Tpo -c -o gmap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-mem.Tpo $(DEPDIR)/gmap_avx512-mem.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gmap_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+
+gmap_avx512-intlist.o: intlist.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gmap_avx512-intlist.Tpo -c -o gmap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intlist.Tpo $(DEPDIR)/gmap_avx512-intlist.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gmap_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+
+gmap_avx512-intlist.obj: intlist.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-intlist.Tpo -c -o gmap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intlist.Tpo $(DEPDIR)/gmap_avx512-intlist.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gmap_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+
+gmap_avx512-list.o: list.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-list.o -MD -MP -MF $(DEPDIR)/gmap_avx512-list.Tpo -c -o gmap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-list.Tpo $(DEPDIR)/gmap_avx512-list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gmap_avx512-list.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+
+gmap_avx512-list.obj: list.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-list.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-list.Tpo -c -o gmap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-list.Tpo $(DEPDIR)/gmap_avx512-list.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gmap_avx512-list.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+
+gmap_avx512-littleendian.o: littleendian.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gmap_avx512-littleendian.Tpo -c -o gmap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-littleendian.Tpo $(DEPDIR)/gmap_avx512-littleendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gmap_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+
+gmap_avx512-littleendian.obj: littleendian.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-littleendian.Tpo -c -o gmap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-littleendian.Tpo $(DEPDIR)/gmap_avx512-littleendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gmap_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+
+gmap_avx512-bigendian.o: bigendian.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bigendian.Tpo -c -o gmap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bigendian.Tpo $(DEPDIR)/gmap_avx512-bigendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gmap_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+
+gmap_avx512-bigendian.obj: bigendian.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bigendian.Tpo -c -o gmap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bigendian.Tpo $(DEPDIR)/gmap_avx512-bigendian.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gmap_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+
+gmap_avx512-univinterval.o: univinterval.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gmap_avx512-univinterval.Tpo -c -o gmap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-univinterval.Tpo $(DEPDIR)/gmap_avx512-univinterval.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gmap_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+
+gmap_avx512-univinterval.obj: univinterval.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-univinterval.Tpo -c -o gmap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-univinterval.Tpo $(DEPDIR)/gmap_avx512-univinterval.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gmap_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+
+gmap_avx512-interval.o: interval.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-interval.o -MD -MP -MF $(DEPDIR)/gmap_avx512-interval.Tpo -c -o gmap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-interval.Tpo $(DEPDIR)/gmap_avx512-interval.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gmap_avx512-interval.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+
+gmap_avx512-interval.obj: interval.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-interval.Tpo -c -o gmap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-interval.Tpo $(DEPDIR)/gmap_avx512-interval.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gmap_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+
+gmap_avx512-uintlist.o: uintlist.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gmap_avx512-uintlist.Tpo -c -o gmap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uintlist.Tpo $(DEPDIR)/gmap_avx512-uintlist.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gmap_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+
+gmap_avx512-uintlist.obj: uintlist.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-uintlist.Tpo -c -o gmap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uintlist.Tpo $(DEPDIR)/gmap_avx512-uintlist.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gmap_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+
+gmap_avx512-stopwatch.o: stopwatch.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stopwatch.Tpo -c -o gmap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stopwatch.Tpo $(DEPDIR)/gmap_avx512-stopwatch.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gmap_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+
+gmap_avx512-stopwatch.obj: stopwatch.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stopwatch.Tpo -c -o gmap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stopwatch.Tpo $(DEPDIR)/gmap_avx512-stopwatch.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gmap_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+
+gmap_avx512-semaphore.o: semaphore.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gmap_avx512-semaphore.Tpo -c -o gmap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-semaphore.Tpo $(DEPDIR)/gmap_avx512-semaphore.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gmap_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+
+gmap_avx512-semaphore.obj: semaphore.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-semaphore.Tpo -c -o gmap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-semaphore.Tpo $(DEPDIR)/gmap_avx512-semaphore.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gmap_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+
+gmap_avx512-access.o: access.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-access.o -MD -MP -MF $(DEPDIR)/gmap_avx512-access.Tpo -c -o gmap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-access.Tpo $(DEPDIR)/gmap_avx512-access.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gmap_avx512-access.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+
+gmap_avx512-access.obj: access.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-access.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-access.Tpo -c -o gmap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-access.Tpo $(DEPDIR)/gmap_avx512-access.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gmap_avx512-access.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+
+gmap_avx512-filestring.o: filestring.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gmap_avx512-filestring.Tpo -c -o gmap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-filestring.Tpo $(DEPDIR)/gmap_avx512-filestring.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gmap_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gmap_avx512-filestring.obj: filestring.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-filestring.Tpo -c -o gmap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-filestring.Tpo $(DEPDIR)/gmap_avx512-filestring.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gmap_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
+gmap_avx512-iit-read-univ.o: iit-read-univ.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo -c -o gmap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo $(DEPDIR)/gmap_avx512-iit-read-univ.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gmap_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+
+gmap_avx512-iit-read-univ.obj: iit-read-univ.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo -c -o gmap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo $(DEPDIR)/gmap_avx512-iit-read-univ.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gmap_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+
+gmap_avx512-iit-read.o: iit-read.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read.Tpo -c -o gmap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read.Tpo $(DEPDIR)/gmap_avx512-iit-read.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gmap_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+
+gmap_avx512-iit-read.obj: iit-read.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read.Tpo -c -o gmap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read.Tpo $(DEPDIR)/gmap_avx512-iit-read.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gmap_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+
+gmap_avx512-md5.o: md5.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-md5.o -MD -MP -MF $(DEPDIR)/gmap_avx512-md5.Tpo -c -o gmap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-md5.Tpo $(DEPDIR)/gmap_avx512-md5.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gmap_avx512-md5.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+
+gmap_avx512-md5.obj: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-md5.Tpo -c -o gmap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-md5.Tpo $(DEPDIR)/gmap_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gmap_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+
+gmap_avx512-bzip2.o: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bzip2.Tpo -c -o gmap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bzip2.Tpo $(DEPDIR)/gmap_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gmap_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+
+gmap_avx512-bzip2.obj: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bzip2.Tpo -c -o gmap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bzip2.Tpo $(DEPDIR)/gmap_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gmap_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+
+gmap_avx512-sequence.o: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gmap_avx512-sequence.Tpo -c -o gmap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-sequence.Tpo $(DEPDIR)/gmap_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gmap_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+
+gmap_avx512-sequence.obj: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-sequence.Tpo -c -o gmap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-sequence.Tpo $(DEPDIR)/gmap_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gmap_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+
+gmap_avx512-reader.o: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-reader.o -MD -MP -MF $(DEPDIR)/gmap_avx512-reader.Tpo -c -o gmap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-reader.Tpo $(DEPDIR)/gmap_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gmap_avx512-reader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+
+gmap_avx512-reader.obj: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-reader.Tpo -c -o gmap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-reader.Tpo $(DEPDIR)/gmap_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gmap_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+
+gmap_avx512-genomicpos.o: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genomicpos.Tpo -c -o gmap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genomicpos.Tpo $(DEPDIR)/gmap_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gmap_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+
+gmap_avx512-genomicpos.obj: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genomicpos.Tpo -c -o gmap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genomicpos.Tpo $(DEPDIR)/gmap_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gmap_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+
+gmap_avx512-compress.o: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-compress.o -MD -MP -MF $(DEPDIR)/gmap_avx512-compress.Tpo -c -o gmap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress.Tpo $(DEPDIR)/gmap_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gmap_avx512-compress.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+
+gmap_avx512-compress.obj: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-compress.Tpo -c -o gmap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress.Tpo $(DEPDIR)/gmap_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gmap_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+
+gmap_avx512-compress-write.o: compress-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-compress-write.o -MD -MP -MF $(DEPDIR)/gmap_avx512-compress-write.Tpo -c -o gmap_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress-write.Tpo $(DEPDIR)/gmap_avx512-compress-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress-write.c' object='gmap_avx512-compress-write.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c
+
+gmap_avx512-compress-write.obj: compress-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-compress-write.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-compress-write.Tpo -c -o gmap_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress-write.Tpo $(DEPDIR)/gmap_avx512-compress-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress-write.c' object='gmap_avx512-compress-write.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi`
+
+gmap_avx512-gbuffer.o: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gmap_avx512-gbuffer.Tpo -c -o gmap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gbuffer.Tpo $(DEPDIR)/gmap_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gmap_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+
+gmap_avx512-gbuffer.obj: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-gbuffer.Tpo -c -o gmap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gbuffer.Tpo $(DEPDIR)/gmap_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gmap_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+
+gmap_avx512-genome.o: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome.Tpo -c -o gmap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome.Tpo $(DEPDIR)/gmap_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gmap_avx512-genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+
+gmap_avx512-genome.obj: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome.Tpo -c -o gmap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome.Tpo $(DEPDIR)/gmap_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gmap_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+
+gmap_avx512-popcount.o: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gmap_avx512-popcount.Tpo -c -o gmap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-popcount.Tpo $(DEPDIR)/gmap_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gmap_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+
+gmap_avx512-popcount.obj: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-popcount.Tpo -c -o gmap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-popcount.Tpo $(DEPDIR)/gmap_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gmap_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+
+gmap_avx512-genome128_hr.o: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome128_hr.Tpo -c -o gmap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome128_hr.Tpo $(DEPDIR)/gmap_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gmap_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+
+gmap_avx512-genome128_hr.obj: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome128_hr.Tpo -c -o gmap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome128_hr.Tpo $(DEPDIR)/gmap_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gmap_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+
+gmap_avx512-genome_sites.o: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome_sites.Tpo -c -o gmap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome_sites.Tpo $(DEPDIR)/gmap_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gmap_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+
+gmap_avx512-genome_sites.obj: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome_sites.Tpo -c -o gmap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome_sites.Tpo $(DEPDIR)/gmap_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gmap_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+
+gmap_avx512-genome-write.o: genome-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome-write.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome-write.Tpo -c -o gmap_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome-write.Tpo $(DEPDIR)/gmap_avx512-genome-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome-write.c' object='gmap_avx512-genome-write.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c
+
+gmap_avx512-genome-write.obj: genome-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome-write.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome-write.Tpo -c -o gmap_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome-write.Tpo $(DEPDIR)/gmap_avx512-genome-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome-write.c' object='gmap_avx512-genome-write.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi`
+
+gmap_avx512-bitpack64-read.o: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo -c -o gmap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo $(DEPDIR)/gmap_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gmap_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+
+gmap_avx512-bitpack64-read.obj: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo -c -o gmap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo $(DEPDIR)/gmap_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gmap_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+
+gmap_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo -c -o gmap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gmap_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+
+gmap_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo -c -o gmap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gmap_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+
+gmap_avx512-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-merge.o -MD -MP -MF $(DEPDIR)/gmap_avx512-merge.Tpo -c -o gmap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-merge.Tpo $(DEPDIR)/gmap_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_avx512-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_avx512-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-merge.Tpo -c -o gmap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-merge.Tpo $(DEPDIR)/gmap_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
+gmap_avx512-indexdb.o: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb.Tpo -c -o gmap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb.Tpo $(DEPDIR)/gmap_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gmap_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+
+gmap_avx512-indexdb.obj: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb.Tpo -c -o gmap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb.Tpo $(DEPDIR)/gmap_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gmap_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+
+gmap_avx512-indexdb_hr.o: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo -c -o gmap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo $(DEPDIR)/gmap_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gmap_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+
+gmap_avx512-indexdb_hr.obj: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo -c -o gmap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo $(DEPDIR)/gmap_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gmap_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+
+gmap_avx512-oligo.o: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gmap_avx512-oligo.Tpo -c -o gmap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligo.Tpo $(DEPDIR)/gmap_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gmap_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+
+gmap_avx512-oligo.obj: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-oligo.Tpo -c -o gmap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligo.Tpo $(DEPDIR)/gmap_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gmap_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+
+gmap_avx512-block.o: block.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-block.o -MD -MP -MF $(DEPDIR)/gmap_avx512-block.Tpo -c -o gmap_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-block.Tpo $(DEPDIR)/gmap_avx512-block.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='block.c' object='gmap_avx512-block.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c
+
+gmap_avx512-block.obj: block.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-block.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-block.Tpo -c -o gmap_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-block.Tpo $(DEPDIR)/gmap_avx512-block.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='block.c' object='gmap_avx512-block.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi`
+
+gmap_avx512-chrom.o: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gmap_avx512-chrom.Tpo -c -o gmap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrom.Tpo $(DEPDIR)/gmap_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gmap_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+
+gmap_avx512-chrom.obj: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-chrom.Tpo -c -o gmap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrom.Tpo $(DEPDIR)/gmap_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gmap_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+
+gmap_avx512-segmentpos.o: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gmap_avx512-segmentpos.Tpo -c -o gmap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-segmentpos.Tpo $(DEPDIR)/gmap_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gmap_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+
+gmap_avx512-segmentpos.obj: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-segmentpos.Tpo -c -o gmap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-segmentpos.Tpo $(DEPDIR)/gmap_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gmap_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+
+gmap_avx512-chrnum.o: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gmap_avx512-chrnum.Tpo -c -o gmap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrnum.Tpo $(DEPDIR)/gmap_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gmap_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+
+gmap_avx512-chrnum.obj: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-chrnum.Tpo -c -o gmap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrnum.Tpo $(DEPDIR)/gmap_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gmap_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+
+gmap_avx512-uinttable.o: uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uinttable.o -MD -MP -MF $(DEPDIR)/gmap_avx512-uinttable.Tpo -c -o gmap_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uinttable.Tpo $(DEPDIR)/gmap_avx512-uinttable.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uinttable.c' object='gmap_avx512-uinttable.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+
+gmap_avx512-uinttable.obj: uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uinttable.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-uinttable.Tpo -c -o gmap_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uinttable.Tpo $(DEPDIR)/gmap_avx512-uinttable.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uinttable.c' object='gmap_avx512-uinttable.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi`
+
+gmap_avx512-gregion.o: gregion.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gregion.o -MD -MP -MF $(DEPDIR)/gmap_avx512-gregion.Tpo -c -o gmap_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gregion.Tpo $(DEPDIR)/gmap_avx512-gregion.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gregion.c' object='gmap_avx512-gregion.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c
+
+gmap_avx512-gregion.obj: gregion.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gregion.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-gregion.Tpo -c -o gmap_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gregion.Tpo $(DEPDIR)/gmap_avx512-gregion.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gregion.c' object='gmap_avx512-gregion.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi`
+
+gmap_avx512-match.o: match.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-match.o -MD -MP -MF $(DEPDIR)/gmap_avx512-match.Tpo -c -o gmap_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-match.Tpo $(DEPDIR)/gmap_avx512-match.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='match.c' object='gmap_avx512-match.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c
+
+gmap_avx512-match.obj: match.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-match.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-match.Tpo -c -o gmap_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-match.Tpo $(DEPDIR)/gmap_avx512-match.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='match.c' object='gmap_avx512-match.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi`
+
+gmap_avx512-matchpool.o: matchpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-matchpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-matchpool.Tpo -c -o gmap_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-matchpool.Tpo $(DEPDIR)/gmap_avx512-matchpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='matchpool.c' object='gmap_avx512-matchpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c
+
+gmap_avx512-matchpool.obj: matchpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-matchpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-matchpool.Tpo -c -o gmap_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-matchpool.Tpo $(DEPDIR)/gmap_avx512-matchpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='matchpool.c' object='gmap_avx512-matchpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi`
+
+gmap_avx512-diagnostic.o: diagnostic.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagnostic.o -MD -MP -MF $(DEPDIR)/gmap_avx512-diagnostic.Tpo -c -o gmap_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagnostic.Tpo $(DEPDIR)/gmap_avx512-diagnostic.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagnostic.c' object='gmap_avx512-diagnostic.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c
+
+gmap_avx512-diagnostic.obj: diagnostic.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagnostic.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-diagnostic.Tpo -c -o gmap_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagnostic.Tpo $(DEPDIR)/gmap_avx512-diagnostic.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagnostic.c' object='gmap_avx512-diagnostic.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi`
+
+gmap_avx512-stage1.o: stage1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage1.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stage1.Tpo -c -o gmap_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage1.Tpo $(DEPDIR)/gmap_avx512-stage1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1.c' object='gmap_avx512-stage1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c
+
+gmap_avx512-stage1.obj: stage1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage1.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stage1.Tpo -c -o gmap_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage1.Tpo $(DEPDIR)/gmap_avx512-stage1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1.c' object='gmap_avx512-stage1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi`
+
+gmap_avx512-diag.o: diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diag.o -MD -MP -MF $(DEPDIR)/gmap_avx512-diag.Tpo -c -o gmap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diag.Tpo $(DEPDIR)/gmap_avx512-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gmap_avx512-diag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+
+gmap_avx512-diag.obj: diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-diag.Tpo -c -o gmap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diag.Tpo $(DEPDIR)/gmap_avx512-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gmap_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+
+gmap_avx512-diagpool.o: diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-diagpool.Tpo -c -o gmap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagpool.Tpo $(DEPDIR)/gmap_avx512-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gmap_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+
+gmap_avx512-diagpool.obj: diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-diagpool.Tpo -c -o gmap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagpool.Tpo $(DEPDIR)/gmap_avx512-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gmap_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+
+gmap_avx512-cmet.o: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gmap_avx512-cmet.Tpo -c -o gmap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cmet.Tpo $(DEPDIR)/gmap_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gmap_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+
+gmap_avx512-cmet.obj: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-cmet.Tpo -c -o gmap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cmet.Tpo $(DEPDIR)/gmap_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gmap_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+
+gmap_avx512-atoi.o: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gmap_avx512-atoi.Tpo -c -o gmap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-atoi.Tpo $(DEPDIR)/gmap_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gmap_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+
+gmap_avx512-atoi.obj: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-atoi.Tpo -c -o gmap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-atoi.Tpo $(DEPDIR)/gmap_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gmap_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+
+gmap_avx512-orderstat.o: orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gmap_avx512-orderstat.Tpo -c -o gmap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-orderstat.Tpo $(DEPDIR)/gmap_avx512-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gmap_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+
+gmap_avx512-orderstat.obj: orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-orderstat.Tpo -c -o gmap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-orderstat.Tpo $(DEPDIR)/gmap_avx512-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gmap_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+
+gmap_avx512-oligoindex_hr.o: oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo -c -o gmap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmap_avx512-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gmap_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+
+gmap_avx512-oligoindex_hr.obj: oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo -c -o gmap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmap_avx512-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gmap_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+
+gmap_avx512-intron.o: intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intron.o -MD -MP -MF $(DEPDIR)/gmap_avx512-intron.Tpo -c -o gmap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intron.Tpo $(DEPDIR)/gmap_avx512-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gmap_avx512-intron.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+
+gmap_avx512-intron.obj: intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-intron.Tpo -c -o gmap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intron.Tpo $(DEPDIR)/gmap_avx512-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gmap_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+
+gmap_avx512-maxent.o: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent.Tpo -c -o gmap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent.Tpo $(DEPDIR)/gmap_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gmap_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+
+gmap_avx512-maxent.obj: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent.Tpo -c -o gmap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent.Tpo $(DEPDIR)/gmap_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gmap_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+
+gmap_avx512-maxent_hr.o: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent_hr.Tpo -c -o gmap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent_hr.Tpo $(DEPDIR)/gmap_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gmap_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+
+gmap_avx512-maxent_hr.obj: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent_hr.Tpo -c -o gmap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent_hr.Tpo $(DEPDIR)/gmap_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gmap_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+
+gmap_avx512-pair.o: pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pair.o -MD -MP -MF $(DEPDIR)/gmap_avx512-pair.Tpo -c -o gmap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pair.Tpo $(DEPDIR)/gmap_avx512-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gmap_avx512-pair.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+
+gmap_avx512-pair.obj: pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-pair.Tpo -c -o gmap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pair.Tpo $(DEPDIR)/gmap_avx512-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gmap_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+
+gmap_avx512-pairpool.o: pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-pairpool.Tpo -c -o gmap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pairpool.Tpo $(DEPDIR)/gmap_avx512-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gmap_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+
+gmap_avx512-pairpool.obj: pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-pairpool.Tpo -c -o gmap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pairpool.Tpo $(DEPDIR)/gmap_avx512-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gmap_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+
+gmap_avx512-cellpool.o: cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-cellpool.Tpo -c -o gmap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cellpool.Tpo $(DEPDIR)/gmap_avx512-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gmap_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+
+gmap_avx512-cellpool.obj: cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-cellpool.Tpo -c -o gmap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cellpool.Tpo $(DEPDIR)/gmap_avx512-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gmap_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+
+gmap_avx512-stage2.o: stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stage2.Tpo -c -o gmap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage2.Tpo $(DEPDIR)/gmap_avx512-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gmap_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+
+gmap_avx512-stage2.obj: stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stage2.Tpo -c -o gmap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage2.Tpo $(DEPDIR)/gmap_avx512-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gmap_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+
+gmap_avx512-doublelist.o: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gmap_avx512-doublelist.Tpo -c -o gmap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-doublelist.Tpo $(DEPDIR)/gmap_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gmap_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+
+gmap_avx512-doublelist.obj: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-doublelist.Tpo -c -o gmap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-doublelist.Tpo $(DEPDIR)/gmap_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gmap_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+
+gmap_avx512-smooth.o: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gmap_avx512-smooth.Tpo -c -o gmap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-smooth.Tpo $(DEPDIR)/gmap_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gmap_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+
+gmap_avx512-smooth.obj: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-smooth.Tpo -c -o gmap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-smooth.Tpo $(DEPDIR)/gmap_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gmap_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+
+gmap_avx512-splicestringpool.o: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-splicestringpool.Tpo -c -o gmap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicestringpool.Tpo $(DEPDIR)/gmap_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gmap_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+
+gmap_avx512-splicestringpool.obj: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-splicestringpool.Tpo -c -o gmap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicestringpool.Tpo $(DEPDIR)/gmap_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gmap_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+
+gmap_avx512-splicetrie_build.o: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo -c -o gmap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo $(DEPDIR)/gmap_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gmap_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+
+gmap_avx512-splicetrie_build.obj: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo -c -o gmap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo $(DEPDIR)/gmap_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gmap_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+
+gmap_avx512-splicetrie.o: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie.Tpo -c -o gmap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie.Tpo $(DEPDIR)/gmap_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gmap_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+
+gmap_avx512-splicetrie.obj: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie.Tpo -c -o gmap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie.Tpo $(DEPDIR)/gmap_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gmap_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+
+gmap_avx512-boyer-moore.o: boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gmap_avx512-boyer-moore.Tpo -c -o gmap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-boyer-moore.Tpo $(DEPDIR)/gmap_avx512-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gmap_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+
+gmap_avx512-boyer-moore.obj: boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-boyer-moore.Tpo -c -o gmap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-boyer-moore.Tpo $(DEPDIR)/gmap_avx512-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gmap_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+
+gmap_avx512-dynprog.o: dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog.Tpo -c -o gmap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog.Tpo $(DEPDIR)/gmap_avx512-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gmap_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+
+gmap_avx512-dynprog.obj: dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog.Tpo -c -o gmap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog.Tpo $(DEPDIR)/gmap_avx512-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gmap_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+
+gmap_avx512-dynprog_simd.o: dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo -c -o gmap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo $(DEPDIR)/gmap_avx512-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gmap_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+
+gmap_avx512-dynprog_simd.obj: dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo -c -o gmap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo $(DEPDIR)/gmap_avx512-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gmap_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+
+gmap_avx512-dynprog_single.o: dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_single.Tpo -c -o gmap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_single.Tpo $(DEPDIR)/gmap_avx512-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gmap_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+
+gmap_avx512-dynprog_single.obj: dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_single.Tpo -c -o gmap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_single.Tpo $(DEPDIR)/gmap_avx512-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gmap_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+
+gmap_avx512-dynprog_genome.o: dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo -c -o gmap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo $(DEPDIR)/gmap_avx512-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gmap_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+
+gmap_avx512-dynprog_genome.obj: dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo -c -o gmap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo $(DEPDIR)/gmap_avx512-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gmap_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+
+gmap_avx512-dynprog_cdna.o: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo -c -o gmap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmap_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gmap_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+
+gmap_avx512-dynprog_cdna.obj: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo -c -o gmap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmap_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gmap_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+
+gmap_avx512-dynprog_end.o: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_end.Tpo -c -o gmap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_end.Tpo $(DEPDIR)/gmap_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gmap_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+
+gmap_avx512-dynprog_end.obj: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_end.Tpo -c -o gmap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_end.Tpo $(DEPDIR)/gmap_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gmap_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+
+gmap_avx512-translation.o: translation.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-translation.o -MD -MP -MF $(DEPDIR)/gmap_avx512-translation.Tpo -c -o gmap_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-translation.Tpo $(DEPDIR)/gmap_avx512-translation.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='translation.c' object='gmap_avx512-translation.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
+
+gmap_avx512-translation.obj: translation.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-translation.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-translation.Tpo -c -o gmap_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-translation.Tpo $(DEPDIR)/gmap_avx512-translation.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='translation.c' object='gmap_avx512-translation.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
+
+gmap_avx512-pbinom.o: pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gmap_avx512-pbinom.Tpo -c -o gmap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pbinom.Tpo $(DEPDIR)/gmap_avx512-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gmap_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+
+gmap_avx512-pbinom.obj: pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-pbinom.Tpo -c -o gmap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pbinom.Tpo $(DEPDIR)/gmap_avx512-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gmap_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+
+gmap_avx512-changepoint.o: changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gmap_avx512-changepoint.Tpo -c -o gmap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-changepoint.Tpo $(DEPDIR)/gmap_avx512-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gmap_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+
+gmap_avx512-changepoint.obj: changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-changepoint.Tpo -c -o gmap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-changepoint.Tpo $(DEPDIR)/gmap_avx512-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gmap_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+
+gmap_avx512-stage3.o: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stage3.Tpo -c -o gmap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage3.Tpo $(DEPDIR)/gmap_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gmap_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+
+gmap_avx512-stage3.obj: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stage3.Tpo -c -o gmap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage3.Tpo $(DEPDIR)/gmap_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gmap_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+
+gmap_avx512-request.o: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-request.o -MD -MP -MF $(DEPDIR)/gmap_avx512-request.Tpo -c -o gmap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-request.Tpo $(DEPDIR)/gmap_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gmap_avx512-request.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+
+gmap_avx512-request.obj: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-request.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-request.Tpo -c -o gmap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-request.Tpo $(DEPDIR)/gmap_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gmap_avx512-request.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+
+gmap_avx512-result.o: result.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-result.o -MD -MP -MF $(DEPDIR)/gmap_avx512-result.Tpo -c -o gmap_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-result.Tpo $(DEPDIR)/gmap_avx512-result.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='result.c' object='gmap_avx512-result.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c
+
+gmap_avx512-result.obj: result.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-result.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-result.Tpo -c -o gmap_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-result.Tpo $(DEPDIR)/gmap_avx512-result.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='result.c' object='gmap_avx512-result.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi`
+
+gmap_avx512-output.o: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-output.o -MD -MP -MF $(DEPDIR)/gmap_avx512-output.Tpo -c -o gmap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-output.Tpo $(DEPDIR)/gmap_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gmap_avx512-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gmap_avx512-output.obj: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-output.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-output.Tpo -c -o gmap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-output.Tpo $(DEPDIR)/gmap_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gmap_avx512-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
+gmap_avx512-inbuffer.o: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gmap_avx512-inbuffer.Tpo -c -o gmap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-inbuffer.Tpo $(DEPDIR)/gmap_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gmap_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+
+gmap_avx512-inbuffer.obj: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-inbuffer.Tpo -c -o gmap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-inbuffer.Tpo $(DEPDIR)/gmap_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gmap_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+
+gmap_avx512-samheader.o: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gmap_avx512-samheader.Tpo -c -o gmap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-samheader.Tpo $(DEPDIR)/gmap_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gmap_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+
+gmap_avx512-samheader.obj: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-samheader.Tpo -c -o gmap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-samheader.Tpo $(DEPDIR)/gmap_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gmap_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+
+gmap_avx512-outbuffer.o: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gmap_avx512-outbuffer.Tpo -c -o gmap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-outbuffer.Tpo $(DEPDIR)/gmap_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gmap_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+
+gmap_avx512-outbuffer.obj: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-outbuffer.Tpo -c -o gmap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-outbuffer.Tpo $(DEPDIR)/gmap_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gmap_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+
+gmap_avx512-chimera.o: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gmap_avx512-chimera.Tpo -c -o gmap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chimera.Tpo $(DEPDIR)/gmap_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gmap_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+
+gmap_avx512-chimera.obj: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-chimera.Tpo -c -o gmap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chimera.Tpo $(DEPDIR)/gmap_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gmap_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+
+gmap_avx512-datadir.o: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gmap_avx512-datadir.Tpo -c -o gmap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-datadir.Tpo $(DEPDIR)/gmap_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gmap_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+
+gmap_avx512-datadir.obj: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-datadir.Tpo -c -o gmap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-datadir.Tpo $(DEPDIR)/gmap_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gmap_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+
+gmap_avx512-parserange.o: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gmap_avx512-parserange.Tpo -c -o gmap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-parserange.Tpo $(DEPDIR)/gmap_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gmap_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+
+gmap_avx512-parserange.obj: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-parserange.Tpo -c -o gmap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-parserange.Tpo $(DEPDIR)/gmap_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gmap_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+
+gmap_avx512-getopt.o: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt.Tpo -c -o gmap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt.Tpo $(DEPDIR)/gmap_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gmap_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+
+gmap_avx512-getopt.obj: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt.Tpo -c -o gmap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt.Tpo $(DEPDIR)/gmap_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gmap_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+
+gmap_avx512-getopt1.o: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt1.Tpo -c -o gmap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt1.Tpo $(DEPDIR)/gmap_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gmap_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+
+gmap_avx512-getopt1.obj: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt1.Tpo -c -o gmap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt1.Tpo $(DEPDIR)/gmap_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gmap_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+
+gmap_avx512-gmap.o: gmap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gmap.o -MD -MP -MF $(DEPDIR)/gmap_avx512-gmap.Tpo -c -o gmap_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gmap.Tpo $(DEPDIR)/gmap_avx512-gmap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gmap.c' object='gmap_avx512-gmap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c
+
+gmap_avx512-gmap.obj: gmap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gmap.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-gmap.Tpo -c -o gmap_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gmap.Tpo $(DEPDIR)/gmap_avx512-gmap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gmap.c' object='gmap_avx512-gmap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi`
+
 gmap_nosimd-except.o: except.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-except.o -MD -MP -MF $(DEPDIR)/gmap_nosimd-except.Tpo -c -o gmap_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-except.Tpo $(DEPDIR)/gmap_nosimd-except.Po
@@ -8994,6 +11479,20 @@ gmap_nosimd-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -c -o gmap_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmap_nosimd-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gmap_nosimd-merge.Tpo -c -o gmap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-merge.Tpo $(DEPDIR)/gmap_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -c -o gmap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_nosimd-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gmap_nosimd-merge.Tpo -c -o gmap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-merge.Tpo $(DEPDIR)/gmap_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -c -o gmap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmap_nosimd-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_nosimd-indexdb.Tpo -c -o gmap_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-indexdb.Tpo $(DEPDIR)/gmap_nosimd-indexdb.Po
@@ -10184,6 +12683,20 @@ gmap_sse2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -c -o gmap_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmap_sse2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -MT gmap_sse2-merge.o -MD -MP -MF $(DEPDIR)/gmap_sse2-merge.Tpo -c -o gmap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse2-merge.Tpo $(DEPDIR)/gmap_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_sse2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -c -o gmap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_sse2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -MT gmap_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gmap_sse2-merge.Tpo -c -o gmap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse2-merge.Tpo $(DEPDIR)/gmap_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -c -o gmap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmap_sse2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -MT gmap_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_sse2-indexdb.Tpo -c -o gmap_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse2-indexdb.Tpo $(DEPDIR)/gmap_sse2-indexdb.Po
@@ -11374,6 +13887,20 @@ gmap_sse41-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -c -o gmap_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmap_sse41-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -MT gmap_sse41-merge.o -MD -MP -MF $(DEPDIR)/gmap_sse41-merge.Tpo -c -o gmap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse41-merge.Tpo $(DEPDIR)/gmap_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_sse41-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -c -o gmap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_sse41-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -MT gmap_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gmap_sse41-merge.Tpo -c -o gmap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse41-merge.Tpo $(DEPDIR)/gmap_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -c -o gmap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmap_sse41-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -MT gmap_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_sse41-indexdb.Tpo -c -o gmap_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse41-indexdb.Tpo $(DEPDIR)/gmap_sse41-indexdb.Po
@@ -12564,6 +15091,20 @@ gmap_sse42-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -c -o gmap_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmap_sse42-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -MT gmap_sse42-merge.o -MD -MP -MF $(DEPDIR)/gmap_sse42-merge.Tpo -c -o gmap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse42-merge.Tpo $(DEPDIR)/gmap_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_sse42-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -c -o gmap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_sse42-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -MT gmap_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gmap_sse42-merge.Tpo -c -o gmap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse42-merge.Tpo $(DEPDIR)/gmap_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -c -o gmap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmap_sse42-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -MT gmap_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_sse42-indexdb.Tpo -c -o gmap_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse42-indexdb.Tpo $(DEPDIR)/gmap_sse42-indexdb.Po
@@ -13754,6 +16295,20 @@ gmap_ssse3-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -c -o gmap_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmap_ssse3-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -MT gmap_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gmap_ssse3-merge.Tpo -c -o gmap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_ssse3-merge.Tpo $(DEPDIR)/gmap_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -c -o gmap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmap_ssse3-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -MT gmap_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gmap_ssse3-merge.Tpo -c -o gmap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_ssse3-merge.Tpo $(DEPDIR)/gmap_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmap_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -c -o gmap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmap_ssse3-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -MT gmap_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_ssse3-indexdb.Tpo -c -o gmap_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_ssse3-indexdb.Tpo $(DEPDIR)/gmap_ssse3-indexdb.Po
@@ -15630,6 +18185,20 @@ gmapl_avx2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmapl_avx2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-merge.o -MD -MP -MF $(DEPDIR)/gmapl_avx2-merge.Tpo -c -o gmapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-merge.Tpo $(DEPDIR)/gmapl_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_avx2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_avx2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_avx2-merge.Tpo -c -o gmapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-merge.Tpo $(DEPDIR)/gmapl_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmapl_avx2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_avx2-indexdb.Tpo -c -o gmapl_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-indexdb.Tpo $(DEPDIR)/gmapl_avx2-indexdb.Po
@@ -16386,6 +18955,1224 @@ gmapl_avx2-gmap.obj: gmap.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi`
 
+gmapl_avx512-except.o: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-except.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-except.Tpo -c -o gmapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-except.Tpo $(DEPDIR)/gmapl_avx512-except.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gmapl_avx512-except.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+
+gmapl_avx512-except.obj: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-except.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-except.Tpo -c -o gmapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-except.Tpo $(DEPDIR)/gmapl_avx512-except.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gmapl_avx512-except.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+
+gmapl_avx512-assert.o: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-assert.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-assert.Tpo -c -o gmapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-assert.Tpo $(DEPDIR)/gmapl_avx512-assert.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gmapl_avx512-assert.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+
+gmapl_avx512-assert.obj: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-assert.Tpo -c -o gmapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-assert.Tpo $(DEPDIR)/gmapl_avx512-assert.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gmapl_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+
+gmapl_avx512-mem.o: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-mem.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-mem.Tpo -c -o gmapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-mem.Tpo $(DEPDIR)/gmapl_avx512-mem.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gmapl_avx512-mem.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+
+gmapl_avx512-mem.obj: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-mem.Tpo -c -o gmapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-mem.Tpo $(DEPDIR)/gmapl_avx512-mem.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gmapl_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+
+gmapl_avx512-intlist.o: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-intlist.Tpo -c -o gmapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intlist.Tpo $(DEPDIR)/gmapl_avx512-intlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gmapl_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+
+gmapl_avx512-intlist.obj: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-intlist.Tpo -c -o gmapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intlist.Tpo $(DEPDIR)/gmapl_avx512-intlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gmapl_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+
+gmapl_avx512-list.o: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-list.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-list.Tpo -c -o gmapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-list.Tpo $(DEPDIR)/gmapl_avx512-list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gmapl_avx512-list.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+
+gmapl_avx512-list.obj: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-list.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-list.Tpo -c -o gmapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-list.Tpo $(DEPDIR)/gmapl_avx512-list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gmapl_avx512-list.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+
+gmapl_avx512-littleendian.o: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-littleendian.Tpo -c -o gmapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-littleendian.Tpo $(DEPDIR)/gmapl_avx512-littleendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gmapl_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+
+gmapl_avx512-littleendian.obj: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-littleendian.Tpo -c -o gmapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-littleendian.Tpo $(DEPDIR)/gmapl_avx512-littleendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gmapl_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+
+gmapl_avx512-bigendian.o: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bigendian.Tpo -c -o gmapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bigendian.Tpo $(DEPDIR)/gmapl_avx512-bigendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gmapl_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+
+gmapl_avx512-bigendian.obj: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bigendian.Tpo -c -o gmapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bigendian.Tpo $(DEPDIR)/gmapl_avx512-bigendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gmapl_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+
+gmapl_avx512-univinterval.o: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-univinterval.Tpo -c -o gmapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-univinterval.Tpo $(DEPDIR)/gmapl_avx512-univinterval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gmapl_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+
+gmapl_avx512-univinterval.obj: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-univinterval.Tpo -c -o gmapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-univinterval.Tpo $(DEPDIR)/gmapl_avx512-univinterval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gmapl_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+
+gmapl_avx512-interval.o: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-interval.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-interval.Tpo -c -o gmapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-interval.Tpo $(DEPDIR)/gmapl_avx512-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gmapl_avx512-interval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+
+gmapl_avx512-interval.obj: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-interval.Tpo -c -o gmapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-interval.Tpo $(DEPDIR)/gmapl_avx512-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gmapl_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+
+gmapl_avx512-uintlist.o: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-uintlist.Tpo -c -o gmapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uintlist.Tpo $(DEPDIR)/gmapl_avx512-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gmapl_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+
+gmapl_avx512-uintlist.obj: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-uintlist.Tpo -c -o gmapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uintlist.Tpo $(DEPDIR)/gmapl_avx512-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gmapl_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+
+gmapl_avx512-uint8list.o: uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-uint8list.Tpo -c -o gmapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uint8list.Tpo $(DEPDIR)/gmapl_avx512-uint8list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uint8list.c' object='gmapl_avx512-uint8list.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gmapl_avx512-uint8list.obj: uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-uint8list.Tpo -c -o gmapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uint8list.Tpo $(DEPDIR)/gmapl_avx512-uint8list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uint8list.c' object='gmapl_avx512-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
+gmapl_avx512-stopwatch.o: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stopwatch.Tpo -c -o gmapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stopwatch.Tpo $(DEPDIR)/gmapl_avx512-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gmapl_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+
+gmapl_avx512-stopwatch.obj: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stopwatch.Tpo -c -o gmapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stopwatch.Tpo $(DEPDIR)/gmapl_avx512-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gmapl_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+
+gmapl_avx512-semaphore.o: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-semaphore.Tpo -c -o gmapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-semaphore.Tpo $(DEPDIR)/gmapl_avx512-semaphore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gmapl_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+
+gmapl_avx512-semaphore.obj: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-semaphore.Tpo -c -o gmapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-semaphore.Tpo $(DEPDIR)/gmapl_avx512-semaphore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gmapl_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+
+gmapl_avx512-access.o: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-access.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-access.Tpo -c -o gmapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-access.Tpo $(DEPDIR)/gmapl_avx512-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gmapl_avx512-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+
+gmapl_avx512-access.obj: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-access.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-access.Tpo -c -o gmapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-access.Tpo $(DEPDIR)/gmapl_avx512-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gmapl_avx512-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+
+gmapl_avx512-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-filestring.Tpo -c -o gmapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-filestring.Tpo $(DEPDIR)/gmapl_avx512-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gmapl_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gmapl_avx512-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-filestring.Tpo -c -o gmapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-filestring.Tpo $(DEPDIR)/gmapl_avx512-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gmapl_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
+gmapl_avx512-iit-read-univ.o: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo -c -o gmapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gmapl_avx512-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gmapl_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+
+gmapl_avx512-iit-read-univ.obj: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo -c -o gmapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gmapl_avx512-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gmapl_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+
+gmapl_avx512-iit-read.o: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read.Tpo -c -o gmapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read.Tpo $(DEPDIR)/gmapl_avx512-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gmapl_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+
+gmapl_avx512-iit-read.obj: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read.Tpo -c -o gmapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read.Tpo $(DEPDIR)/gmapl_avx512-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gmapl_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+
+gmapl_avx512-md5.o: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-md5.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-md5.Tpo -c -o gmapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-md5.Tpo $(DEPDIR)/gmapl_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gmapl_avx512-md5.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+
+gmapl_avx512-md5.obj: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-md5.Tpo -c -o gmapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-md5.Tpo $(DEPDIR)/gmapl_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gmapl_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+
+gmapl_avx512-bzip2.o: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bzip2.Tpo -c -o gmapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bzip2.Tpo $(DEPDIR)/gmapl_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gmapl_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+
+gmapl_avx512-bzip2.obj: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bzip2.Tpo -c -o gmapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bzip2.Tpo $(DEPDIR)/gmapl_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gmapl_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+
+gmapl_avx512-sequence.o: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-sequence.Tpo -c -o gmapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-sequence.Tpo $(DEPDIR)/gmapl_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gmapl_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+
+gmapl_avx512-sequence.obj: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-sequence.Tpo -c -o gmapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-sequence.Tpo $(DEPDIR)/gmapl_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gmapl_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+
+gmapl_avx512-reader.o: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-reader.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-reader.Tpo -c -o gmapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-reader.Tpo $(DEPDIR)/gmapl_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gmapl_avx512-reader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+
+gmapl_avx512-reader.obj: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-reader.Tpo -c -o gmapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-reader.Tpo $(DEPDIR)/gmapl_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gmapl_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+
+gmapl_avx512-genomicpos.o: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genomicpos.Tpo -c -o gmapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genomicpos.Tpo $(DEPDIR)/gmapl_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gmapl_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+
+gmapl_avx512-genomicpos.obj: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genomicpos.Tpo -c -o gmapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genomicpos.Tpo $(DEPDIR)/gmapl_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gmapl_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+
+gmapl_avx512-compress.o: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress.Tpo -c -o gmapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress.Tpo $(DEPDIR)/gmapl_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gmapl_avx512-compress.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+
+gmapl_avx512-compress.obj: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress.Tpo -c -o gmapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress.Tpo $(DEPDIR)/gmapl_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gmapl_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+
+gmapl_avx512-compress-write.o: compress-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress-write.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress-write.Tpo -c -o gmapl_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress-write.Tpo $(DEPDIR)/gmapl_avx512-compress-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress-write.c' object='gmapl_avx512-compress-write.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c
+
+gmapl_avx512-compress-write.obj: compress-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress-write.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress-write.Tpo -c -o gmapl_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress-write.Tpo $(DEPDIR)/gmapl_avx512-compress-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress-write.c' object='gmapl_avx512-compress-write.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi`
+
+gmapl_avx512-gbuffer.o: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-gbuffer.Tpo -c -o gmapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gbuffer.Tpo $(DEPDIR)/gmapl_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gmapl_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+
+gmapl_avx512-gbuffer.obj: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-gbuffer.Tpo -c -o gmapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gbuffer.Tpo $(DEPDIR)/gmapl_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gmapl_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+
+gmapl_avx512-genome.o: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome.Tpo -c -o gmapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome.Tpo $(DEPDIR)/gmapl_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gmapl_avx512-genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+
+gmapl_avx512-genome.obj: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome.Tpo -c -o gmapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome.Tpo $(DEPDIR)/gmapl_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gmapl_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+
+gmapl_avx512-popcount.o: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-popcount.Tpo -c -o gmapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-popcount.Tpo $(DEPDIR)/gmapl_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gmapl_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+
+gmapl_avx512-popcount.obj: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-popcount.Tpo -c -o gmapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-popcount.Tpo $(DEPDIR)/gmapl_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gmapl_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+
+gmapl_avx512-genome128_hr.o: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo -c -o gmapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo $(DEPDIR)/gmapl_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gmapl_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+
+gmapl_avx512-genome128_hr.obj: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo -c -o gmapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo $(DEPDIR)/gmapl_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gmapl_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+
+gmapl_avx512-genome_sites.o: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome_sites.Tpo -c -o gmapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome_sites.Tpo $(DEPDIR)/gmapl_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gmapl_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+
+gmapl_avx512-genome_sites.obj: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome_sites.Tpo -c -o gmapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome_sites.Tpo $(DEPDIR)/gmapl_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gmapl_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+
+gmapl_avx512-genome-write.o: genome-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome-write.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome-write.Tpo -c -o gmapl_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome-write.Tpo $(DEPDIR)/gmapl_avx512-genome-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome-write.c' object='gmapl_avx512-genome-write.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c
+
+gmapl_avx512-genome-write.obj: genome-write.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome-write.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome-write.Tpo -c -o gmapl_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome-write.Tpo $(DEPDIR)/gmapl_avx512-genome-write.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome-write.c' object='gmapl_avx512-genome-write.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi`
+
+gmapl_avx512-bitpack64-read.o: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo -c -o gmapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gmapl_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+
+gmapl_avx512-bitpack64-read.obj: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo -c -o gmapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gmapl_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+
+gmapl_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo -c -o gmapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gmapl_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+
+gmapl_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo -c -o gmapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gmapl_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+
+gmapl_avx512-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-merge.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-merge.Tpo -c -o gmapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-merge.Tpo $(DEPDIR)/gmapl_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_avx512-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_avx512-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-merge.Tpo -c -o gmapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-merge.Tpo $(DEPDIR)/gmapl_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
+gmapl_avx512-indexdb.o: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb.Tpo -c -o gmapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb.Tpo $(DEPDIR)/gmapl_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gmapl_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+
+gmapl_avx512-indexdb.obj: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb.Tpo -c -o gmapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb.Tpo $(DEPDIR)/gmapl_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gmapl_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+
+gmapl_avx512-indexdb_hr.o: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo -c -o gmapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gmapl_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gmapl_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+
+gmapl_avx512-indexdb_hr.obj: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo -c -o gmapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gmapl_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gmapl_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+
+gmapl_avx512-oligo.o: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligo.Tpo -c -o gmapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligo.Tpo $(DEPDIR)/gmapl_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gmapl_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+
+gmapl_avx512-oligo.obj: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligo.Tpo -c -o gmapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligo.Tpo $(DEPDIR)/gmapl_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gmapl_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+
+gmapl_avx512-block.o: block.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-block.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-block.Tpo -c -o gmapl_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-block.Tpo $(DEPDIR)/gmapl_avx512-block.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='block.c' object='gmapl_avx512-block.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c
+
+gmapl_avx512-block.obj: block.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-block.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-block.Tpo -c -o gmapl_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-block.Tpo $(DEPDIR)/gmapl_avx512-block.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='block.c' object='gmapl_avx512-block.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi`
+
+gmapl_avx512-chrom.o: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrom.Tpo -c -o gmapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrom.Tpo $(DEPDIR)/gmapl_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gmapl_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+
+gmapl_avx512-chrom.obj: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrom.Tpo -c -o gmapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrom.Tpo $(DEPDIR)/gmapl_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gmapl_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+
+gmapl_avx512-segmentpos.o: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-segmentpos.Tpo -c -o gmapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-segmentpos.Tpo $(DEPDIR)/gmapl_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gmapl_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+
+gmapl_avx512-segmentpos.obj: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-segmentpos.Tpo -c -o gmapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-segmentpos.Tpo $(DEPDIR)/gmapl_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gmapl_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+
+gmapl_avx512-chrnum.o: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrnum.Tpo -c -o gmapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrnum.Tpo $(DEPDIR)/gmapl_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gmapl_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+
+gmapl_avx512-chrnum.obj: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrnum.Tpo -c -o gmapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrnum.Tpo $(DEPDIR)/gmapl_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gmapl_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+
+gmapl_avx512-uinttable.o: uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uinttable.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-uinttable.Tpo -c -o gmapl_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uinttable.Tpo $(DEPDIR)/gmapl_avx512-uinttable.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uinttable.c' object='gmapl_avx512-uinttable.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+
+gmapl_avx512-uinttable.obj: uinttable.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uinttable.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-uinttable.Tpo -c -o gmapl_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uinttable.Tpo $(DEPDIR)/gmapl_avx512-uinttable.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uinttable.c' object='gmapl_avx512-uinttable.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi`
+
+gmapl_avx512-gregion.o: gregion.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gregion.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-gregion.Tpo -c -o gmapl_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gregion.Tpo $(DEPDIR)/gmapl_avx512-gregion.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gregion.c' object='gmapl_avx512-gregion.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c
+
+gmapl_avx512-gregion.obj: gregion.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gregion.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-gregion.Tpo -c -o gmapl_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gregion.Tpo $(DEPDIR)/gmapl_avx512-gregion.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gregion.c' object='gmapl_avx512-gregion.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi`
+
+gmapl_avx512-match.o: match.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-match.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-match.Tpo -c -o gmapl_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-match.Tpo $(DEPDIR)/gmapl_avx512-match.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='match.c' object='gmapl_avx512-match.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c
+
+gmapl_avx512-match.obj: match.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-match.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-match.Tpo -c -o gmapl_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-match.Tpo $(DEPDIR)/gmapl_avx512-match.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='match.c' object='gmapl_avx512-match.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi`
+
+gmapl_avx512-matchpool.o: matchpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-matchpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-matchpool.Tpo -c -o gmapl_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-matchpool.Tpo $(DEPDIR)/gmapl_avx512-matchpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='matchpool.c' object='gmapl_avx512-matchpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c
+
+gmapl_avx512-matchpool.obj: matchpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-matchpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-matchpool.Tpo -c -o gmapl_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-matchpool.Tpo $(DEPDIR)/gmapl_avx512-matchpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='matchpool.c' object='gmapl_avx512-matchpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi`
+
+gmapl_avx512-diagnostic.o: diagnostic.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagnostic.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagnostic.Tpo -c -o gmapl_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagnostic.Tpo $(DEPDIR)/gmapl_avx512-diagnostic.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagnostic.c' object='gmapl_avx512-diagnostic.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c
+
+gmapl_avx512-diagnostic.obj: diagnostic.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagnostic.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagnostic.Tpo -c -o gmapl_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagnostic.Tpo $(DEPDIR)/gmapl_avx512-diagnostic.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagnostic.c' object='gmapl_avx512-diagnostic.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi`
+
+gmapl_avx512-stage1.o: stage1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage1.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage1.Tpo -c -o gmapl_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage1.Tpo $(DEPDIR)/gmapl_avx512-stage1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1.c' object='gmapl_avx512-stage1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c
+
+gmapl_avx512-stage1.obj: stage1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage1.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage1.Tpo -c -o gmapl_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage1.Tpo $(DEPDIR)/gmapl_avx512-stage1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1.c' object='gmapl_avx512-stage1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi`
+
+gmapl_avx512-diag.o: diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diag.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-diag.Tpo -c -o gmapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diag.Tpo $(DEPDIR)/gmapl_avx512-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gmapl_avx512-diag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+
+gmapl_avx512-diag.obj: diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-diag.Tpo -c -o gmapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diag.Tpo $(DEPDIR)/gmapl_avx512-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gmapl_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+
+gmapl_avx512-diagpool.o: diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagpool.Tpo -c -o gmapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagpool.Tpo $(DEPDIR)/gmapl_avx512-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gmapl_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+
+gmapl_avx512-diagpool.obj: diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagpool.Tpo -c -o gmapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagpool.Tpo $(DEPDIR)/gmapl_avx512-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gmapl_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+
+gmapl_avx512-cmet.o: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-cmet.Tpo -c -o gmapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cmet.Tpo $(DEPDIR)/gmapl_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gmapl_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+
+gmapl_avx512-cmet.obj: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-cmet.Tpo -c -o gmapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cmet.Tpo $(DEPDIR)/gmapl_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gmapl_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+
+gmapl_avx512-atoi.o: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-atoi.Tpo -c -o gmapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-atoi.Tpo $(DEPDIR)/gmapl_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gmapl_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+
+gmapl_avx512-atoi.obj: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-atoi.Tpo -c -o gmapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-atoi.Tpo $(DEPDIR)/gmapl_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gmapl_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+
+gmapl_avx512-orderstat.o: orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-orderstat.Tpo -c -o gmapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-orderstat.Tpo $(DEPDIR)/gmapl_avx512-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gmapl_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+
+gmapl_avx512-orderstat.obj: orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-orderstat.Tpo -c -o gmapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-orderstat.Tpo $(DEPDIR)/gmapl_avx512-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gmapl_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+
+gmapl_avx512-oligoindex_hr.o: oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo -c -o gmapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmapl_avx512-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gmapl_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+
+gmapl_avx512-oligoindex_hr.obj: oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo -c -o gmapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmapl_avx512-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gmapl_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+
+gmapl_avx512-intron.o: intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intron.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-intron.Tpo -c -o gmapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intron.Tpo $(DEPDIR)/gmapl_avx512-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gmapl_avx512-intron.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+
+gmapl_avx512-intron.obj: intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-intron.Tpo -c -o gmapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intron.Tpo $(DEPDIR)/gmapl_avx512-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gmapl_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+
+gmapl_avx512-maxent.o: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent.Tpo -c -o gmapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent.Tpo $(DEPDIR)/gmapl_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gmapl_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+
+gmapl_avx512-maxent.obj: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent.Tpo -c -o gmapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent.Tpo $(DEPDIR)/gmapl_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gmapl_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+
+gmapl_avx512-maxent_hr.o: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo -c -o gmapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo $(DEPDIR)/gmapl_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gmapl_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+
+gmapl_avx512-maxent_hr.obj: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo -c -o gmapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo $(DEPDIR)/gmapl_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gmapl_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+
+gmapl_avx512-pair.o: pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pair.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-pair.Tpo -c -o gmapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pair.Tpo $(DEPDIR)/gmapl_avx512-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gmapl_avx512-pair.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+
+gmapl_avx512-pair.obj: pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-pair.Tpo -c -o gmapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pair.Tpo $(DEPDIR)/gmapl_avx512-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gmapl_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+
+gmapl_avx512-pairpool.o: pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-pairpool.Tpo -c -o gmapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pairpool.Tpo $(DEPDIR)/gmapl_avx512-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gmapl_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+
+gmapl_avx512-pairpool.obj: pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-pairpool.Tpo -c -o gmapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pairpool.Tpo $(DEPDIR)/gmapl_avx512-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gmapl_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+
+gmapl_avx512-cellpool.o: cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-cellpool.Tpo -c -o gmapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cellpool.Tpo $(DEPDIR)/gmapl_avx512-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gmapl_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+
+gmapl_avx512-cellpool.obj: cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-cellpool.Tpo -c -o gmapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cellpool.Tpo $(DEPDIR)/gmapl_avx512-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gmapl_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+
+gmapl_avx512-stage2.o: stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage2.Tpo -c -o gmapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage2.Tpo $(DEPDIR)/gmapl_avx512-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gmapl_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+
+gmapl_avx512-stage2.obj: stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage2.Tpo -c -o gmapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage2.Tpo $(DEPDIR)/gmapl_avx512-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gmapl_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+
+gmapl_avx512-doublelist.o: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-doublelist.Tpo -c -o gmapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-doublelist.Tpo $(DEPDIR)/gmapl_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gmapl_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+
+gmapl_avx512-doublelist.obj: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-doublelist.Tpo -c -o gmapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-doublelist.Tpo $(DEPDIR)/gmapl_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gmapl_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+
+gmapl_avx512-smooth.o: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-smooth.Tpo -c -o gmapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-smooth.Tpo $(DEPDIR)/gmapl_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gmapl_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+
+gmapl_avx512-smooth.obj: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-smooth.Tpo -c -o gmapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-smooth.Tpo $(DEPDIR)/gmapl_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gmapl_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+
+gmapl_avx512-splicestringpool.o: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo -c -o gmapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo $(DEPDIR)/gmapl_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gmapl_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+
+gmapl_avx512-splicestringpool.obj: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo -c -o gmapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo $(DEPDIR)/gmapl_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gmapl_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+
+gmapl_avx512-splicetrie_build.o: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo -c -o gmapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gmapl_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gmapl_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+
+gmapl_avx512-splicetrie_build.obj: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo -c -o gmapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gmapl_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gmapl_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+
+gmapl_avx512-splicetrie.o: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie.Tpo -c -o gmapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie.Tpo $(DEPDIR)/gmapl_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gmapl_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+
+gmapl_avx512-splicetrie.obj: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie.Tpo -c -o gmapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie.Tpo $(DEPDIR)/gmapl_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gmapl_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+
+gmapl_avx512-boyer-moore.o: boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo -c -o gmapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo $(DEPDIR)/gmapl_avx512-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gmapl_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+
+gmapl_avx512-boyer-moore.obj: boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo -c -o gmapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo $(DEPDIR)/gmapl_avx512-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gmapl_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+
+gmapl_avx512-dynprog.o: dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog.Tpo -c -o gmapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog.Tpo $(DEPDIR)/gmapl_avx512-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gmapl_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+
+gmapl_avx512-dynprog.obj: dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog.Tpo -c -o gmapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog.Tpo $(DEPDIR)/gmapl_avx512-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gmapl_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+
+gmapl_avx512-dynprog_simd.o: dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo -c -o gmapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gmapl_avx512-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gmapl_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+
+gmapl_avx512-dynprog_simd.obj: dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo -c -o gmapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gmapl_avx512-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gmapl_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+
+gmapl_avx512-dynprog_single.o: dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo -c -o gmapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo $(DEPDIR)/gmapl_avx512-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gmapl_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+
+gmapl_avx512-dynprog_single.obj: dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo -c -o gmapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo $(DEPDIR)/gmapl_avx512-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gmapl_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+
+gmapl_avx512-dynprog_genome.o: dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo -c -o gmapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gmapl_avx512-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gmapl_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+
+gmapl_avx512-dynprog_genome.obj: dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo -c -o gmapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gmapl_avx512-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gmapl_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+
+gmapl_avx512-dynprog_cdna.o: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo -c -o gmapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmapl_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gmapl_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+
+gmapl_avx512-dynprog_cdna.obj: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo -c -o gmapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmapl_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gmapl_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+
+gmapl_avx512-dynprog_end.o: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo -c -o gmapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo $(DEPDIR)/gmapl_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gmapl_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+
+gmapl_avx512-dynprog_end.obj: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo -c -o gmapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo $(DEPDIR)/gmapl_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gmapl_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+
+gmapl_avx512-translation.o: translation.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-translation.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-translation.Tpo -c -o gmapl_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-translation.Tpo $(DEPDIR)/gmapl_avx512-translation.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='translation.c' object='gmapl_avx512-translation.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c
+
+gmapl_avx512-translation.obj: translation.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-translation.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-translation.Tpo -c -o gmapl_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-translation.Tpo $(DEPDIR)/gmapl_avx512-translation.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='translation.c' object='gmapl_avx512-translation.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi`
+
+gmapl_avx512-pbinom.o: pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-pbinom.Tpo -c -o gmapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pbinom.Tpo $(DEPDIR)/gmapl_avx512-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gmapl_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+
+gmapl_avx512-pbinom.obj: pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-pbinom.Tpo -c -o gmapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pbinom.Tpo $(DEPDIR)/gmapl_avx512-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gmapl_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+
+gmapl_avx512-changepoint.o: changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-changepoint.Tpo -c -o gmapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-changepoint.Tpo $(DEPDIR)/gmapl_avx512-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gmapl_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+
+gmapl_avx512-changepoint.obj: changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-changepoint.Tpo -c -o gmapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-changepoint.Tpo $(DEPDIR)/gmapl_avx512-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gmapl_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+
+gmapl_avx512-stage3.o: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage3.Tpo -c -o gmapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage3.Tpo $(DEPDIR)/gmapl_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gmapl_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+
+gmapl_avx512-stage3.obj: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage3.Tpo -c -o gmapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage3.Tpo $(DEPDIR)/gmapl_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gmapl_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+
+gmapl_avx512-request.o: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-request.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-request.Tpo -c -o gmapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-request.Tpo $(DEPDIR)/gmapl_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gmapl_avx512-request.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+
+gmapl_avx512-request.obj: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-request.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-request.Tpo -c -o gmapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-request.Tpo $(DEPDIR)/gmapl_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gmapl_avx512-request.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+
+gmapl_avx512-result.o: result.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-result.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-result.Tpo -c -o gmapl_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-result.Tpo $(DEPDIR)/gmapl_avx512-result.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='result.c' object='gmapl_avx512-result.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c
+
+gmapl_avx512-result.obj: result.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-result.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-result.Tpo -c -o gmapl_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-result.Tpo $(DEPDIR)/gmapl_avx512-result.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='result.c' object='gmapl_avx512-result.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi`
+
+gmapl_avx512-output.o: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-output.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-output.Tpo -c -o gmapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-output.Tpo $(DEPDIR)/gmapl_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gmapl_avx512-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gmapl_avx512-output.obj: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-output.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-output.Tpo -c -o gmapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-output.Tpo $(DEPDIR)/gmapl_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gmapl_avx512-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
+gmapl_avx512-inbuffer.o: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-inbuffer.Tpo -c -o gmapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-inbuffer.Tpo $(DEPDIR)/gmapl_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gmapl_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+
+gmapl_avx512-inbuffer.obj: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-inbuffer.Tpo -c -o gmapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-inbuffer.Tpo $(DEPDIR)/gmapl_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gmapl_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+
+gmapl_avx512-samheader.o: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-samheader.Tpo -c -o gmapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-samheader.Tpo $(DEPDIR)/gmapl_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gmapl_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+
+gmapl_avx512-samheader.obj: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-samheader.Tpo -c -o gmapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-samheader.Tpo $(DEPDIR)/gmapl_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gmapl_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+
+gmapl_avx512-outbuffer.o: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-outbuffer.Tpo -c -o gmapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-outbuffer.Tpo $(DEPDIR)/gmapl_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gmapl_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+
+gmapl_avx512-outbuffer.obj: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-outbuffer.Tpo -c -o gmapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-outbuffer.Tpo $(DEPDIR)/gmapl_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gmapl_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+
+gmapl_avx512-chimera.o: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-chimera.Tpo -c -o gmapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chimera.Tpo $(DEPDIR)/gmapl_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gmapl_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+
+gmapl_avx512-chimera.obj: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-chimera.Tpo -c -o gmapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chimera.Tpo $(DEPDIR)/gmapl_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gmapl_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+
+gmapl_avx512-datadir.o: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-datadir.Tpo -c -o gmapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-datadir.Tpo $(DEPDIR)/gmapl_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gmapl_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+
+gmapl_avx512-datadir.obj: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-datadir.Tpo -c -o gmapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-datadir.Tpo $(DEPDIR)/gmapl_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gmapl_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+
+gmapl_avx512-parserange.o: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-parserange.Tpo -c -o gmapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-parserange.Tpo $(DEPDIR)/gmapl_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gmapl_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+
+gmapl_avx512-parserange.obj: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-parserange.Tpo -c -o gmapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-parserange.Tpo $(DEPDIR)/gmapl_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gmapl_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+
+gmapl_avx512-getopt.o: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt.Tpo -c -o gmapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt.Tpo $(DEPDIR)/gmapl_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gmapl_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+
+gmapl_avx512-getopt.obj: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt.Tpo -c -o gmapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt.Tpo $(DEPDIR)/gmapl_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gmapl_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+
+gmapl_avx512-getopt1.o: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt1.Tpo -c -o gmapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt1.Tpo $(DEPDIR)/gmapl_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gmapl_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+
+gmapl_avx512-getopt1.obj: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt1.Tpo -c -o gmapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt1.Tpo $(DEPDIR)/gmapl_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gmapl_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+
+gmapl_avx512-gmap.o: gmap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gmap.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-gmap.Tpo -c -o gmapl_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gmap.Tpo $(DEPDIR)/gmapl_avx512-gmap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gmap.c' object='gmapl_avx512-gmap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c
+
+gmapl_avx512-gmap.obj: gmap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gmap.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-gmap.Tpo -c -o gmapl_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gmap.Tpo $(DEPDIR)/gmapl_avx512-gmap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gmap.c' object='gmapl_avx512-gmap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi`
+
 gmapl_nosimd-except.o: except.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-except.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-except.Tpo -c -o gmapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-except.Tpo $(DEPDIR)/gmapl_nosimd-except.Po
@@ -16834,6 +20621,20 @@ gmapl_nosimd-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmapl_nosimd-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-merge.Tpo -c -o gmapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-merge.Tpo $(DEPDIR)/gmapl_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_nosimd-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-merge.Tpo -c -o gmapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-merge.Tpo $(DEPDIR)/gmapl_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmapl_nosimd-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-indexdb.Tpo -c -o gmapl_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-indexdb.Tpo $(DEPDIR)/gmapl_nosimd-indexdb.Po
@@ -18038,6 +21839,20 @@ gmapl_sse2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmapl_sse2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-merge.o -MD -MP -MF $(DEPDIR)/gmapl_sse2-merge.Tpo -c -o gmapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-merge.Tpo $(DEPDIR)/gmapl_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_sse2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_sse2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_sse2-merge.Tpo -c -o gmapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-merge.Tpo $(DEPDIR)/gmapl_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmapl_sse2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_sse2-indexdb.Tpo -c -o gmapl_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-indexdb.Tpo $(DEPDIR)/gmapl_sse2-indexdb.Po
@@ -19242,6 +23057,20 @@ gmapl_sse41-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmapl_sse41-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-merge.o -MD -MP -MF $(DEPDIR)/gmapl_sse41-merge.Tpo -c -o gmapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-merge.Tpo $(DEPDIR)/gmapl_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_sse41-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_sse41-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_sse41-merge.Tpo -c -o gmapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-merge.Tpo $(DEPDIR)/gmapl_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmapl_sse41-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_sse41-indexdb.Tpo -c -o gmapl_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-indexdb.Tpo $(DEPDIR)/gmapl_sse41-indexdb.Po
@@ -20446,6 +24275,20 @@ gmapl_sse42-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmapl_sse42-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-merge.o -MD -MP -MF $(DEPDIR)/gmapl_sse42-merge.Tpo -c -o gmapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-merge.Tpo $(DEPDIR)/gmapl_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_sse42-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_sse42-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_sse42-merge.Tpo -c -o gmapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-merge.Tpo $(DEPDIR)/gmapl_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmapl_sse42-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_sse42-indexdb.Tpo -c -o gmapl_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-indexdb.Tpo $(DEPDIR)/gmapl_sse42-indexdb.Po
@@ -21650,6 +25493,20 @@ gmapl_ssse3-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gmapl_ssse3-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gmapl_ssse3-merge.Tpo -c -o gmapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-merge.Tpo $(DEPDIR)/gmapl_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gmapl_ssse3-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_ssse3-merge.Tpo -c -o gmapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-merge.Tpo $(DEPDIR)/gmapl_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gmapl_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gmapl_ssse3-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_ssse3-indexdb.Tpo -c -o gmapl_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-indexdb.Tpo $(DEPDIR)/gmapl_ssse3-indexdb.Po
@@ -22826,6 +26683,20 @@ gsnap_avx2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnap_avx2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge.Tpo -c -o gsnap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge.Tpo $(DEPDIR)/gsnap_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_avx2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_avx2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge.Tpo -c -o gsnap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge.Tpo $(DEPDIR)/gsnap_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnap_avx2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-indexdb.Tpo -c -o gsnap_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-indexdb.Tpo $(DEPDIR)/gsnap_avx2-indexdb.Po
@@ -22924,6 +26795,20 @@ gsnap_avx2-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnap_avx2-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-cigar.Tpo -c -o gsnap_avx2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-cigar.Tpo $(DEPDIR)/gsnap_avx2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_avx2-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_avx2-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-cigar.Tpo -c -o gsnap_avx2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-cigar.Tpo $(DEPDIR)/gsnap_avx2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_avx2-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnap_avx2-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-samprint.Tpo -c -o gsnap_avx2-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-samprint.Tpo $(DEPDIR)/gsnap_avx2-samprint.Po
@@ -23526,6 +27411,34 @@ gsnap_avx2-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+gsnap_avx2-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-sarray-search.Tpo -c -o gsnap_avx2-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-sarray-search.Tpo $(DEPDIR)/gsnap_avx2-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_avx2-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_avx2-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-sarray-search.Tpo -c -o gsnap_avx2-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-sarray-search.Tpo $(DEPDIR)/gsnap_avx2-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_avx2-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_avx2-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge-heap.Tpo -c -o gsnap_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge-heap.Tpo $(DEPDIR)/gsnap_avx2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_avx2-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_avx2-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge-heap.Tpo -c -o gsnap_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge-heap.Tpo $(DEPDIR)/gsnap_avx2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_avx2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnap_avx2-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-stage1hr.Tpo -c -o gsnap_avx2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-stage1hr.Tpo $(DEPDIR)/gsnap_avx2-stage1hr.Po
@@ -23694,6 +27607,1322 @@ gsnap_avx2-gsnap.obj: gsnap.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi`
 
+gsnap_avx512-except.o: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-except.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-except.Tpo -c -o gsnap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-except.Tpo $(DEPDIR)/gsnap_avx512-except.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gsnap_avx512-except.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+
+gsnap_avx512-except.obj: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-except.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-except.Tpo -c -o gsnap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-except.Tpo $(DEPDIR)/gsnap_avx512-except.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gsnap_avx512-except.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+
+gsnap_avx512-assert.o: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-assert.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-assert.Tpo -c -o gsnap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-assert.Tpo $(DEPDIR)/gsnap_avx512-assert.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gsnap_avx512-assert.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+
+gsnap_avx512-assert.obj: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-assert.Tpo -c -o gsnap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-assert.Tpo $(DEPDIR)/gsnap_avx512-assert.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gsnap_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+
+gsnap_avx512-mem.o: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mem.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-mem.Tpo -c -o gsnap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mem.Tpo $(DEPDIR)/gsnap_avx512-mem.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gsnap_avx512-mem.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+
+gsnap_avx512-mem.obj: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-mem.Tpo -c -o gsnap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mem.Tpo $(DEPDIR)/gsnap_avx512-mem.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gsnap_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+
+gsnap_avx512-intlist.o: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-intlist.Tpo -c -o gsnap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intlist.Tpo $(DEPDIR)/gsnap_avx512-intlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gsnap_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+
+gsnap_avx512-intlist.obj: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-intlist.Tpo -c -o gsnap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intlist.Tpo $(DEPDIR)/gsnap_avx512-intlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gsnap_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+
+gsnap_avx512-list.o: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-list.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-list.Tpo -c -o gsnap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-list.Tpo $(DEPDIR)/gsnap_avx512-list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gsnap_avx512-list.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+
+gsnap_avx512-list.obj: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-list.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-list.Tpo -c -o gsnap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-list.Tpo $(DEPDIR)/gsnap_avx512-list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gsnap_avx512-list.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+
+gsnap_avx512-littleendian.o: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-littleendian.Tpo -c -o gsnap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-littleendian.Tpo $(DEPDIR)/gsnap_avx512-littleendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gsnap_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+
+gsnap_avx512-littleendian.obj: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-littleendian.Tpo -c -o gsnap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-littleendian.Tpo $(DEPDIR)/gsnap_avx512-littleendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gsnap_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+
+gsnap_avx512-bigendian.o: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bigendian.Tpo -c -o gsnap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bigendian.Tpo $(DEPDIR)/gsnap_avx512-bigendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gsnap_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+
+gsnap_avx512-bigendian.obj: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bigendian.Tpo -c -o gsnap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bigendian.Tpo $(DEPDIR)/gsnap_avx512-bigendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gsnap_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+
+gsnap_avx512-univinterval.o: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-univinterval.Tpo -c -o gsnap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univinterval.Tpo $(DEPDIR)/gsnap_avx512-univinterval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gsnap_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+
+gsnap_avx512-univinterval.obj: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-univinterval.Tpo -c -o gsnap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univinterval.Tpo $(DEPDIR)/gsnap_avx512-univinterval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gsnap_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+
+gsnap_avx512-interval.o: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-interval.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-interval.Tpo -c -o gsnap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-interval.Tpo $(DEPDIR)/gsnap_avx512-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gsnap_avx512-interval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+
+gsnap_avx512-interval.obj: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-interval.Tpo -c -o gsnap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-interval.Tpo $(DEPDIR)/gsnap_avx512-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gsnap_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+
+gsnap_avx512-uintlist.o: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-uintlist.Tpo -c -o gsnap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-uintlist.Tpo $(DEPDIR)/gsnap_avx512-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gsnap_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+
+gsnap_avx512-uintlist.obj: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-uintlist.Tpo -c -o gsnap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-uintlist.Tpo $(DEPDIR)/gsnap_avx512-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gsnap_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+
+gsnap_avx512-stopwatch.o: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stopwatch.Tpo -c -o gsnap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stopwatch.Tpo $(DEPDIR)/gsnap_avx512-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gsnap_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+
+gsnap_avx512-stopwatch.obj: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stopwatch.Tpo -c -o gsnap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stopwatch.Tpo $(DEPDIR)/gsnap_avx512-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gsnap_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+
+gsnap_avx512-semaphore.o: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-semaphore.Tpo -c -o gsnap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-semaphore.Tpo $(DEPDIR)/gsnap_avx512-semaphore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gsnap_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+
+gsnap_avx512-semaphore.obj: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-semaphore.Tpo -c -o gsnap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-semaphore.Tpo $(DEPDIR)/gsnap_avx512-semaphore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gsnap_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+
+gsnap_avx512-access.o: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-access.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-access.Tpo -c -o gsnap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-access.Tpo $(DEPDIR)/gsnap_avx512-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gsnap_avx512-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+
+gsnap_avx512-access.obj: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-access.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-access.Tpo -c -o gsnap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-access.Tpo $(DEPDIR)/gsnap_avx512-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gsnap_avx512-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+
+gsnap_avx512-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-filestring.Tpo -c -o gsnap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-filestring.Tpo $(DEPDIR)/gsnap_avx512-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gsnap_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gsnap_avx512-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-filestring.Tpo -c -o gsnap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-filestring.Tpo $(DEPDIR)/gsnap_avx512-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gsnap_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
+gsnap_avx512-iit-read-univ.o: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo -c -o gsnap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnap_avx512-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gsnap_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+
+gsnap_avx512-iit-read-univ.obj: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo -c -o gsnap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnap_avx512-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gsnap_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+
+gsnap_avx512-iit-read.o: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read.Tpo -c -o gsnap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read.Tpo $(DEPDIR)/gsnap_avx512-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gsnap_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+
+gsnap_avx512-iit-read.obj: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read.Tpo -c -o gsnap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read.Tpo $(DEPDIR)/gsnap_avx512-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gsnap_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+
+gsnap_avx512-md5.o: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-md5.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-md5.Tpo -c -o gsnap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-md5.Tpo $(DEPDIR)/gsnap_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gsnap_avx512-md5.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+
+gsnap_avx512-md5.obj: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-md5.Tpo -c -o gsnap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-md5.Tpo $(DEPDIR)/gsnap_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gsnap_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+
+gsnap_avx512-bzip2.o: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bzip2.Tpo -c -o gsnap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bzip2.Tpo $(DEPDIR)/gsnap_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gsnap_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+
+gsnap_avx512-bzip2.obj: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bzip2.Tpo -c -o gsnap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bzip2.Tpo $(DEPDIR)/gsnap_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gsnap_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+
+gsnap_avx512-sequence.o: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sequence.Tpo -c -o gsnap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sequence.Tpo $(DEPDIR)/gsnap_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gsnap_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+
+gsnap_avx512-sequence.obj: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sequence.Tpo -c -o gsnap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sequence.Tpo $(DEPDIR)/gsnap_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gsnap_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+
+gsnap_avx512-reader.o: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-reader.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-reader.Tpo -c -o gsnap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-reader.Tpo $(DEPDIR)/gsnap_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gsnap_avx512-reader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+
+gsnap_avx512-reader.obj: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-reader.Tpo -c -o gsnap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-reader.Tpo $(DEPDIR)/gsnap_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gsnap_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+
+gsnap_avx512-genomicpos.o: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genomicpos.Tpo -c -o gsnap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genomicpos.Tpo $(DEPDIR)/gsnap_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gsnap_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+
+gsnap_avx512-genomicpos.obj: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genomicpos.Tpo -c -o gsnap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genomicpos.Tpo $(DEPDIR)/gsnap_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gsnap_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+
+gsnap_avx512-compress.o: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-compress.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-compress.Tpo -c -o gsnap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-compress.Tpo $(DEPDIR)/gsnap_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gsnap_avx512-compress.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+
+gsnap_avx512-compress.obj: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-compress.Tpo -c -o gsnap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-compress.Tpo $(DEPDIR)/gsnap_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gsnap_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+
+gsnap_avx512-genome.o: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome.Tpo -c -o gsnap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome.Tpo $(DEPDIR)/gsnap_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gsnap_avx512-genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+
+gsnap_avx512-genome.obj: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome.Tpo -c -o gsnap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome.Tpo $(DEPDIR)/gsnap_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gsnap_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+
+gsnap_avx512-popcount.o: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-popcount.Tpo -c -o gsnap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-popcount.Tpo $(DEPDIR)/gsnap_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gsnap_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+
+gsnap_avx512-popcount.obj: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-popcount.Tpo -c -o gsnap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-popcount.Tpo $(DEPDIR)/gsnap_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gsnap_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+
+gsnap_avx512-genome128_hr.o: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo -c -o gsnap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo $(DEPDIR)/gsnap_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gsnap_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+
+gsnap_avx512-genome128_hr.obj: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo -c -o gsnap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo $(DEPDIR)/gsnap_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gsnap_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+
+gsnap_avx512-genome_sites.o: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome_sites.Tpo -c -o gsnap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome_sites.Tpo $(DEPDIR)/gsnap_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gsnap_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+
+gsnap_avx512-genome_sites.obj: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome_sites.Tpo -c -o gsnap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome_sites.Tpo $(DEPDIR)/gsnap_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gsnap_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+
+gsnap_avx512-bitpack64-read.o: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo -c -o gsnap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gsnap_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+
+gsnap_avx512-bitpack64-read.obj: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo -c -o gsnap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gsnap_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+
+gsnap_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo -c -o gsnap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gsnap_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+
+gsnap_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo -c -o gsnap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gsnap_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+
+gsnap_avx512-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge.Tpo -c -o gsnap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge.Tpo $(DEPDIR)/gsnap_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_avx512-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_avx512-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge.Tpo -c -o gsnap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge.Tpo $(DEPDIR)/gsnap_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
+gsnap_avx512-indexdb.o: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb.Tpo -c -o gsnap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb.Tpo $(DEPDIR)/gsnap_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gsnap_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+
+gsnap_avx512-indexdb.obj: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb.Tpo -c -o gsnap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb.Tpo $(DEPDIR)/gsnap_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gsnap_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+
+gsnap_avx512-indexdb_hr.o: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo -c -o gsnap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnap_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gsnap_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+
+gsnap_avx512-indexdb_hr.obj: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo -c -o gsnap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnap_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gsnap_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+
+gsnap_avx512-oligo.o: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligo.Tpo -c -o gsnap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligo.Tpo $(DEPDIR)/gsnap_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gsnap_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+
+gsnap_avx512-oligo.obj: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligo.Tpo -c -o gsnap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligo.Tpo $(DEPDIR)/gsnap_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gsnap_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+
+gsnap_avx512-chrom.o: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrom.Tpo -c -o gsnap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrom.Tpo $(DEPDIR)/gsnap_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gsnap_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+
+gsnap_avx512-chrom.obj: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrom.Tpo -c -o gsnap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrom.Tpo $(DEPDIR)/gsnap_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gsnap_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+
+gsnap_avx512-segmentpos.o: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-segmentpos.Tpo -c -o gsnap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-segmentpos.Tpo $(DEPDIR)/gsnap_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gsnap_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+
+gsnap_avx512-segmentpos.obj: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-segmentpos.Tpo -c -o gsnap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-segmentpos.Tpo $(DEPDIR)/gsnap_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gsnap_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+
+gsnap_avx512-chrnum.o: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrnum.Tpo -c -o gsnap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrnum.Tpo $(DEPDIR)/gsnap_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gsnap_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+
+gsnap_avx512-chrnum.obj: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrnum.Tpo -c -o gsnap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrnum.Tpo $(DEPDIR)/gsnap_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gsnap_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+
+gsnap_avx512-maxent_hr.o: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo -c -o gsnap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo $(DEPDIR)/gsnap_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gsnap_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+
+gsnap_avx512-maxent_hr.obj: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo -c -o gsnap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo $(DEPDIR)/gsnap_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gsnap_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+
+gsnap_avx512-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-cigar.Tpo -c -o gsnap_avx512-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cigar.Tpo $(DEPDIR)/gsnap_avx512-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_avx512-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_avx512-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-cigar.Tpo -c -o gsnap_avx512-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cigar.Tpo $(DEPDIR)/gsnap_avx512-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_avx512-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
+gsnap_avx512-samprint.o: samprint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-samprint.Tpo -c -o gsnap_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samprint.Tpo $(DEPDIR)/gsnap_avx512-samprint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samprint.c' object='gsnap_avx512-samprint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
+
+gsnap_avx512-samprint.obj: samprint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samprint.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-samprint.Tpo -c -o gsnap_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samprint.Tpo $(DEPDIR)/gsnap_avx512-samprint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samprint.c' object='gsnap_avx512-samprint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi`
+
+gsnap_avx512-mapq.o: mapq.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mapq.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-mapq.Tpo -c -o gsnap_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mapq.Tpo $(DEPDIR)/gsnap_avx512-mapq.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mapq.c' object='gsnap_avx512-mapq.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c
+
+gsnap_avx512-mapq.obj: mapq.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mapq.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-mapq.Tpo -c -o gsnap_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mapq.Tpo $(DEPDIR)/gsnap_avx512-mapq.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mapq.c' object='gsnap_avx512-mapq.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi`
+
+gsnap_avx512-shortread.o: shortread.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-shortread.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-shortread.Tpo -c -o gsnap_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-shortread.Tpo $(DEPDIR)/gsnap_avx512-shortread.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shortread.c' object='gsnap_avx512-shortread.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c
+
+gsnap_avx512-shortread.obj: shortread.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-shortread.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-shortread.Tpo -c -o gsnap_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-shortread.Tpo $(DEPDIR)/gsnap_avx512-shortread.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shortread.c' object='gsnap_avx512-shortread.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi`
+
+gsnap_avx512-substring.o: substring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-substring.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-substring.Tpo -c -o gsnap_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-substring.Tpo $(DEPDIR)/gsnap_avx512-substring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='substring.c' object='gsnap_avx512-substring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c
+
+gsnap_avx512-substring.obj: substring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-substring.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-substring.Tpo -c -o gsnap_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-substring.Tpo $(DEPDIR)/gsnap_avx512-substring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='substring.c' object='gsnap_avx512-substring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+
+gsnap_avx512-junction.o: junction.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-junction.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-junction.Tpo -c -o gsnap_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-junction.Tpo $(DEPDIR)/gsnap_avx512-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='junction.c' object='gsnap_avx512-junction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+
+gsnap_avx512-junction.obj: junction.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-junction.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-junction.Tpo -c -o gsnap_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-junction.Tpo $(DEPDIR)/gsnap_avx512-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='junction.c' object='gsnap_avx512-junction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+
+gsnap_avx512-stage3hr.o: stage3hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3hr.Tpo -c -o gsnap_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3hr.Tpo $(DEPDIR)/gsnap_avx512-stage3hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3hr.c' object='gsnap_avx512-stage3hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
+
+gsnap_avx512-stage3hr.obj: stage3hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3hr.Tpo -c -o gsnap_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3hr.Tpo $(DEPDIR)/gsnap_avx512-stage3hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3hr.c' object='gsnap_avx512-stage3hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi`
+
+gsnap_avx512-spanningelt.o: spanningelt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-spanningelt.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-spanningelt.Tpo -c -o gsnap_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-spanningelt.Tpo $(DEPDIR)/gsnap_avx512-spanningelt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='spanningelt.c' object='gsnap_avx512-spanningelt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c
+
+gsnap_avx512-spanningelt.obj: spanningelt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-spanningelt.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-spanningelt.Tpo -c -o gsnap_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-spanningelt.Tpo $(DEPDIR)/gsnap_avx512-spanningelt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='spanningelt.c' object='gsnap_avx512-spanningelt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi`
+
+gsnap_avx512-cmet.o: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-cmet.Tpo -c -o gsnap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cmet.Tpo $(DEPDIR)/gsnap_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gsnap_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+
+gsnap_avx512-cmet.obj: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-cmet.Tpo -c -o gsnap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cmet.Tpo $(DEPDIR)/gsnap_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gsnap_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+
+gsnap_avx512-atoi.o: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-atoi.Tpo -c -o gsnap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-atoi.Tpo $(DEPDIR)/gsnap_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gsnap_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+
+gsnap_avx512-atoi.obj: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-atoi.Tpo -c -o gsnap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-atoi.Tpo $(DEPDIR)/gsnap_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gsnap_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+
+gsnap_avx512-maxent.o: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent.Tpo -c -o gsnap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent.Tpo $(DEPDIR)/gsnap_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gsnap_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+
+gsnap_avx512-maxent.obj: maxent.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent.Tpo -c -o gsnap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent.Tpo $(DEPDIR)/gsnap_avx512-maxent.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gsnap_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+
+gsnap_avx512-pair.o: pair.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pair.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-pair.Tpo -c -o gsnap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pair.Tpo $(DEPDIR)/gsnap_avx512-pair.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gsnap_avx512-pair.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+
+gsnap_avx512-pair.obj: pair.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-pair.Tpo -c -o gsnap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pair.Tpo $(DEPDIR)/gsnap_avx512-pair.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gsnap_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+
+gsnap_avx512-pairpool.o: pairpool.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-pairpool.Tpo -c -o gsnap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pairpool.Tpo $(DEPDIR)/gsnap_avx512-pairpool.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gsnap_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+
+gsnap_avx512-pairpool.obj: pairpool.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-pairpool.Tpo -c -o gsnap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pairpool.Tpo $(DEPDIR)/gsnap_avx512-pairpool.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gsnap_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+
+gsnap_avx512-diag.o: diag.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diag.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-diag.Tpo -c -o gsnap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diag.Tpo $(DEPDIR)/gsnap_avx512-diag.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gsnap_avx512-diag.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+
+gsnap_avx512-diag.obj: diag.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-diag.Tpo -c -o gsnap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diag.Tpo $(DEPDIR)/gsnap_avx512-diag.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gsnap_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+
+gsnap_avx512-diagpool.o: diagpool.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-diagpool.Tpo -c -o gsnap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diagpool.Tpo $(DEPDIR)/gsnap_avx512-diagpool.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gsnap_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+
+gsnap_avx512-diagpool.obj: diagpool.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-diagpool.Tpo -c -o gsnap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diagpool.Tpo $(DEPDIR)/gsnap_avx512-diagpool.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gsnap_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+
+gsnap_avx512-orderstat.o: orderstat.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-orderstat.Tpo -c -o gsnap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-orderstat.Tpo $(DEPDIR)/gsnap_avx512-orderstat.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gsnap_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+
+gsnap_avx512-orderstat.obj: orderstat.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-orderstat.Tpo -c -o gsnap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-orderstat.Tpo $(DEPDIR)/gsnap_avx512-orderstat.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gsnap_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+
+gsnap_avx512-oligoindex_hr.o: oligoindex_hr.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo -c -o gsnap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnap_avx512-oligoindex_hr.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gsnap_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+
+gsnap_avx512-oligoindex_hr.obj: oligoindex_hr.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo -c -o gsnap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnap_avx512-oligoindex_hr.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gsnap_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+
+gsnap_avx512-cellpool.o: cellpool.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-cellpool.Tpo -c -o gsnap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cellpool.Tpo $(DEPDIR)/gsnap_avx512-cellpool.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gsnap_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+
+gsnap_avx512-cellpool.obj: cellpool.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-cellpool.Tpo -c -o gsnap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cellpool.Tpo $(DEPDIR)/gsnap_avx512-cellpool.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gsnap_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+
+gsnap_avx512-stage2.o: stage2.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage2.Tpo -c -o gsnap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage2.Tpo $(DEPDIR)/gsnap_avx512-stage2.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gsnap_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+
+gsnap_avx512-stage2.obj: stage2.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage2.Tpo -c -o gsnap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage2.Tpo $(DEPDIR)/gsnap_avx512-stage2.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gsnap_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+
+gsnap_avx512-intron.o: intron.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intron.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-intron.Tpo -c -o gsnap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intron.Tpo $(DEPDIR)/gsnap_avx512-intron.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gsnap_avx512-intron.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+
+gsnap_avx512-intron.obj: intron.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-intron.Tpo -c -o gsnap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intron.Tpo $(DEPDIR)/gsnap_avx512-intron.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gsnap_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+
+gsnap_avx512-boyer-moore.o: boyer-moore.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo -c -o gsnap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo $(DEPDIR)/gsnap_avx512-boyer-moore.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gsnap_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+
+gsnap_avx512-boyer-moore.obj: boyer-moore.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo -c -o gsnap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo $(DEPDIR)/gsnap_avx512-boyer-moore.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gsnap_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+
+gsnap_avx512-changepoint.o: changepoint.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-changepoint.Tpo -c -o gsnap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-changepoint.Tpo $(DEPDIR)/gsnap_avx512-changepoint.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gsnap_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+
+gsnap_avx512-changepoint.obj: changepoint.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-changepoint.Tpo -c -o gsnap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-changepoint.Tpo $(DEPDIR)/gsnap_avx512-changepoint.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gsnap_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+
+gsnap_avx512-pbinom.o: pbinom.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-pbinom.Tpo -c -o gsnap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pbinom.Tpo $(DEPDIR)/gsnap_avx512-pbinom.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gsnap_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+
+gsnap_avx512-pbinom.obj: pbinom.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-pbinom.Tpo -c -o gsnap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pbinom.Tpo $(DEPDIR)/gsnap_avx512-pbinom.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gsnap_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+
+gsnap_avx512-dynprog.o: dynprog.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog.Tpo -c -o gsnap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog.Tpo $(DEPDIR)/gsnap_avx512-dynprog.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gsnap_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+
+gsnap_avx512-dynprog.obj: dynprog.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog.Tpo -c -o gsnap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog.Tpo $(DEPDIR)/gsnap_avx512-dynprog.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gsnap_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+
+gsnap_avx512-dynprog_simd.o: dynprog_simd.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo -c -o gsnap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnap_avx512-dynprog_simd.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gsnap_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+
+gsnap_avx512-dynprog_simd.obj: dynprog_simd.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo -c -o gsnap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnap_avx512-dynprog_simd.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gsnap_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+
+gsnap_avx512-dynprog_single.o: dynprog_single.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo -c -o gsnap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo $(DEPDIR)/gsnap_avx512-dynprog_single.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gsnap_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+
+gsnap_avx512-dynprog_single.obj: dynprog_single.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo -c -o gsnap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo $(DEPDIR)/gsnap_avx512-dynprog_single.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gsnap_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+
+gsnap_avx512-dynprog_genome.o: dynprog_genome.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo -c -o gsnap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnap_avx512-dynprog_genome.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gsnap_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+
+gsnap_avx512-dynprog_genome.obj: dynprog_genome.c
+@am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo -c -o gsnap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+@am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnap_avx512-dynprog_genome.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gsnap_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+
+gsnap_avx512-dynprog_cdna.o: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo -c -o gsnap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnap_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gsnap_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+
+gsnap_avx512-dynprog_cdna.obj: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo -c -o gsnap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnap_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gsnap_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+
+gsnap_avx512-dynprog_end.o: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo -c -o gsnap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo $(DEPDIR)/gsnap_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gsnap_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+
+gsnap_avx512-dynprog_end.obj: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo -c -o gsnap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo $(DEPDIR)/gsnap_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gsnap_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+
+gsnap_avx512-gbuffer.o: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-gbuffer.Tpo -c -o gsnap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gbuffer.Tpo $(DEPDIR)/gsnap_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gsnap_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+
+gsnap_avx512-gbuffer.obj: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-gbuffer.Tpo -c -o gsnap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gbuffer.Tpo $(DEPDIR)/gsnap_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gsnap_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+
+gsnap_avx512-doublelist.o: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-doublelist.Tpo -c -o gsnap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-doublelist.Tpo $(DEPDIR)/gsnap_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gsnap_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+
+gsnap_avx512-doublelist.obj: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-doublelist.Tpo -c -o gsnap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-doublelist.Tpo $(DEPDIR)/gsnap_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gsnap_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+
+gsnap_avx512-smooth.o: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-smooth.Tpo -c -o gsnap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-smooth.Tpo $(DEPDIR)/gsnap_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gsnap_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+
+gsnap_avx512-smooth.obj: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-smooth.Tpo -c -o gsnap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-smooth.Tpo $(DEPDIR)/gsnap_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gsnap_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+
+gsnap_avx512-chimera.o: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-chimera.Tpo -c -o gsnap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chimera.Tpo $(DEPDIR)/gsnap_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gsnap_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+
+gsnap_avx512-chimera.obj: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-chimera.Tpo -c -o gsnap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chimera.Tpo $(DEPDIR)/gsnap_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gsnap_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+
+gsnap_avx512-stage3.o: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3.Tpo -c -o gsnap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3.Tpo $(DEPDIR)/gsnap_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gsnap_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+
+gsnap_avx512-stage3.obj: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3.Tpo -c -o gsnap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3.Tpo $(DEPDIR)/gsnap_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gsnap_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+
+gsnap_avx512-splicestringpool.o: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo -c -o gsnap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo $(DEPDIR)/gsnap_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gsnap_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+
+gsnap_avx512-splicestringpool.obj: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo -c -o gsnap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo $(DEPDIR)/gsnap_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gsnap_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+
+gsnap_avx512-splicetrie_build.o: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo -c -o gsnap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnap_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gsnap_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+
+gsnap_avx512-splicetrie_build.obj: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo -c -o gsnap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnap_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gsnap_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+
+gsnap_avx512-splicetrie.o: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie.Tpo -c -o gsnap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie.Tpo $(DEPDIR)/gsnap_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gsnap_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+
+gsnap_avx512-splicetrie.obj: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie.Tpo -c -o gsnap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie.Tpo $(DEPDIR)/gsnap_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gsnap_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+
+gsnap_avx512-splice.o: splice.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splice.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splice.Tpo -c -o gsnap_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splice.Tpo $(DEPDIR)/gsnap_avx512-splice.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splice.c' object='gsnap_avx512-splice.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c
+
+gsnap_avx512-splice.obj: splice.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splice.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splice.Tpo -c -o gsnap_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splice.Tpo $(DEPDIR)/gsnap_avx512-splice.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splice.c' object='gsnap_avx512-splice.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi`
+
+gsnap_avx512-indel.o: indel.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indel.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-indel.Tpo -c -o gsnap_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indel.Tpo $(DEPDIR)/gsnap_avx512-indel.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indel.c' object='gsnap_avx512-indel.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c
+
+gsnap_avx512-indel.obj: indel.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indel.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-indel.Tpo -c -o gsnap_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indel.Tpo $(DEPDIR)/gsnap_avx512-indel.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indel.c' object='gsnap_avx512-indel.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi`
+
+gsnap_avx512-bitpack64-access.o: bitpack64-access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-access.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo -c -o gsnap_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-access.c' object='gsnap_avx512-bitpack64-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c
+
+gsnap_avx512-bitpack64-access.obj: bitpack64-access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-access.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo -c -o gsnap_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-access.c' object='gsnap_avx512-bitpack64-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
+
+gsnap_avx512-bytecoding.o: bytecoding.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bytecoding.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bytecoding.Tpo -c -o gsnap_avx512-bytecoding.o `test -f 'bytecoding.c' || echo '$(srcdir)/'`bytecoding.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bytecoding.Tpo $(DEPDIR)/gsnap_avx512-bytecoding.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bytecoding.c' object='gsnap_avx512-bytecoding.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bytecoding.o `test -f 'bytecoding.c' || echo '$(srcdir)/'`bytecoding.c
+
+gsnap_avx512-bytecoding.obj: bytecoding.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bytecoding.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bytecoding.Tpo -c -o gsnap_avx512-bytecoding.obj `if test -f 'bytecoding.c'; then $(CYGPATH_W) 'bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/bytecoding.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bytecoding.Tpo $(DEPDIR)/gsnap_avx512-bytecoding.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bytecoding.c' object='gsnap_avx512-bytecoding.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bytecoding.obj `if test -f 'bytecoding.c'; then $(CYGPATH_W) 'bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/bytecoding.c'; fi`
+
+gsnap_avx512-univdiag.o: univdiag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univdiag.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-univdiag.Tpo -c -o gsnap_avx512-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univdiag.Tpo $(DEPDIR)/gsnap_avx512-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univdiag.c' object='gsnap_avx512-univdiag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c
+
+gsnap_avx512-univdiag.obj: univdiag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univdiag.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-univdiag.Tpo -c -o gsnap_avx512-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univdiag.Tpo $(DEPDIR)/gsnap_avx512-univdiag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univdiag.c' object='gsnap_avx512-univdiag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi`
+
+gsnap_avx512-sedgesort.o: sedgesort.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sedgesort.Tpo -c -o gsnap_avx512-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sedgesort.Tpo $(DEPDIR)/gsnap_avx512-sedgesort.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sedgesort.c' object='gsnap_avx512-sedgesort.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c
+
+gsnap_avx512-sedgesort.obj: sedgesort.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sedgesort.Tpo -c -o gsnap_avx512-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sedgesort.Tpo $(DEPDIR)/gsnap_avx512-sedgesort.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sedgesort.c' object='gsnap_avx512-sedgesort.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi`
+
+gsnap_avx512-sarray-read.o: sarray-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sarray-read.Tpo -c -o gsnap_avx512-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sarray-read.Tpo $(DEPDIR)/gsnap_avx512-sarray-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-read.c' object='gsnap_avx512-sarray-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c
+
+gsnap_avx512-sarray-read.obj: sarray-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sarray-read.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sarray-read.Tpo -c -o gsnap_avx512-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sarray-read.Tpo $(DEPDIR)/gsnap_avx512-sarray-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-read.c' object='gsnap_avx512-sarray-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
+
+gsnap_avx512-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sarray-search.Tpo -c -o gsnap_avx512-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sarray-search.Tpo $(DEPDIR)/gsnap_avx512-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_avx512-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_avx512-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sarray-search.Tpo -c -o gsnap_avx512-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sarray-search.Tpo $(DEPDIR)/gsnap_avx512-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_avx512-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_avx512-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge-heap.Tpo -c -o gsnap_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge-heap.Tpo $(DEPDIR)/gsnap_avx512-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_avx512-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_avx512-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge-heap.Tpo -c -o gsnap_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge-heap.Tpo $(DEPDIR)/gsnap_avx512-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_avx512-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
+gsnap_avx512-stage1hr.o: stage1hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage1hr.Tpo -c -o gsnap_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage1hr.Tpo $(DEPDIR)/gsnap_avx512-stage1hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1hr.c' object='gsnap_avx512-stage1hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
+
+gsnap_avx512-stage1hr.obj: stage1hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage1hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage1hr.Tpo -c -o gsnap_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage1hr.Tpo $(DEPDIR)/gsnap_avx512-stage1hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1hr.c' object='gsnap_avx512-stage1hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi`
+
+gsnap_avx512-request.o: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-request.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-request.Tpo -c -o gsnap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-request.Tpo $(DEPDIR)/gsnap_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gsnap_avx512-request.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+
+gsnap_avx512-request.obj: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-request.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-request.Tpo -c -o gsnap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-request.Tpo $(DEPDIR)/gsnap_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gsnap_avx512-request.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+
+gsnap_avx512-resulthr.o: resulthr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-resulthr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-resulthr.Tpo -c -o gsnap_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-resulthr.Tpo $(DEPDIR)/gsnap_avx512-resulthr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='resulthr.c' object='gsnap_avx512-resulthr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c
+
+gsnap_avx512-resulthr.obj: resulthr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-resulthr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-resulthr.Tpo -c -o gsnap_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-resulthr.Tpo $(DEPDIR)/gsnap_avx512-resulthr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='resulthr.c' object='gsnap_avx512-resulthr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi`
+
+gsnap_avx512-output.o: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-output.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-output.Tpo -c -o gsnap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-output.Tpo $(DEPDIR)/gsnap_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gsnap_avx512-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gsnap_avx512-output.obj: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-output.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-output.Tpo -c -o gsnap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-output.Tpo $(DEPDIR)/gsnap_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gsnap_avx512-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
+gsnap_avx512-inbuffer.o: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-inbuffer.Tpo -c -o gsnap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-inbuffer.Tpo $(DEPDIR)/gsnap_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gsnap_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+
+gsnap_avx512-inbuffer.obj: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-inbuffer.Tpo -c -o gsnap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-inbuffer.Tpo $(DEPDIR)/gsnap_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gsnap_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+
+gsnap_avx512-samheader.o: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-samheader.Tpo -c -o gsnap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samheader.Tpo $(DEPDIR)/gsnap_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gsnap_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+
+gsnap_avx512-samheader.obj: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-samheader.Tpo -c -o gsnap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samheader.Tpo $(DEPDIR)/gsnap_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gsnap_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+
+gsnap_avx512-outbuffer.o: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-outbuffer.Tpo -c -o gsnap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-outbuffer.Tpo $(DEPDIR)/gsnap_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gsnap_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+
+gsnap_avx512-outbuffer.obj: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-outbuffer.Tpo -c -o gsnap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-outbuffer.Tpo $(DEPDIR)/gsnap_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gsnap_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+
+gsnap_avx512-datadir.o: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-datadir.Tpo -c -o gsnap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-datadir.Tpo $(DEPDIR)/gsnap_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gsnap_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+
+gsnap_avx512-datadir.obj: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-datadir.Tpo -c -o gsnap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-datadir.Tpo $(DEPDIR)/gsnap_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gsnap_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+
+gsnap_avx512-parserange.o: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-parserange.Tpo -c -o gsnap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-parserange.Tpo $(DEPDIR)/gsnap_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gsnap_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+
+gsnap_avx512-parserange.obj: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-parserange.Tpo -c -o gsnap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-parserange.Tpo $(DEPDIR)/gsnap_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gsnap_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+
+gsnap_avx512-getopt.o: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt.Tpo -c -o gsnap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt.Tpo $(DEPDIR)/gsnap_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gsnap_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+
+gsnap_avx512-getopt.obj: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt.Tpo -c -o gsnap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt.Tpo $(DEPDIR)/gsnap_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gsnap_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+
+gsnap_avx512-getopt1.o: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt1.Tpo -c -o gsnap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt1.Tpo $(DEPDIR)/gsnap_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gsnap_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+
+gsnap_avx512-getopt1.obj: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt1.Tpo -c -o gsnap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt1.Tpo $(DEPDIR)/gsnap_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gsnap_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+
+gsnap_avx512-gsnap.o: gsnap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gsnap.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-gsnap.Tpo -c -o gsnap_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gsnap.Tpo $(DEPDIR)/gsnap_avx512-gsnap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gsnap.c' object='gsnap_avx512-gsnap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c
+
+gsnap_avx512-gsnap.obj: gsnap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gsnap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-gsnap.Tpo -c -o gsnap_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gsnap.Tpo $(DEPDIR)/gsnap_avx512-gsnap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gsnap.c' object='gsnap_avx512-gsnap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi`
+
 gsnap_nosimd-except.o: except.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-except.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-except.Tpo -c -o gsnap_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-except.Tpo $(DEPDIR)/gsnap_nosimd-except.Po
@@ -24086,6 +29315,20 @@ gsnap_nosimd-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnap_nosimd-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge.Tpo -c -o gsnap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge.Tpo $(DEPDIR)/gsnap_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_nosimd-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge.Tpo -c -o gsnap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge.Tpo $(DEPDIR)/gsnap_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnap_nosimd-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-indexdb.Tpo -c -o gsnap_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-indexdb.Tpo $(DEPDIR)/gsnap_nosimd-indexdb.Po
@@ -24184,6 +29427,20 @@ gsnap_nosimd-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnap_nosimd-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-cigar.Tpo -c -o gsnap_nosimd-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-cigar.Tpo $(DEPDIR)/gsnap_nosimd-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_nosimd-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_nosimd-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-cigar.Tpo -c -o gsnap_nosimd-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-cigar.Tpo $(DEPDIR)/gsnap_nosimd-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_nosimd-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnap_nosimd-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-samprint.Tpo -c -o gsnap_nosimd-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-samprint.Tpo $(DEPDIR)/gsnap_nosimd-samprint.Po
@@ -24786,6 +30043,34 @@ gsnap_nosimd-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+gsnap_nosimd-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-sarray-search.Tpo -c -o gsnap_nosimd-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-sarray-search.Tpo $(DEPDIR)/gsnap_nosimd-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_nosimd-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_nosimd-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-sarray-search.Tpo -c -o gsnap_nosimd-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-sarray-search.Tpo $(DEPDIR)/gsnap_nosimd-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_nosimd-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_nosimd-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo -c -o gsnap_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo $(DEPDIR)/gsnap_nosimd-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_nosimd-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_nosimd-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo -c -o gsnap_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo $(DEPDIR)/gsnap_nosimd-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_nosimd-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnap_nosimd-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-stage1hr.Tpo -c -o gsnap_nosimd-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-stage1hr.Tpo $(DEPDIR)/gsnap_nosimd-stage1hr.Po
@@ -25346,6 +30631,20 @@ gsnap_sse2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnap_sse2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge.Tpo -c -o gsnap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge.Tpo $(DEPDIR)/gsnap_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_sse2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_sse2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge.Tpo -c -o gsnap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge.Tpo $(DEPDIR)/gsnap_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnap_sse2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-indexdb.Tpo -c -o gsnap_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-indexdb.Tpo $(DEPDIR)/gsnap_sse2-indexdb.Po
@@ -25444,6 +30743,20 @@ gsnap_sse2-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnap_sse2-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-cigar.Tpo -c -o gsnap_sse2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-cigar.Tpo $(DEPDIR)/gsnap_sse2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_sse2-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_sse2-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-cigar.Tpo -c -o gsnap_sse2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-cigar.Tpo $(DEPDIR)/gsnap_sse2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_sse2-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnap_sse2-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-samprint.Tpo -c -o gsnap_sse2-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-samprint.Tpo $(DEPDIR)/gsnap_sse2-samprint.Po
@@ -26046,6 +31359,34 @@ gsnap_sse2-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+gsnap_sse2-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-sarray-search.Tpo -c -o gsnap_sse2-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-sarray-search.Tpo $(DEPDIR)/gsnap_sse2-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_sse2-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_sse2-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-sarray-search.Tpo -c -o gsnap_sse2-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-sarray-search.Tpo $(DEPDIR)/gsnap_sse2-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_sse2-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_sse2-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge-heap.Tpo -c -o gsnap_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge-heap.Tpo $(DEPDIR)/gsnap_sse2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_sse2-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_sse2-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge-heap.Tpo -c -o gsnap_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge-heap.Tpo $(DEPDIR)/gsnap_sse2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_sse2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnap_sse2-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-stage1hr.Tpo -c -o gsnap_sse2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-stage1hr.Tpo $(DEPDIR)/gsnap_sse2-stage1hr.Po
@@ -26606,6 +31947,20 @@ gsnap_sse41-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnap_sse41-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge.Tpo -c -o gsnap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge.Tpo $(DEPDIR)/gsnap_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_sse41-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_sse41-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge.Tpo -c -o gsnap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge.Tpo $(DEPDIR)/gsnap_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnap_sse41-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-indexdb.Tpo -c -o gsnap_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-indexdb.Tpo $(DEPDIR)/gsnap_sse41-indexdb.Po
@@ -26704,6 +32059,20 @@ gsnap_sse41-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnap_sse41-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-cigar.Tpo -c -o gsnap_sse41-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-cigar.Tpo $(DEPDIR)/gsnap_sse41-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_sse41-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_sse41-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-cigar.Tpo -c -o gsnap_sse41-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-cigar.Tpo $(DEPDIR)/gsnap_sse41-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_sse41-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnap_sse41-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-samprint.Tpo -c -o gsnap_sse41-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-samprint.Tpo $(DEPDIR)/gsnap_sse41-samprint.Po
@@ -27306,6 +32675,34 @@ gsnap_sse41-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+gsnap_sse41-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-sarray-search.Tpo -c -o gsnap_sse41-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-sarray-search.Tpo $(DEPDIR)/gsnap_sse41-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_sse41-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_sse41-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-sarray-search.Tpo -c -o gsnap_sse41-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-sarray-search.Tpo $(DEPDIR)/gsnap_sse41-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_sse41-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_sse41-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge-heap.Tpo -c -o gsnap_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge-heap.Tpo $(DEPDIR)/gsnap_sse41-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_sse41-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_sse41-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge-heap.Tpo -c -o gsnap_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge-heap.Tpo $(DEPDIR)/gsnap_sse41-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_sse41-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnap_sse41-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-stage1hr.Tpo -c -o gsnap_sse41-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-stage1hr.Tpo $(DEPDIR)/gsnap_sse41-stage1hr.Po
@@ -27866,6 +33263,20 @@ gsnap_sse42-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnap_sse42-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge.Tpo -c -o gsnap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge.Tpo $(DEPDIR)/gsnap_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_sse42-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_sse42-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge.Tpo -c -o gsnap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge.Tpo $(DEPDIR)/gsnap_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnap_sse42-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-indexdb.Tpo -c -o gsnap_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-indexdb.Tpo $(DEPDIR)/gsnap_sse42-indexdb.Po
@@ -27964,6 +33375,20 @@ gsnap_sse42-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnap_sse42-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-cigar.Tpo -c -o gsnap_sse42-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-cigar.Tpo $(DEPDIR)/gsnap_sse42-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_sse42-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_sse42-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-cigar.Tpo -c -o gsnap_sse42-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-cigar.Tpo $(DEPDIR)/gsnap_sse42-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_sse42-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnap_sse42-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-samprint.Tpo -c -o gsnap_sse42-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-samprint.Tpo $(DEPDIR)/gsnap_sse42-samprint.Po
@@ -28566,6 +33991,34 @@ gsnap_sse42-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+gsnap_sse42-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-sarray-search.Tpo -c -o gsnap_sse42-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-sarray-search.Tpo $(DEPDIR)/gsnap_sse42-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_sse42-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_sse42-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-sarray-search.Tpo -c -o gsnap_sse42-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-sarray-search.Tpo $(DEPDIR)/gsnap_sse42-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_sse42-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_sse42-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge-heap.Tpo -c -o gsnap_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge-heap.Tpo $(DEPDIR)/gsnap_sse42-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_sse42-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_sse42-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge-heap.Tpo -c -o gsnap_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge-heap.Tpo $(DEPDIR)/gsnap_sse42-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_sse42-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnap_sse42-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-stage1hr.Tpo -c -o gsnap_sse42-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-stage1hr.Tpo $(DEPDIR)/gsnap_sse42-stage1hr.Po
@@ -29126,6 +34579,20 @@ gsnap_ssse3-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnap_ssse3-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge.Tpo -c -o gsnap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge.Tpo $(DEPDIR)/gsnap_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnap_ssse3-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge.Tpo -c -o gsnap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge.Tpo $(DEPDIR)/gsnap_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnap_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnap_ssse3-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-indexdb.Tpo -c -o gsnap_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-indexdb.Tpo $(DEPDIR)/gsnap_ssse3-indexdb.Po
@@ -29224,6 +34691,20 @@ gsnap_ssse3-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnap_ssse3-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-cigar.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-cigar.Tpo -c -o gsnap_ssse3-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-cigar.Tpo $(DEPDIR)/gsnap_ssse3-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_ssse3-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnap_ssse3-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-cigar.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-cigar.Tpo -c -o gsnap_ssse3-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-cigar.Tpo $(DEPDIR)/gsnap_ssse3-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnap_ssse3-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnap_ssse3-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-samprint.Tpo -c -o gsnap_ssse3-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-samprint.Tpo $(DEPDIR)/gsnap_ssse3-samprint.Po
@@ -29826,6 +35307,34 @@ gsnap_ssse3-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+gsnap_ssse3-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-sarray-search.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-sarray-search.Tpo -c -o gsnap_ssse3-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-sarray-search.Tpo $(DEPDIR)/gsnap_ssse3-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_ssse3-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+gsnap_ssse3-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-sarray-search.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-sarray-search.Tpo -c -o gsnap_ssse3-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-sarray-search.Tpo $(DEPDIR)/gsnap_ssse3-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='gsnap_ssse3-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+gsnap_ssse3-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo -c -o gsnap_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo $(DEPDIR)/gsnap_ssse3-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_ssse3-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnap_ssse3-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo -c -o gsnap_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo $(DEPDIR)/gsnap_ssse3-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnap_ssse3-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnap_ssse3-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-stage1hr.Tpo -c -o gsnap_ssse3-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-stage1hr.Tpo $(DEPDIR)/gsnap_ssse3-stage1hr.Po
@@ -30428,6 +35937,20 @@ gsnapl_avx2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnapl_avx2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge.Tpo -c -o gsnapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge.Tpo $(DEPDIR)/gsnapl_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_avx2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_avx2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge.Tpo -c -o gsnapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge.Tpo $(DEPDIR)/gsnapl_avx2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnapl_avx2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-indexdb.Tpo -c -o gsnapl_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-indexdb.Tpo $(DEPDIR)/gsnapl_avx2-indexdb.Po
@@ -30526,6 +36049,20 @@ gsnapl_avx2-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnapl_avx2-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-cigar.Tpo -c -o gsnapl_avx2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-cigar.Tpo $(DEPDIR)/gsnapl_avx2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_avx2-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_avx2-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx2-cigar.Tpo -c -o gsnapl_avx2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-cigar.Tpo $(DEPDIR)/gsnapl_avx2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_avx2-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnapl_avx2-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-samprint.Tpo -c -o gsnapl_avx2-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-samprint.Tpo $(DEPDIR)/gsnapl_avx2-samprint.Po
@@ -31072,6 +36609,20 @@ gsnapl_avx2-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+gsnapl_avx2-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo -c -o gsnapl_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo $(DEPDIR)/gsnapl_avx2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_avx2-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_avx2-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo -c -o gsnapl_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo $(DEPDIR)/gsnapl_avx2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_avx2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnapl_avx2-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-stage1hr.Tpo -c -o gsnapl_avx2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-stage1hr.Tpo $(DEPDIR)/gsnapl_avx2-stage1hr.Po
@@ -31240,6 +36791,1266 @@ gsnapl_avx2-gsnap.obj: gsnap.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi`
 
+gsnapl_avx512-except.o: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-except.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-except.Tpo -c -o gsnapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-except.Tpo $(DEPDIR)/gsnapl_avx512-except.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gsnapl_avx512-except.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
+
+gsnapl_avx512-except.obj: except.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-except.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-except.Tpo -c -o gsnapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-except.Tpo $(DEPDIR)/gsnapl_avx512-except.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='except.c' object='gsnapl_avx512-except.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi`
+
+gsnapl_avx512-assert.o: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-assert.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-assert.Tpo -c -o gsnapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-assert.Tpo $(DEPDIR)/gsnapl_avx512-assert.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gsnapl_avx512-assert.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c
+
+gsnapl_avx512-assert.obj: assert.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-assert.Tpo -c -o gsnapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-assert.Tpo $(DEPDIR)/gsnapl_avx512-assert.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='assert.c' object='gsnapl_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi`
+
+gsnapl_avx512-mem.o: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mem.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mem.Tpo -c -o gsnapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mem.Tpo $(DEPDIR)/gsnapl_avx512-mem.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gsnapl_avx512-mem.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c
+
+gsnapl_avx512-mem.obj: mem.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mem.Tpo -c -o gsnapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mem.Tpo $(DEPDIR)/gsnapl_avx512-mem.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mem.c' object='gsnapl_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi`
+
+gsnapl_avx512-intlist.o: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intlist.Tpo -c -o gsnapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intlist.Tpo $(DEPDIR)/gsnapl_avx512-intlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gsnapl_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c
+
+gsnapl_avx512-intlist.obj: intlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intlist.Tpo -c -o gsnapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intlist.Tpo $(DEPDIR)/gsnapl_avx512-intlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intlist.c' object='gsnapl_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi`
+
+gsnapl_avx512-list.o: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-list.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-list.Tpo -c -o gsnapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-list.Tpo $(DEPDIR)/gsnapl_avx512-list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gsnapl_avx512-list.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c
+
+gsnapl_avx512-list.obj: list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-list.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-list.Tpo -c -o gsnapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-list.Tpo $(DEPDIR)/gsnapl_avx512-list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='list.c' object='gsnapl_avx512-list.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi`
+
+gsnapl_avx512-littleendian.o: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-littleendian.Tpo -c -o gsnapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-littleendian.Tpo $(DEPDIR)/gsnapl_avx512-littleendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gsnapl_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c
+
+gsnapl_avx512-littleendian.obj: littleendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-littleendian.Tpo -c -o gsnapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-littleendian.Tpo $(DEPDIR)/gsnapl_avx512-littleendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='littleendian.c' object='gsnapl_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi`
+
+gsnapl_avx512-bigendian.o: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bigendian.Tpo -c -o gsnapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bigendian.Tpo $(DEPDIR)/gsnapl_avx512-bigendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gsnapl_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c
+
+gsnapl_avx512-bigendian.obj: bigendian.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bigendian.Tpo -c -o gsnapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bigendian.Tpo $(DEPDIR)/gsnapl_avx512-bigendian.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bigendian.c' object='gsnapl_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi`
+
+gsnapl_avx512-univinterval.o: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-univinterval.Tpo -c -o gsnapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-univinterval.Tpo $(DEPDIR)/gsnapl_avx512-univinterval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gsnapl_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c
+
+gsnapl_avx512-univinterval.obj: univinterval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-univinterval.Tpo -c -o gsnapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-univinterval.Tpo $(DEPDIR)/gsnapl_avx512-univinterval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='univinterval.c' object='gsnapl_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi`
+
+gsnapl_avx512-interval.o: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-interval.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-interval.Tpo -c -o gsnapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-interval.Tpo $(DEPDIR)/gsnapl_avx512-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gsnapl_avx512-interval.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c
+
+gsnapl_avx512-interval.obj: interval.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-interval.Tpo -c -o gsnapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-interval.Tpo $(DEPDIR)/gsnapl_avx512-interval.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='interval.c' object='gsnapl_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi`
+
+gsnapl_avx512-uintlist.o: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uintlist.Tpo -c -o gsnapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uintlist.Tpo $(DEPDIR)/gsnapl_avx512-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gsnapl_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c
+
+gsnapl_avx512-uintlist.obj: uintlist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uintlist.Tpo -c -o gsnapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uintlist.Tpo $(DEPDIR)/gsnapl_avx512-uintlist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uintlist.c' object='gsnapl_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi`
+
+gsnapl_avx512-uint8list.o: uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uint8list.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uint8list.Tpo -c -o gsnapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uint8list.Tpo $(DEPDIR)/gsnapl_avx512-uint8list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uint8list.c' object='gsnapl_avx512-uint8list.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c
+
+gsnapl_avx512-uint8list.obj: uint8list.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uint8list.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uint8list.Tpo -c -o gsnapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uint8list.Tpo $(DEPDIR)/gsnapl_avx512-uint8list.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='uint8list.c' object='gsnapl_avx512-uint8list.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi`
+
+gsnapl_avx512-stopwatch.o: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo -c -o gsnapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo $(DEPDIR)/gsnapl_avx512-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gsnapl_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c
+
+gsnapl_avx512-stopwatch.obj: stopwatch.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo -c -o gsnapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo $(DEPDIR)/gsnapl_avx512-stopwatch.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stopwatch.c' object='gsnapl_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi`
+
+gsnapl_avx512-semaphore.o: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-semaphore.Tpo -c -o gsnapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-semaphore.Tpo $(DEPDIR)/gsnapl_avx512-semaphore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gsnapl_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c
+
+gsnapl_avx512-semaphore.obj: semaphore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-semaphore.Tpo -c -o gsnapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-semaphore.Tpo $(DEPDIR)/gsnapl_avx512-semaphore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='semaphore.c' object='gsnapl_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi`
+
+gsnapl_avx512-access.o: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-access.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-access.Tpo -c -o gsnapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-access.Tpo $(DEPDIR)/gsnapl_avx512-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gsnapl_avx512-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c
+
+gsnapl_avx512-access.obj: access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-access.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-access.Tpo -c -o gsnapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-access.Tpo $(DEPDIR)/gsnapl_avx512-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='access.c' object='gsnapl_avx512-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi`
+
+gsnapl_avx512-filestring.o: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-filestring.Tpo -c -o gsnapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-filestring.Tpo $(DEPDIR)/gsnapl_avx512-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gsnapl_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c
+
+gsnapl_avx512-filestring.obj: filestring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-filestring.Tpo -c -o gsnapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-filestring.Tpo $(DEPDIR)/gsnapl_avx512-filestring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='filestring.c' object='gsnapl_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi`
+
+gsnapl_avx512-iit-read-univ.o: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo -c -o gsnapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnapl_avx512-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gsnapl_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c
+
+gsnapl_avx512-iit-read-univ.obj: iit-read-univ.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo -c -o gsnapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnapl_avx512-iit-read-univ.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read-univ.c' object='gsnapl_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi`
+
+gsnapl_avx512-iit-read.o: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read.Tpo -c -o gsnapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read.Tpo $(DEPDIR)/gsnapl_avx512-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gsnapl_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c
+
+gsnapl_avx512-iit-read.obj: iit-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read.Tpo -c -o gsnapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read.Tpo $(DEPDIR)/gsnapl_avx512-iit-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='iit-read.c' object='gsnapl_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi`
+
+gsnapl_avx512-md5.o: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-md5.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-md5.Tpo -c -o gsnapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-md5.Tpo $(DEPDIR)/gsnapl_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gsnapl_avx512-md5.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c
+
+gsnapl_avx512-md5.obj: md5.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-md5.Tpo -c -o gsnapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-md5.Tpo $(DEPDIR)/gsnapl_avx512-md5.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='md5.c' object='gsnapl_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi`
+
+gsnapl_avx512-bzip2.o: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bzip2.Tpo -c -o gsnapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bzip2.Tpo $(DEPDIR)/gsnapl_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gsnapl_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c
+
+gsnapl_avx512-bzip2.obj: bzip2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bzip2.Tpo -c -o gsnapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bzip2.Tpo $(DEPDIR)/gsnapl_avx512-bzip2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bzip2.c' object='gsnapl_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi`
+
+gsnapl_avx512-sequence.o: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-sequence.Tpo -c -o gsnapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-sequence.Tpo $(DEPDIR)/gsnapl_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gsnapl_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c
+
+gsnapl_avx512-sequence.obj: sequence.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-sequence.Tpo -c -o gsnapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-sequence.Tpo $(DEPDIR)/gsnapl_avx512-sequence.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sequence.c' object='gsnapl_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi`
+
+gsnapl_avx512-reader.o: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-reader.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-reader.Tpo -c -o gsnapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-reader.Tpo $(DEPDIR)/gsnapl_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gsnapl_avx512-reader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c
+
+gsnapl_avx512-reader.obj: reader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-reader.Tpo -c -o gsnapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-reader.Tpo $(DEPDIR)/gsnapl_avx512-reader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='reader.c' object='gsnapl_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi`
+
+gsnapl_avx512-genomicpos.o: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo -c -o gsnapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo $(DEPDIR)/gsnapl_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gsnapl_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c
+
+gsnapl_avx512-genomicpos.obj: genomicpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo -c -o gsnapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo $(DEPDIR)/gsnapl_avx512-genomicpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genomicpos.c' object='gsnapl_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi`
+
+gsnapl_avx512-compress.o: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-compress.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-compress.Tpo -c -o gsnapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-compress.Tpo $(DEPDIR)/gsnapl_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gsnapl_avx512-compress.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c
+
+gsnapl_avx512-compress.obj: compress.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-compress.Tpo -c -o gsnapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-compress.Tpo $(DEPDIR)/gsnapl_avx512-compress.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='compress.c' object='gsnapl_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi`
+
+gsnapl_avx512-genome.o: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome.Tpo -c -o gsnapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome.Tpo $(DEPDIR)/gsnapl_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gsnapl_avx512-genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c
+
+gsnapl_avx512-genome.obj: genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome.Tpo -c -o gsnapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome.Tpo $(DEPDIR)/gsnapl_avx512-genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome.c' object='gsnapl_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi`
+
+gsnapl_avx512-popcount.o: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-popcount.Tpo -c -o gsnapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-popcount.Tpo $(DEPDIR)/gsnapl_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gsnapl_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c
+
+gsnapl_avx512-popcount.obj: popcount.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-popcount.Tpo -c -o gsnapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-popcount.Tpo $(DEPDIR)/gsnapl_avx512-popcount.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='popcount.c' object='gsnapl_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi`
+
+gsnapl_avx512-genome128_hr.o: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo -c -o gsnapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo $(DEPDIR)/gsnapl_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gsnapl_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c
+
+gsnapl_avx512-genome128_hr.obj: genome128_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo -c -o gsnapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo $(DEPDIR)/gsnapl_avx512-genome128_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome128_hr.c' object='gsnapl_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi`
+
+gsnapl_avx512-genome_sites.o: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo -c -o gsnapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo $(DEPDIR)/gsnapl_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gsnapl_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c
+
+gsnapl_avx512-genome_sites.obj: genome_sites.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo -c -o gsnapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo $(DEPDIR)/gsnapl_avx512-genome_sites.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='genome_sites.c' object='gsnapl_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi`
+
+gsnapl_avx512-bitpack64-read.o: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo -c -o gsnapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gsnapl_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c
+
+gsnapl_avx512-bitpack64-read.obj: bitpack64-read.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo -c -o gsnapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-read.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-read.c' object='gsnapl_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi`
+
+gsnapl_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo -c -o gsnapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gsnapl_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c
+
+gsnapl_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo -c -o gsnapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-readtwo.c' object='gsnapl_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
+
+gsnapl_avx512-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge.Tpo -c -o gsnapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge.Tpo $(DEPDIR)/gsnapl_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_avx512-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_avx512-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge.Tpo -c -o gsnapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge.Tpo $(DEPDIR)/gsnapl_avx512-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
+gsnapl_avx512-indexdb.o: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb.Tpo -c -o gsnapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb.Tpo $(DEPDIR)/gsnapl_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gsnapl_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
+
+gsnapl_avx512-indexdb.obj: indexdb.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb.Tpo -c -o gsnapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb.Tpo $(DEPDIR)/gsnapl_avx512-indexdb.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb.c' object='gsnapl_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi`
+
+gsnapl_avx512-indexdb_hr.o: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo -c -o gsnapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnapl_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gsnapl_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c
+
+gsnapl_avx512-indexdb_hr.obj: indexdb_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo -c -o gsnapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnapl_avx512-indexdb_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indexdb_hr.c' object='gsnapl_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi`
+
+gsnapl_avx512-oligo.o: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligo.Tpo -c -o gsnapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligo.Tpo $(DEPDIR)/gsnapl_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gsnapl_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c
+
+gsnapl_avx512-oligo.obj: oligo.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligo.Tpo -c -o gsnapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligo.Tpo $(DEPDIR)/gsnapl_avx512-oligo.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligo.c' object='gsnapl_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi`
+
+gsnapl_avx512-chrom.o: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrom.Tpo -c -o gsnapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrom.Tpo $(DEPDIR)/gsnapl_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gsnapl_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
+
+gsnapl_avx512-chrom.obj: chrom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrom.Tpo -c -o gsnapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrom.Tpo $(DEPDIR)/gsnapl_avx512-chrom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrom.c' object='gsnapl_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi`
+
+gsnapl_avx512-segmentpos.o: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo -c -o gsnapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo $(DEPDIR)/gsnapl_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gsnapl_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c
+
+gsnapl_avx512-segmentpos.obj: segmentpos.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo -c -o gsnapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo $(DEPDIR)/gsnapl_avx512-segmentpos.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='segmentpos.c' object='gsnapl_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi`
+
+gsnapl_avx512-chrnum.o: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrnum.Tpo -c -o gsnapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrnum.Tpo $(DEPDIR)/gsnapl_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gsnapl_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c
+
+gsnapl_avx512-chrnum.obj: chrnum.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrnum.Tpo -c -o gsnapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrnum.Tpo $(DEPDIR)/gsnapl_avx512-chrnum.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chrnum.c' object='gsnapl_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi`
+
+gsnapl_avx512-maxent_hr.o: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo -c -o gsnapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo $(DEPDIR)/gsnapl_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gsnapl_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c
+
+gsnapl_avx512-maxent_hr.obj: maxent_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo -c -o gsnapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo $(DEPDIR)/gsnapl_avx512-maxent_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent_hr.c' object='gsnapl_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
+
+gsnapl_avx512-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cigar.Tpo -c -o gsnapl_avx512-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cigar.Tpo $(DEPDIR)/gsnapl_avx512-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_avx512-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_avx512-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cigar.Tpo -c -o gsnapl_avx512-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cigar.Tpo $(DEPDIR)/gsnapl_avx512-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_avx512-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
+gsnapl_avx512-samprint.o: samprint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samprint.Tpo -c -o gsnapl_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samprint.Tpo $(DEPDIR)/gsnapl_avx512-samprint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samprint.c' object='gsnapl_avx512-samprint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
+
+gsnapl_avx512-samprint.obj: samprint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samprint.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samprint.Tpo -c -o gsnapl_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samprint.Tpo $(DEPDIR)/gsnapl_avx512-samprint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samprint.c' object='gsnapl_avx512-samprint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi`
+
+gsnapl_avx512-mapq.o: mapq.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mapq.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mapq.Tpo -c -o gsnapl_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mapq.Tpo $(DEPDIR)/gsnapl_avx512-mapq.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mapq.c' object='gsnapl_avx512-mapq.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c
+
+gsnapl_avx512-mapq.obj: mapq.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mapq.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mapq.Tpo -c -o gsnapl_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mapq.Tpo $(DEPDIR)/gsnapl_avx512-mapq.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='mapq.c' object='gsnapl_avx512-mapq.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi`
+
+gsnapl_avx512-shortread.o: shortread.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-shortread.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-shortread.Tpo -c -o gsnapl_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-shortread.Tpo $(DEPDIR)/gsnapl_avx512-shortread.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shortread.c' object='gsnapl_avx512-shortread.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c
+
+gsnapl_avx512-shortread.obj: shortread.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-shortread.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-shortread.Tpo -c -o gsnapl_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-shortread.Tpo $(DEPDIR)/gsnapl_avx512-shortread.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='shortread.c' object='gsnapl_avx512-shortread.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi`
+
+gsnapl_avx512-substring.o: substring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-substring.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-substring.Tpo -c -o gsnapl_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-substring.Tpo $(DEPDIR)/gsnapl_avx512-substring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='substring.c' object='gsnapl_avx512-substring.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c
+
+gsnapl_avx512-substring.obj: substring.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-substring.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-substring.Tpo -c -o gsnapl_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-substring.Tpo $(DEPDIR)/gsnapl_avx512-substring.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='substring.c' object='gsnapl_avx512-substring.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi`
+
+gsnapl_avx512-junction.o: junction.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-junction.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-junction.Tpo -c -o gsnapl_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-junction.Tpo $(DEPDIR)/gsnapl_avx512-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='junction.c' object='gsnapl_avx512-junction.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c
+
+gsnapl_avx512-junction.obj: junction.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-junction.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-junction.Tpo -c -o gsnapl_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-junction.Tpo $(DEPDIR)/gsnapl_avx512-junction.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='junction.c' object='gsnapl_avx512-junction.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi`
+
+gsnapl_avx512-stage3hr.o: stage3hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo -c -o gsnapl_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo $(DEPDIR)/gsnapl_avx512-stage3hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3hr.c' object='gsnapl_avx512-stage3hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c
+
+gsnapl_avx512-stage3hr.obj: stage3hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo -c -o gsnapl_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo $(DEPDIR)/gsnapl_avx512-stage3hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3hr.c' object='gsnapl_avx512-stage3hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi`
+
+gsnapl_avx512-spanningelt.o: spanningelt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-spanningelt.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo -c -o gsnapl_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo $(DEPDIR)/gsnapl_avx512-spanningelt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='spanningelt.c' object='gsnapl_avx512-spanningelt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c
+
+gsnapl_avx512-spanningelt.obj: spanningelt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-spanningelt.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo -c -o gsnapl_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo $(DEPDIR)/gsnapl_avx512-spanningelt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='spanningelt.c' object='gsnapl_avx512-spanningelt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi`
+
+gsnapl_avx512-cmet.o: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cmet.Tpo -c -o gsnapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cmet.Tpo $(DEPDIR)/gsnapl_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gsnapl_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c
+
+gsnapl_avx512-cmet.obj: cmet.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cmet.Tpo -c -o gsnapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cmet.Tpo $(DEPDIR)/gsnapl_avx512-cmet.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cmet.c' object='gsnapl_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi`
+
+gsnapl_avx512-atoi.o: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-atoi.Tpo -c -o gsnapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-atoi.Tpo $(DEPDIR)/gsnapl_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gsnapl_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c
+
+gsnapl_avx512-atoi.obj: atoi.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-atoi.Tpo -c -o gsnapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-atoi.Tpo $(DEPDIR)/gsnapl_avx512-atoi.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='atoi.c' object='gsnapl_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
+
+gsnapl_avx512-maxent.o: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent.Tpo -c -o gsnapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent.Tpo $(DEPDIR)/gsnapl_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gsnapl_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
+
+gsnapl_avx512-maxent.obj: maxent.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent.Tpo -c -o gsnapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent.Tpo $(DEPDIR)/gsnapl_avx512-maxent.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='maxent.c' object='gsnapl_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi`
+
+gsnapl_avx512-pair.o: pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pair.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pair.Tpo -c -o gsnapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pair.Tpo $(DEPDIR)/gsnapl_avx512-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gsnapl_avx512-pair.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c
+
+gsnapl_avx512-pair.obj: pair.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pair.Tpo -c -o gsnapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pair.Tpo $(DEPDIR)/gsnapl_avx512-pair.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pair.c' object='gsnapl_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi`
+
+gsnapl_avx512-pairpool.o: pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pairpool.Tpo -c -o gsnapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pairpool.Tpo $(DEPDIR)/gsnapl_avx512-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gsnapl_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c
+
+gsnapl_avx512-pairpool.obj: pairpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pairpool.Tpo -c -o gsnapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pairpool.Tpo $(DEPDIR)/gsnapl_avx512-pairpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pairpool.c' object='gsnapl_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi`
+
+gsnapl_avx512-diag.o: diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diag.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diag.Tpo -c -o gsnapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diag.Tpo $(DEPDIR)/gsnapl_avx512-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gsnapl_avx512-diag.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c
+
+gsnapl_avx512-diag.obj: diag.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diag.Tpo -c -o gsnapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diag.Tpo $(DEPDIR)/gsnapl_avx512-diag.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diag.c' object='gsnapl_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi`
+
+gsnapl_avx512-diagpool.o: diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diagpool.Tpo -c -o gsnapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diagpool.Tpo $(DEPDIR)/gsnapl_avx512-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gsnapl_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c
+
+gsnapl_avx512-diagpool.obj: diagpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diagpool.Tpo -c -o gsnapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diagpool.Tpo $(DEPDIR)/gsnapl_avx512-diagpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='diagpool.c' object='gsnapl_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi`
+
+gsnapl_avx512-orderstat.o: orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-orderstat.Tpo -c -o gsnapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-orderstat.Tpo $(DEPDIR)/gsnapl_avx512-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gsnapl_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c
+
+gsnapl_avx512-orderstat.obj: orderstat.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-orderstat.Tpo -c -o gsnapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-orderstat.Tpo $(DEPDIR)/gsnapl_avx512-orderstat.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='orderstat.c' object='gsnapl_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi`
+
+gsnapl_avx512-oligoindex_hr.o: oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo -c -o gsnapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gsnapl_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c
+
+gsnapl_avx512-oligoindex_hr.obj: oligoindex_hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo -c -o gsnapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='oligoindex_hr.c' object='gsnapl_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi`
+
+gsnapl_avx512-cellpool.o: cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cellpool.Tpo -c -o gsnapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cellpool.Tpo $(DEPDIR)/gsnapl_avx512-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gsnapl_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c
+
+gsnapl_avx512-cellpool.obj: cellpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cellpool.Tpo -c -o gsnapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cellpool.Tpo $(DEPDIR)/gsnapl_avx512-cellpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cellpool.c' object='gsnapl_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi`
+
+gsnapl_avx512-stage2.o: stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage2.Tpo -c -o gsnapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage2.Tpo $(DEPDIR)/gsnapl_avx512-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gsnapl_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c
+
+gsnapl_avx512-stage2.obj: stage2.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage2.Tpo -c -o gsnapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage2.Tpo $(DEPDIR)/gsnapl_avx512-stage2.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage2.c' object='gsnapl_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi`
+
+gsnapl_avx512-intron.o: intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intron.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intron.Tpo -c -o gsnapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intron.Tpo $(DEPDIR)/gsnapl_avx512-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gsnapl_avx512-intron.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c
+
+gsnapl_avx512-intron.obj: intron.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intron.Tpo -c -o gsnapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intron.Tpo $(DEPDIR)/gsnapl_avx512-intron.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='intron.c' object='gsnapl_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi`
+
+gsnapl_avx512-boyer-moore.o: boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo -c -o gsnapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo $(DEPDIR)/gsnapl_avx512-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gsnapl_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c
+
+gsnapl_avx512-boyer-moore.obj: boyer-moore.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo -c -o gsnapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo $(DEPDIR)/gsnapl_avx512-boyer-moore.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='boyer-moore.c' object='gsnapl_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi`
+
+gsnapl_avx512-changepoint.o: changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-changepoint.Tpo -c -o gsnapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-changepoint.Tpo $(DEPDIR)/gsnapl_avx512-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gsnapl_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c
+
+gsnapl_avx512-changepoint.obj: changepoint.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-changepoint.Tpo -c -o gsnapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-changepoint.Tpo $(DEPDIR)/gsnapl_avx512-changepoint.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='changepoint.c' object='gsnapl_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi`
+
+gsnapl_avx512-pbinom.o: pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pbinom.Tpo -c -o gsnapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pbinom.Tpo $(DEPDIR)/gsnapl_avx512-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gsnapl_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c
+
+gsnapl_avx512-pbinom.obj: pbinom.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pbinom.Tpo -c -o gsnapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pbinom.Tpo $(DEPDIR)/gsnapl_avx512-pbinom.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='pbinom.c' object='gsnapl_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi`
+
+gsnapl_avx512-dynprog.o: dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog.Tpo -c -o gsnapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog.Tpo $(DEPDIR)/gsnapl_avx512-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gsnapl_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c
+
+gsnapl_avx512-dynprog.obj: dynprog.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog.Tpo -c -o gsnapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog.Tpo $(DEPDIR)/gsnapl_avx512-dynprog.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog.c' object='gsnapl_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi`
+
+gsnapl_avx512-dynprog_simd.o: dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo -c -o gsnapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gsnapl_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c
+
+gsnapl_avx512-dynprog_simd.obj: dynprog_simd.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo -c -o gsnapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_simd.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_simd.c' object='gsnapl_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi`
+
+gsnapl_avx512-dynprog_single.o: dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo -c -o gsnapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gsnapl_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c
+
+gsnapl_avx512-dynprog_single.obj: dynprog_single.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo -c -o gsnapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_single.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_single.c' object='gsnapl_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi`
+
+gsnapl_avx512-dynprog_genome.o: dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo -c -o gsnapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gsnapl_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c
+
+gsnapl_avx512-dynprog_genome.obj: dynprog_genome.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo -c -o gsnapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_genome.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_genome.c' object='gsnapl_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi`
+
+gsnapl_avx512-dynprog_cdna.o: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo -c -o gsnapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gsnapl_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c
+
+gsnapl_avx512-dynprog_cdna.obj: dynprog_cdna.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo -c -o gsnapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_cdna.c' object='gsnapl_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi`
+
+gsnapl_avx512-dynprog_end.o: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo -c -o gsnapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gsnapl_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c
+
+gsnapl_avx512-dynprog_end.obj: dynprog_end.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo -c -o gsnapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_end.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='dynprog_end.c' object='gsnapl_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi`
+
+gsnapl_avx512-gbuffer.o: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo -c -o gsnapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo $(DEPDIR)/gsnapl_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gsnapl_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c
+
+gsnapl_avx512-gbuffer.obj: gbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo -c -o gsnapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo $(DEPDIR)/gsnapl_avx512-gbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gbuffer.c' object='gsnapl_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi`
+
+gsnapl_avx512-doublelist.o: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-doublelist.Tpo -c -o gsnapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-doublelist.Tpo $(DEPDIR)/gsnapl_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gsnapl_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c
+
+gsnapl_avx512-doublelist.obj: doublelist.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-doublelist.Tpo -c -o gsnapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-doublelist.Tpo $(DEPDIR)/gsnapl_avx512-doublelist.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='doublelist.c' object='gsnapl_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi`
+
+gsnapl_avx512-smooth.o: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-smooth.Tpo -c -o gsnapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-smooth.Tpo $(DEPDIR)/gsnapl_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gsnapl_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c
+
+gsnapl_avx512-smooth.obj: smooth.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-smooth.Tpo -c -o gsnapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-smooth.Tpo $(DEPDIR)/gsnapl_avx512-smooth.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='smooth.c' object='gsnapl_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi`
+
+gsnapl_avx512-chimera.o: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chimera.Tpo -c -o gsnapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chimera.Tpo $(DEPDIR)/gsnapl_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gsnapl_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c
+
+gsnapl_avx512-chimera.obj: chimera.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chimera.Tpo -c -o gsnapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chimera.Tpo $(DEPDIR)/gsnapl_avx512-chimera.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='chimera.c' object='gsnapl_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi`
+
+gsnapl_avx512-stage3.o: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3.Tpo -c -o gsnapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3.Tpo $(DEPDIR)/gsnapl_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gsnapl_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c
+
+gsnapl_avx512-stage3.obj: stage3.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3.Tpo -c -o gsnapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3.Tpo $(DEPDIR)/gsnapl_avx512-stage3.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage3.c' object='gsnapl_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi`
+
+gsnapl_avx512-splicestringpool.o: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo -c -o gsnapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo $(DEPDIR)/gsnapl_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gsnapl_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c
+
+gsnapl_avx512-splicestringpool.obj: splicestringpool.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo -c -o gsnapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo $(DEPDIR)/gsnapl_avx512-splicestringpool.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicestringpool.c' object='gsnapl_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi`
+
+gsnapl_avx512-splicetrie_build.o: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo -c -o gsnapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gsnapl_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c
+
+gsnapl_avx512-splicetrie_build.obj: splicetrie_build.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo -c -o gsnapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie_build.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie_build.c' object='gsnapl_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi`
+
+gsnapl_avx512-splicetrie.o: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo -c -o gsnapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gsnapl_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c
+
+gsnapl_avx512-splicetrie.obj: splicetrie.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo -c -o gsnapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splicetrie.c' object='gsnapl_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi`
+
+gsnapl_avx512-splice.o: splice.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splice.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splice.Tpo -c -o gsnapl_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splice.Tpo $(DEPDIR)/gsnapl_avx512-splice.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splice.c' object='gsnapl_avx512-splice.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c
+
+gsnapl_avx512-splice.obj: splice.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splice.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splice.Tpo -c -o gsnapl_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splice.Tpo $(DEPDIR)/gsnapl_avx512-splice.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='splice.c' object='gsnapl_avx512-splice.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi`
+
+gsnapl_avx512-indel.o: indel.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indel.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indel.Tpo -c -o gsnapl_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indel.Tpo $(DEPDIR)/gsnapl_avx512-indel.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indel.c' object='gsnapl_avx512-indel.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c
+
+gsnapl_avx512-indel.obj: indel.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indel.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indel.Tpo -c -o gsnapl_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indel.Tpo $(DEPDIR)/gsnapl_avx512-indel.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='indel.c' object='gsnapl_avx512-indel.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi`
+
+gsnapl_avx512-bitpack64-access.o: bitpack64-access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-access.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo -c -o gsnapl_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-access.c' object='gsnapl_avx512-bitpack64-access.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c
+
+gsnapl_avx512-bitpack64-access.obj: bitpack64-access.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-access.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo -c -o gsnapl_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-access.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='bitpack64-access.c' object='gsnapl_avx512-bitpack64-access.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
+
+gsnapl_avx512-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo -c -o gsnapl_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo $(DEPDIR)/gsnapl_avx512-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_avx512-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_avx512-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo -c -o gsnapl_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo $(DEPDIR)/gsnapl_avx512-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_avx512-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
+gsnapl_avx512-stage1hr.o: stage1hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo -c -o gsnapl_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo $(DEPDIR)/gsnapl_avx512-stage1hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1hr.c' object='gsnapl_avx512-stage1hr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
+
+gsnapl_avx512-stage1hr.obj: stage1hr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage1hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo -c -o gsnapl_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo $(DEPDIR)/gsnapl_avx512-stage1hr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='stage1hr.c' object='gsnapl_avx512-stage1hr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi`
+
+gsnapl_avx512-request.o: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-request.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-request.Tpo -c -o gsnapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-request.Tpo $(DEPDIR)/gsnapl_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gsnapl_avx512-request.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c
+
+gsnapl_avx512-request.obj: request.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-request.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-request.Tpo -c -o gsnapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-request.Tpo $(DEPDIR)/gsnapl_avx512-request.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='request.c' object='gsnapl_avx512-request.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi`
+
+gsnapl_avx512-resulthr.o: resulthr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-resulthr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-resulthr.Tpo -c -o gsnapl_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-resulthr.Tpo $(DEPDIR)/gsnapl_avx512-resulthr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='resulthr.c' object='gsnapl_avx512-resulthr.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c
+
+gsnapl_avx512-resulthr.obj: resulthr.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-resulthr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-resulthr.Tpo -c -o gsnapl_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-resulthr.Tpo $(DEPDIR)/gsnapl_avx512-resulthr.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='resulthr.c' object='gsnapl_avx512-resulthr.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi`
+
+gsnapl_avx512-output.o: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-output.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-output.Tpo -c -o gsnapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-output.Tpo $(DEPDIR)/gsnapl_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gsnapl_avx512-output.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c
+
+gsnapl_avx512-output.obj: output.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-output.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-output.Tpo -c -o gsnapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-output.Tpo $(DEPDIR)/gsnapl_avx512-output.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='output.c' object='gsnapl_avx512-output.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi`
+
+gsnapl_avx512-inbuffer.o: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo -c -o gsnapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo $(DEPDIR)/gsnapl_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gsnapl_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c
+
+gsnapl_avx512-inbuffer.obj: inbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo -c -o gsnapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo $(DEPDIR)/gsnapl_avx512-inbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='inbuffer.c' object='gsnapl_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi`
+
+gsnapl_avx512-samheader.o: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samheader.Tpo -c -o gsnapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samheader.Tpo $(DEPDIR)/gsnapl_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gsnapl_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c
+
+gsnapl_avx512-samheader.obj: samheader.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samheader.Tpo -c -o gsnapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samheader.Tpo $(DEPDIR)/gsnapl_avx512-samheader.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='samheader.c' object='gsnapl_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi`
+
+gsnapl_avx512-outbuffer.o: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo -c -o gsnapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo $(DEPDIR)/gsnapl_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gsnapl_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c
+
+gsnapl_avx512-outbuffer.obj: outbuffer.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo -c -o gsnapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo $(DEPDIR)/gsnapl_avx512-outbuffer.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='outbuffer.c' object='gsnapl_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi`
+
+gsnapl_avx512-datadir.o: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-datadir.Tpo -c -o gsnapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-datadir.Tpo $(DEPDIR)/gsnapl_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gsnapl_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c
+
+gsnapl_avx512-datadir.obj: datadir.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-datadir.Tpo -c -o gsnapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-datadir.Tpo $(DEPDIR)/gsnapl_avx512-datadir.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='datadir.c' object='gsnapl_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi`
+
+gsnapl_avx512-parserange.o: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-parserange.Tpo -c -o gsnapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-parserange.Tpo $(DEPDIR)/gsnapl_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gsnapl_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c
+
+gsnapl_avx512-parserange.obj: parserange.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-parserange.Tpo -c -o gsnapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-parserange.Tpo $(DEPDIR)/gsnapl_avx512-parserange.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='parserange.c' object='gsnapl_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi`
+
+gsnapl_avx512-getopt.o: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt.Tpo -c -o gsnapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt.Tpo $(DEPDIR)/gsnapl_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gsnapl_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c
+
+gsnapl_avx512-getopt.obj: getopt.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt.Tpo -c -o gsnapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt.Tpo $(DEPDIR)/gsnapl_avx512-getopt.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt.c' object='gsnapl_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi`
+
+gsnapl_avx512-getopt1.o: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt1.Tpo -c -o gsnapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt1.Tpo $(DEPDIR)/gsnapl_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gsnapl_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c
+
+gsnapl_avx512-getopt1.obj: getopt1.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt1.Tpo -c -o gsnapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt1.Tpo $(DEPDIR)/gsnapl_avx512-getopt1.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='getopt1.c' object='gsnapl_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi`
+
+gsnapl_avx512-gsnap.o: gsnap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gsnap.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gsnap.Tpo -c -o gsnapl_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gsnap.Tpo $(DEPDIR)/gsnapl_avx512-gsnap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gsnap.c' object='gsnapl_avx512-gsnap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c
+
+gsnapl_avx512-gsnap.obj: gsnap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gsnap.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gsnap.Tpo -c -o gsnapl_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gsnap.Tpo $(DEPDIR)/gsnapl_avx512-gsnap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='gsnap.c' object='gsnapl_avx512-gsnap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi`
+
 gsnapl_nosimd-except.o: except.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-except.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-except.Tpo -c -o gsnapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-except.Tpo $(DEPDIR)/gsnapl_nosimd-except.Po
@@ -31646,6 +38457,20 @@ gsnapl_nosimd-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnapl_nosimd-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge.Tpo -c -o gsnapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge.Tpo $(DEPDIR)/gsnapl_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_nosimd-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge.Tpo -c -o gsnapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge.Tpo $(DEPDIR)/gsnapl_nosimd-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnapl_nosimd-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-indexdb.Tpo -c -o gsnapl_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-indexdb.Tpo $(DEPDIR)/gsnapl_nosimd-indexdb.Po
@@ -31744,6 +38569,20 @@ gsnapl_nosimd-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnapl_nosimd-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-cigar.Tpo -c -o gsnapl_nosimd-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-cigar.Tpo $(DEPDIR)/gsnapl_nosimd-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_nosimd-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_nosimd-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-cigar.Tpo -c -o gsnapl_nosimd-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-cigar.Tpo $(DEPDIR)/gsnapl_nosimd-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_nosimd-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnapl_nosimd-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-samprint.Tpo -c -o gsnapl_nosimd-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-samprint.Tpo $(DEPDIR)/gsnapl_nosimd-samprint.Po
@@ -32290,6 +39129,20 @@ gsnapl_nosimd-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+gsnapl_nosimd-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo -c -o gsnapl_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo $(DEPDIR)/gsnapl_nosimd-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_nosimd-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_nosimd-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo -c -o gsnapl_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo $(DEPDIR)/gsnapl_nosimd-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_nosimd-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnapl_nosimd-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-stage1hr.Tpo -c -o gsnapl_nosimd-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-stage1hr.Tpo $(DEPDIR)/gsnapl_nosimd-stage1hr.Po
@@ -32864,6 +39717,20 @@ gsnapl_sse2-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnapl_sse2-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge.Tpo -c -o gsnapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge.Tpo $(DEPDIR)/gsnapl_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_sse2-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_sse2-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge.Tpo -c -o gsnapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge.Tpo $(DEPDIR)/gsnapl_sse2-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnapl_sse2-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-indexdb.Tpo -c -o gsnapl_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-indexdb.Tpo $(DEPDIR)/gsnapl_sse2-indexdb.Po
@@ -32962,6 +39829,20 @@ gsnapl_sse2-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnapl_sse2-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-cigar.Tpo -c -o gsnapl_sse2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-cigar.Tpo $(DEPDIR)/gsnapl_sse2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_sse2-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_sse2-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse2-cigar.Tpo -c -o gsnapl_sse2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-cigar.Tpo $(DEPDIR)/gsnapl_sse2-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_sse2-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnapl_sse2-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-samprint.Tpo -c -o gsnapl_sse2-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-samprint.Tpo $(DEPDIR)/gsnapl_sse2-samprint.Po
@@ -33508,6 +40389,20 @@ gsnapl_sse2-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+gsnapl_sse2-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo -c -o gsnapl_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo $(DEPDIR)/gsnapl_sse2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_sse2-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_sse2-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo -c -o gsnapl_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo $(DEPDIR)/gsnapl_sse2-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_sse2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnapl_sse2-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-stage1hr.Tpo -c -o gsnapl_sse2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-stage1hr.Tpo $(DEPDIR)/gsnapl_sse2-stage1hr.Po
@@ -34082,6 +40977,20 @@ gsnapl_sse41-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnapl_sse41-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge.Tpo -c -o gsnapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge.Tpo $(DEPDIR)/gsnapl_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_sse41-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_sse41-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge.Tpo -c -o gsnapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge.Tpo $(DEPDIR)/gsnapl_sse41-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnapl_sse41-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-indexdb.Tpo -c -o gsnapl_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-indexdb.Tpo $(DEPDIR)/gsnapl_sse41-indexdb.Po
@@ -34180,6 +41089,20 @@ gsnapl_sse41-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnapl_sse41-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-cigar.Tpo -c -o gsnapl_sse41-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-cigar.Tpo $(DEPDIR)/gsnapl_sse41-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_sse41-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_sse41-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse41-cigar.Tpo -c -o gsnapl_sse41-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-cigar.Tpo $(DEPDIR)/gsnapl_sse41-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_sse41-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnapl_sse41-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-samprint.Tpo -c -o gsnapl_sse41-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-samprint.Tpo $(DEPDIR)/gsnapl_sse41-samprint.Po
@@ -34726,6 +41649,20 @@ gsnapl_sse41-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+gsnapl_sse41-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo -c -o gsnapl_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo $(DEPDIR)/gsnapl_sse41-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_sse41-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_sse41-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo -c -o gsnapl_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo $(DEPDIR)/gsnapl_sse41-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_sse41-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnapl_sse41-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-stage1hr.Tpo -c -o gsnapl_sse41-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-stage1hr.Tpo $(DEPDIR)/gsnapl_sse41-stage1hr.Po
@@ -35300,6 +42237,20 @@ gsnapl_sse42-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnapl_sse42-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge.Tpo -c -o gsnapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge.Tpo $(DEPDIR)/gsnapl_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_sse42-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_sse42-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge.Tpo -c -o gsnapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge.Tpo $(DEPDIR)/gsnapl_sse42-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnapl_sse42-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-indexdb.Tpo -c -o gsnapl_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-indexdb.Tpo $(DEPDIR)/gsnapl_sse42-indexdb.Po
@@ -35398,6 +42349,20 @@ gsnapl_sse42-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnapl_sse42-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-cigar.Tpo -c -o gsnapl_sse42-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-cigar.Tpo $(DEPDIR)/gsnapl_sse42-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_sse42-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_sse42-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse42-cigar.Tpo -c -o gsnapl_sse42-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-cigar.Tpo $(DEPDIR)/gsnapl_sse42-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_sse42-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnapl_sse42-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-samprint.Tpo -c -o gsnapl_sse42-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-samprint.Tpo $(DEPDIR)/gsnapl_sse42-samprint.Po
@@ -35944,6 +42909,20 @@ gsnapl_sse42-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+gsnapl_sse42-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo -c -o gsnapl_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo $(DEPDIR)/gsnapl_sse42-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_sse42-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_sse42-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo -c -o gsnapl_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo $(DEPDIR)/gsnapl_sse42-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_sse42-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnapl_sse42-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-stage1hr.Tpo -c -o gsnapl_sse42-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-stage1hr.Tpo $(DEPDIR)/gsnapl_sse42-stage1hr.Po
@@ -36518,6 +43497,20 @@ gsnapl_ssse3-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+gsnapl_ssse3-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge.Tpo -c -o gsnapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge.Tpo $(DEPDIR)/gsnapl_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+gsnapl_ssse3-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge.Tpo -c -o gsnapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge.Tpo $(DEPDIR)/gsnapl_ssse3-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='gsnapl_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 gsnapl_ssse3-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-indexdb.Tpo -c -o gsnapl_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-indexdb.Tpo $(DEPDIR)/gsnapl_ssse3-indexdb.Po
@@ -36616,6 +43609,20 @@ gsnapl_ssse3-maxent_hr.obj: maxent_hr.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi`
 
+gsnapl_ssse3-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-cigar.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-cigar.Tpo -c -o gsnapl_ssse3-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-cigar.Tpo $(DEPDIR)/gsnapl_ssse3-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_ssse3-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+gsnapl_ssse3-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-cigar.obj -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-cigar.Tpo -c -o gsnapl_ssse3-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-cigar.Tpo $(DEPDIR)/gsnapl_ssse3-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='gsnapl_ssse3-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 gsnapl_ssse3-samprint.o: samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-samprint.Tpo -c -o gsnapl_ssse3-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-samprint.Tpo $(DEPDIR)/gsnapl_ssse3-samprint.Po
@@ -37162,6 +44169,20 @@ gsnapl_ssse3-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+gsnapl_ssse3-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo -c -o gsnapl_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo $(DEPDIR)/gsnapl_ssse3-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_ssse3-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+gsnapl_ssse3-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo -c -o gsnapl_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo $(DEPDIR)/gsnapl_ssse3-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='gsnapl_ssse3-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 gsnapl_ssse3-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-stage1hr.Tpo -c -o gsnapl_ssse3-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-stage1hr.Tpo $(DEPDIR)/gsnapl_ssse3-stage1hr.Po
@@ -39444,6 +46465,20 @@ uniqscan-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+uniqscan-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge.o -MD -MP -MF $(DEPDIR)/uniqscan-merge.Tpo -c -o uniqscan-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge.Tpo $(DEPDIR)/uniqscan-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='uniqscan-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+uniqscan-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge.obj -MD -MP -MF $(DEPDIR)/uniqscan-merge.Tpo -c -o uniqscan-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge.Tpo $(DEPDIR)/uniqscan-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='uniqscan-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 uniqscan-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-indexdb.o -MD -MP -MF $(DEPDIR)/uniqscan-indexdb.Tpo -c -o uniqscan-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-indexdb.Tpo $(DEPDIR)/uniqscan-indexdb.Po
@@ -39654,6 +46689,20 @@ uniqscan-atoi.obj: atoi.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
 
+uniqscan-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-cigar.o -MD -MP -MF $(DEPDIR)/uniqscan-cigar.Tpo -c -o uniqscan-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-cigar.Tpo $(DEPDIR)/uniqscan-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='uniqscan-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+uniqscan-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-cigar.obj -MD -MP -MF $(DEPDIR)/uniqscan-cigar.Tpo -c -o uniqscan-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-cigar.Tpo $(DEPDIR)/uniqscan-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='uniqscan-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 uniqscan-maxent.o: maxent.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-maxent.o -MD -MP -MF $(DEPDIR)/uniqscan-maxent.Tpo -c -o uniqscan-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-maxent.Tpo $(DEPDIR)/uniqscan-maxent.Po
@@ -40130,6 +47179,34 @@ uniqscan-sarray-read.obj: sarray-read.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi`
 
+uniqscan-sarray-search.o: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-sarray-search.o -MD -MP -MF $(DEPDIR)/uniqscan-sarray-search.Tpo -c -o uniqscan-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-sarray-search.Tpo $(DEPDIR)/uniqscan-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='uniqscan-sarray-search.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-sarray-search.o `test -f 'sarray-search.c' || echo '$(srcdir)/'`sarray-search.c
+
+uniqscan-sarray-search.obj: sarray-search.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-sarray-search.obj -MD -MP -MF $(DEPDIR)/uniqscan-sarray-search.Tpo -c -o uniqscan-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-sarray-search.Tpo $(DEPDIR)/uniqscan-sarray-search.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='sarray-search.c' object='uniqscan-sarray-search.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-sarray-search.obj `if test -f 'sarray-search.c'; then $(CYGPATH_W) 'sarray-search.c'; else $(CYGPATH_W) '$(srcdir)/sarray-search.c'; fi`
+
+uniqscan-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge-heap.o -MD -MP -MF $(DEPDIR)/uniqscan-merge-heap.Tpo -c -o uniqscan-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge-heap.Tpo $(DEPDIR)/uniqscan-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='uniqscan-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+uniqscan-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge-heap.obj -MD -MP -MF $(DEPDIR)/uniqscan-merge-heap.Tpo -c -o uniqscan-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge-heap.Tpo $(DEPDIR)/uniqscan-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='uniqscan-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 uniqscan-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-stage1hr.o -MD -MP -MF $(DEPDIR)/uniqscan-stage1hr.Tpo -c -o uniqscan-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-stage1hr.Tpo $(DEPDIR)/uniqscan-stage1hr.Po
@@ -40634,6 +47711,20 @@ uniqscanl-bitpack64-readtwo.obj: bitpack64-readtwo.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi`
 
+uniqscanl-merge.o: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge.o -MD -MP -MF $(DEPDIR)/uniqscanl-merge.Tpo -c -o uniqscanl-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge.Tpo $(DEPDIR)/uniqscanl-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='uniqscanl-merge.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c
+
+uniqscanl-merge.obj: merge.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge.obj -MD -MP -MF $(DEPDIR)/uniqscanl-merge.Tpo -c -o uniqscanl-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge.Tpo $(DEPDIR)/uniqscanl-merge.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge.c' object='uniqscanl-merge.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi`
+
 uniqscanl-indexdb.o: indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-indexdb.o -MD -MP -MF $(DEPDIR)/uniqscanl-indexdb.Tpo -c -o uniqscanl-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-indexdb.Tpo $(DEPDIR)/uniqscanl-indexdb.Po
@@ -40844,6 +47935,20 @@ uniqscanl-atoi.obj: atoi.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi`
 
+uniqscanl-cigar.o: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-cigar.o -MD -MP -MF $(DEPDIR)/uniqscanl-cigar.Tpo -c -o uniqscanl-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-cigar.Tpo $(DEPDIR)/uniqscanl-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='uniqscanl-cigar.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-cigar.o `test -f 'cigar.c' || echo '$(srcdir)/'`cigar.c
+
+uniqscanl-cigar.obj: cigar.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-cigar.obj -MD -MP -MF $(DEPDIR)/uniqscanl-cigar.Tpo -c -o uniqscanl-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-cigar.Tpo $(DEPDIR)/uniqscanl-cigar.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='cigar.c' object='uniqscanl-cigar.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-cigar.obj `if test -f 'cigar.c'; then $(CYGPATH_W) 'cigar.c'; else $(CYGPATH_W) '$(srcdir)/cigar.c'; fi`
+
 uniqscanl-maxent.o: maxent.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-maxent.o -MD -MP -MF $(DEPDIR)/uniqscanl-maxent.Tpo -c -o uniqscanl-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-maxent.Tpo $(DEPDIR)/uniqscanl-maxent.Po
@@ -41264,6 +48369,20 @@ uniqscanl-bitpack64-access.obj: bitpack64-access.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi`
 
+uniqscanl-merge-heap.o: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge-heap.o -MD -MP -MF $(DEPDIR)/uniqscanl-merge-heap.Tpo -c -o uniqscanl-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge-heap.Tpo $(DEPDIR)/uniqscanl-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='uniqscanl-merge-heap.o' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c
+
+uniqscanl-merge-heap.obj: merge-heap.c
+ at am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge-heap.obj -MD -MP -MF $(DEPDIR)/uniqscanl-merge-heap.Tpo -c -o uniqscanl-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+ at am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge-heap.Tpo $(DEPDIR)/uniqscanl-merge-heap.Po
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	$(AM_V_CC)source='merge-heap.c' object='uniqscanl-merge-heap.obj' libtool=no @AMDEPBACKSLASH@
+ at AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+ at am__fastdepCC_FALSE@	$(AM_V_CC at am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi`
+
 uniqscanl-stage1hr.o: stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-stage1hr.o -MD -MP -MF $(DEPDIR)/uniqscanl-stage1hr.Tpo -c -o uniqscanl-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c
 @am__fastdepCC_TRUE@	$(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-stage1hr.Tpo $(DEPDIR)/uniqscanl-stage1hr.Po
@@ -41367,6 +48486,27 @@ mostlyclean-libtool:
 
 clean-libtool:
 	-rm -rf .libs _libs
+install-includeHEADERS: $(include_HEADERS)
+	@$(NORMAL_INSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	if test -n "$$list"; then \
+	  echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
+	  $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
+	fi; \
+	for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  echo "$$d$$p"; \
+	done | $(am__base_list) | \
+	while read files; do \
+	  echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(includedir)'"; \
+	  $(INSTALL_HEADER) $$files "$(DESTDIR)$(includedir)" || exit $$?; \
+	done
+
+uninstall-includeHEADERS:
+	@$(NORMAL_UNINSTALL)
+	@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+	files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+	dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
 
 ID: $(am__tagged_files)
 	$(am__define_uniq_tagged_files); mkid -fID $$unique
@@ -41452,9 +48592,11 @@ distdir: $(DISTFILES)
 	done
 check-am: all-am
 check: check-am
-all-am: Makefile $(PROGRAMS) config.h
+all-am: Makefile $(LTLIBRARIES) $(PROGRAMS) $(HEADERS) config.h
+install-binPROGRAMS: install-libLTLIBRARIES
+
 installdirs:
-	for dir in "$(DESTDIR)$(bindir)"; do \
+	for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" "$(DESTDIR)$(includedir)"; do \
 	  test -z "$$dir" || $(MKDIR_P) "$$dir"; \
 	done
 install: install-am
@@ -41490,7 +48632,8 @@ maintainer-clean-generic:
 	@echo "it deletes files that may require special tools to rebuild."
 clean: clean-am
 
-clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+clean-am: clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \
+	clean-libtool mostlyclean-am
 
 distclean: distclean-am
 	-rm -rf ./$(DEPDIR)
@@ -41510,13 +48653,13 @@ info: info-am
 
 info-am:
 
-install-data-am:
+install-data-am: install-includeHEADERS
 
 install-dvi: install-dvi-am
 
 install-dvi-am:
 
-install-exec-am: install-binPROGRAMS
+install-exec-am: install-binPROGRAMS install-libLTLIBRARIES
 
 install-html: install-html-am
 
@@ -41556,24 +48699,27 @@ ps: ps-am
 
 ps-am:
 
-uninstall-am: uninstall-binPROGRAMS
+uninstall-am: uninstall-binPROGRAMS uninstall-includeHEADERS \
+	uninstall-libLTLIBRARIES
 
 .MAKE: all install-am install-strip
 
 .PHONY: CTAGS GTAGS TAGS all all-am check check-am clean \
-	clean-binPROGRAMS clean-generic clean-libtool cscopelist-am \
-	ctags ctags-am distclean distclean-compile distclean-generic \
-	distclean-hdr distclean-libtool distclean-tags distdir dvi \
-	dvi-am html html-am info info-am install install-am \
-	install-binPROGRAMS install-data install-data-am install-dvi \
-	install-dvi-am install-exec install-exec-am install-html \
-	install-html-am install-info install-info-am install-man \
-	install-pdf install-pdf-am install-ps install-ps-am \
-	install-strip installcheck installcheck-am installdirs \
-	maintainer-clean maintainer-clean-generic mostlyclean \
-	mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
-	pdf pdf-am ps ps-am tags tags-am uninstall uninstall-am \
-	uninstall-binPROGRAMS
+	clean-binPROGRAMS clean-generic clean-libLTLIBRARIES \
+	clean-libtool cscopelist-am ctags ctags-am distclean \
+	distclean-compile distclean-generic distclean-hdr \
+	distclean-libtool distclean-tags distdir dvi dvi-am html \
+	html-am info info-am install install-am install-binPROGRAMS \
+	install-data install-data-am install-dvi install-dvi-am \
+	install-exec install-exec-am install-html install-html-am \
+	install-includeHEADERS install-info install-info-am \
+	install-libLTLIBRARIES install-man install-pdf install-pdf-am \
+	install-ps install-ps-am install-strip installcheck \
+	installcheck-am installdirs maintainer-clean \
+	maintainer-clean-generic mostlyclean mostlyclean-compile \
+	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+	tags tags-am uninstall uninstall-am uninstall-binPROGRAMS \
+	uninstall-includeHEADERS uninstall-libLTLIBRARIES
 
 .PRECIOUS: Makefile
 
@@ -41590,7 +48736,7 @@ uninstall-am: uninstall-binPROGRAMS
 # genome.c genome.h \
 # genomicpos.c genomicpos.h \
 # chrnum.c chrnum.h \
-# maxent.c maxent.h \
+# maxent.c maxent.h maxent_hr.c maxent_hr.h \
 # branchpoint.c branchpoint.h \
 # parserange.c parserange.h datadir.c datadir.h getopt.c getopt1.c getopt.h splicing-score.c
 #
diff --git a/src/block.c b/src/block.c
index 01ec723..1b5520d 100644
--- a/src/block.c
+++ b/src/block.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: block.c 180699 2015-12-10 19:10:10Z twu $";
+static char rcsid[] = "$Id: block.c 207312 2017-06-14 19:27:12Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -248,12 +248,12 @@ Block_next (T this) {
 				  &this->forward,&this->revcomp,this->reader,this->cdnaend);
     debug(
 	  if (this->cdnaend == THREE) {
-	    nt_fwd = Oligo_one_nt(this->forward >> this->leftreadshift,12);
-	    nt_rev = Oligo_one_nt(this->revcomp,12);
+	    nt_fwd = Oligo_one_nt(this->forward >> this->leftreadshift,this->oligosize);
+	    nt_rev = Oligo_one_nt(this->revcomp,this->oligosize);
 	  }
 	  if (this->cdnaend == FIVE) {
-	    nt_fwd = Oligo_one_nt(this->forward,12);
-	    nt_rev = Oligo_one_nt(this->revcomp >> this->leftreadshift,12);
+	    nt_fwd = Oligo_one_nt(this->forward,this->oligosize);
+	    nt_rev = Oligo_one_nt(this->revcomp >> this->leftreadshift,this->oligosize);
 	  }
 	  printf("Block has oligo forward %s, revcomp %s at querypos %d\n",
 		 nt_fwd,nt_rev,this->last_querypos);
diff --git a/src/chimera.c b/src/chimera.c
index 475776e..ba7a23f 100644
--- a/src/chimera.c
+++ b/src/chimera.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chimera.c 184432 2016-02-17 20:00:02Z twu $";
+static char rcsid[] = "$Id: chimera.c 206139 2017-05-11 03:28:35Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -625,11 +625,12 @@ static char *complCode = COMPLEMENT_UC;
 /* Modeled after Chimera_bestpath */
 /* Called if Chimera_find_exonexon fails */
 int
-Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char *acceptor2, char *acceptor1,
+Chimera_find_breakpoint (int *chimeraequivpos, int *rangelow, int *rangehigh,
+			 char *donor1, char *donor2, char *acceptor2, char *acceptor1,
 			 Stage3_T left_part, Stage3_T right_part, int queryntlength, Genome_T genome,
 			 Chrpos_T left_chrlength, Chrpos_T right_chrlength) {
   int chimerapos = 0, breakpoint;
-  int *matrix_sub1, *matrix_sub2, pos, score, bestscore;
+  int *matrix_sub1, *matrix_sub2, pos, score, bestscore, secondbest;
   bool *gapp_sub1, *gapp_sub2;
   Univcoord_T left;
 
@@ -647,7 +648,7 @@ Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char
 		  Stage3_cdna_direction(right_part),queryntlength,THREE,/*pre_extension_slop*/0);
 
 
-  bestscore = -100000;
+  bestscore = secondbest = -100000;
   for (pos = 0; pos < queryntlength - 1; pos++) {
     debug(
 	  printf("%d:",pos);
@@ -674,10 +675,13 @@ Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char
 #endif
 
 	if (score > bestscore) {
+	  secondbest = bestscore;
 	  bestscore = score;
 	  chimerapos = *chimeraequivpos = pos;
 	} else if (score == bestscore) {
 	  *chimeraequivpos = pos;
+	} else if (score > secondbest) {
+	  secondbest = score;
 	}
 
 	debug(
@@ -693,6 +697,39 @@ Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char
   }
   debug(printf("chimerapos %d, chimeraequivpos %d\n",chimerapos,*chimeraequivpos));
 
+
+  /* Use secondbest to find a range for exon-exon searching */
+  *rangelow = 0;
+  for (pos = 0; pos < queryntlength - 1; pos++) {
+    if (gapp_sub1[pos] == false) {
+      if (gapp_sub2[pos+1] == false) {
+	/* Check for the same stage3 object on both lists */
+#if 0
+	/* ? Old formula for use before Pair_pathscores had cdnaend argument */
+	score = matrix_sub2[queryntlength-1] - matrix_sub2[pos] + matrix_sub1[pos] /* - 0 */;
+#else
+	score = matrix_sub1[pos] + matrix_sub2[pos+1];
+#endif
+
+	if (score == secondbest) {
+	  if (*rangelow == 0) {
+	    *rangelow = *rangehigh = pos;
+	  } else {
+	    *rangehigh = pos;
+	  }
+	}
+      }
+    }
+  }
+  if (*rangelow > chimerapos) {
+    *rangelow = chimerapos;
+  }
+  if (*rangehigh < *chimeraequivpos) {
+    *rangehigh = *chimeraequivpos;
+  }
+  debug(printf("For secondbest score of %d: rangelow %d, rangehigh %d\n",secondbest,*rangelow,*rangehigh));
+
+
 #if 0
   *five_score = matrix_sub1[*chimerapos] /* - 0 */;
   *three_score = matrix_sub2[queryntlength-1] - matrix_sub2[*chimerapos];
diff --git a/src/chimera.h b/src/chimera.h
index f2e2e8e..2f02db1 100644
--- a/src/chimera.h
+++ b/src/chimera.h
@@ -1,4 +1,4 @@
-/* $Id: chimera.h 173162 2015-09-01 18:13:07Z twu $ */
+/* $Id: chimera.h 206139 2017-05-11 03:28:35Z twu $ */
 #ifndef CHIMERA_INCLUDED
 #define CHIMERA_INCLUDED
 
@@ -53,7 +53,8 @@ Chimera_bestpath (int *five_score, int *three_score, int *chimerapos, int *chime
 		  Stage3_T *stage3array_sub1, int npaths_sub1, Stage3_T *stage3array_sub2, int npaths_sub2, 
 		  int queryntlength, int chimera_slop, bool localp);
 extern int
-Chimera_find_breakpoint (int *chimeraequivpos, char *donor1, char *donor2, char *acceptor2, char *acceptor1,
+Chimera_find_breakpoint (int *chimeraequivpos, int *rangelow, int *rangehigh,
+			 char *donor1, char *donor2, char *acceptor2, char *acceptor1,
 			 Stage3_T left_part, Stage3_T right_part, int queryntlength, Genome_T genome,
 			 Chrpos_T left_chrlength, Chrpos_T right_chrlength);
 
diff --git a/src/chrom.c b/src/chrom.c
index 3a70a4d..11da4e6 100644
--- a/src/chrom.c
+++ b/src/chrom.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chrom.c 182428 2016-01-15 22:08:51Z twu $";
+static char rcsid[] = "$Id: chrom.c 207849 2017-06-29 19:20:50Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -7,6 +7,7 @@ static char rcsid[] = "$Id: chrom.c 182428 2016-01-15 22:08:51Z twu $";
 #include <stdio.h>
 #include <stdlib.h>		/* For atoi */
 #include <string.h>
+#include "assert.h"
 #include "mem.h"
 #include "interval.h"
 
@@ -19,7 +20,7 @@ static char rcsid[] = "$Id: chrom.c 182428 2016-01-15 22:08:51Z twu $";
 
 
 /* This is the order for chrom sort */
-typedef enum {PURE_NUMERIC, SEX_CHROMOSOME, MITOCHONDRIAL, NUMERIC_ALPHA, PURE_ALPHA} Chromtype_T;
+typedef enum {PURE_NUMERIC, SEX_CHROMOSOME, MITOCHONDRIAL, NUMERIC_ALPHA, ALPHA_NUMERIC, PURE_ALPHA} Chromtype_T;
 
 #ifdef DEBUG
 static char *
@@ -29,6 +30,7 @@ Chromtype_string (Chromtype_T chromtype) {
   case SEX_CHROMOSOME: return "sex";
   case MITOCHONDRIAL: return "mitochondrial";
   case NUMERIC_ALPHA: return "numeric_alpha";
+  case ALPHA_NUMERIC: return "alpha_numeric";
   case PURE_ALPHA: return "alpha";
   default: abort();
   }
@@ -51,9 +53,7 @@ struct T {
 
 void
 Chrom_free (T *old) {
-  if ((*old)->numericp == true) {
-    FREE((*old)->alpha);
-  }
+  FREE((*old)->alpha);
   FREE((*old)->string);
   FREE(*old);
   return;
@@ -112,8 +112,8 @@ Chrom_from_string (char *string, char *mitochondrial_string, Univcoord_T order,
     mitochondrial_p = true;
   }
 
-  if (!strncmp(string,"chr",3)) {
-    /* Ignore leading chr for sorting purposes */
+  if (!strncmp(string,"chr",3) || !strncmp(string,"Chr",3)) {
+    /* Ignore leading chr or Chr for sorting purposes */
     string += 3;
     debug(printf("  => chop chr to yield %s\n",string));
   }
@@ -165,11 +165,24 @@ Chrom_from_string (char *string, char *mitochondrial_string, Univcoord_T order,
     } else if (sex_p == true) {
       new->chromtype = SEX_CHROMOSOME;
     } else {
-      new->chromtype = PURE_ALPHA;
+      while (p != '\0' && (*p < '1' || *p > '9')) {
+	/* Stop at initial '1' through '9'.  An initial '0' must be alphabetic, not numeric. */
+	p++;
+      }
+      if (p != '\0') {
+	new->chromtype = ALPHA_NUMERIC;
+	new->num = atoi(p);
+	new->alpha = (char *) MALLOC((p - string + 1)*sizeof(char));
+	strncpy(new->alpha,string,(p - string)*sizeof(char));
+	new->alpha[p - string] = '\0';
+	debug(printf("  => alpha_numeric with alpha %s and then num %d, type %s\n",
+		     new->alpha,new->num,Chromtype_string(new->chromtype)));
+      } else {
+	new->chromtype = PURE_ALPHA;
+	debug(printf("  => alphabetical %s, type %s\n",
+		     new->string,Chromtype_string(new->chromtype)));
+      }
     }
-
-    debug(printf("  => alphabetical %s, type %s\n",
-		 new->string,Chromtype_string(new->chromtype)));
   }
 
   return new;
@@ -185,6 +198,7 @@ Chrom_cmp_alpha (T a, T b) {
 
 int
 Chrom_cmp_numeric_alpha (T a, T b) {
+  int cmp;
 
   if (a->numericp == true && b->numericp == false) {
     /* 1 and X */
@@ -203,22 +217,44 @@ Chrom_cmp_numeric_alpha (T a, T b) {
       return strcmp(a->alpha,b->alpha);
     }
   } else {
-    return strcmp(a->string,b->string);
+    cmp = strcmp(a->string,b->string);
+    if (cmp < 0) {
+      return -1;
+    } else if (cmp > 0) {
+      return +1;
+    } else if (a->chromtype == PURE_ALPHA && b->chromtype == ALPHA_NUMERIC) {
+      return -1;
+    } else if (a->chromtype == ALPHA_NUMERIC && b->chromtype == PURE_ALPHA) {
+      return +1;
+    } else if (a->chromtype == PURE_ALPHA && b->chromtype == PURE_ALPHA) {
+      return 0;
+    } else if (a->num < b->num) {
+      /* Chr2 and Chr10 */
+      return -1;
+    } else if (a->num > b->num) {
+      /* Chr10 and Chr2 */
+      return +1;
+    } else {
+      return 0;
+    }
   }
 }
 
 
 int
 Chrom_cmp_chrom (T a, T b) {
+  int cmp;
 
   debug(printf("Comparing %s and %s => ",a->string,b->string));
 
   if (a->chromtype < b->chromtype) {
     debug(printf("chromtype %d < %d => -1\n",a->chromtype,b->chromtype));
     return -1;
+
   } else if (b->chromtype < a->chromtype) {
     debug(printf("chromtype %d > %d => +1\n",a->chromtype,b->chromtype));
     return +1;
+
   } else if (a->numericp == true && b->numericp == true) {
     if (a->num < b->num) {
       /* 1 and 2U */
@@ -233,9 +269,34 @@ Chrom_cmp_chrom (T a, T b) {
 		   a->alpha,b->alpha,strcmp(a->alpha,b->alpha)));
       return strcmp(a->alpha,b->alpha);
     }
+
+  } else if (a->chromtype == ALPHA_NUMERIC) {
+    cmp = strcmp(a->alpha,b->alpha);
+    if (cmp < 0) {
+      debug(printf("alpha_numeric %s cmp %s => %d\n",
+		   a->alpha,b->alpha,strcmp(a->alpha,b->alpha)));
+      return -1;
+    } else if (cmp > 0) {
+      debug(printf("alpha_numeric %s cmp %s => %d\n",
+		   a->alpha,b->alpha,strcmp(a->alpha,b->alpha)));
+      return +1;
+    } else if (a->num < b->num) {
+      /* Chr2 and Chr10 */
+      debug(printf("alpha_numeric %d < %d => -1\n",a->num,b->num));
+      return -1;
+    } else if (a->num > b->num) {
+      /* Chr10 and Chr2 */
+      debug(printf("alpha_numeric %d > %d => +1\n",a->num,b->num));
+      return +1;
+    } else {
+      debug(printf("alpha_numeric %d == %d => %d\n",a->num,b->num,strcmp(a->string,b->string)));
+      return strcmp(a->string,b->string);
+    }
+
   } else {
-      debug(printf("alpha %s cmp %s => %d\n",
-		   a->string,b->string,strcmp(a->string,b->string)));
+    /* assert(a->chromtype == PURE_ALPHA); or MITOCHONDRIAL or SEX_CHROMOSOME */
+    debug(printf("pure alpha or mitochondrial or sex chromosome %s cmp %s => %d\n",
+		 a->string,b->string,strcmp(a->string,b->string)));
     return strcmp(a->string,b->string);
   }
 }
diff --git a/src/cigar.c b/src/cigar.c
new file mode 100644
index 0000000..7911135
--- /dev/null
+++ b/src/cigar.c
@@ -0,0 +1,1647 @@
+static char rcsid[] = "$Id: cigar.c 207314 2017-06-14 19:28:08Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <ctype.h>		/* For isupper */
+
+#include "cigar.h"
+#include "mem.h"
+#include "complement.h"
+
+
+
+#ifdef DEBUG
+#define debug(x) x
+#else
+#define debug(x)
+#endif
+
+#ifdef DEBUG1
+#define debug1(x) x
+#else
+#define debug1(x)
+#endif
+
+#ifdef DEBUG2
+#define debug2(x) x
+#else
+#define debug2(x)
+#endif
+
+
+static bool hide_soft_clips_p;	/* true => Cigar_print_substrings prints no S operations; the clipped ends are counted in the M length */
+static bool cigar_extended_p = false; /* true => print_cigar_M emits =/X runs via print_extended_cigar instead of an M operation */
+static bool merge_samechr_p;	/* NOTE(review): not read in this part of the file -- confirm where it is used */
+static bool md_lowercase_variant_p; /* NOTE(review): its effect on the printed CIGAR is not evident in this part of the file -- confirm */
+
+
+
+#if 0	/* Disabled: writes every token string in the list to stdout, in list order, without freeing it */
+static void
+print_tokens_stdout (List_T tokens) {
+  List_T p;
+  char *token;
+  
+  for (p = tokens; p != NULL; p = List_next(p)) {
+    token = (char *) List_head(p);
+    printf("%s",token);
+  }
+
+  return;
+}
+#endif
+
+
+
+#if 0
+/* Derived from print_tokens_gff3.  Disabled (inside #if 0).  Writes each token string
+   to fp in list order and frees it, so the token strings are consumed by the call;
+   the list cells themselves are not freed here. */
+static void
+print_tokens_sam (Filestring_T fp, List_T tokens) {
+  List_T p;
+  char *token;
+  
+  for (p = tokens; p != NULL; p = List_next(p)) {
+    token = (char *) List_head(p);
+    FPRINTF(fp,"%s",token);
+    FREE(token);
+  }
+
+  return;
+}
+#endif
+
+#if 0	/* Disabled: pushes a CALLOC'd copy of token onto the tokens list and returns the resulting list */
+static List_T
+push_token (List_T tokens, char *token) {
+  char *copy;
+
+  copy = (char *) CALLOC(strlen(token)+1,sizeof(char));
+  strcpy(copy,token);
+  return List_push(tokens,(void *) copy);
+}
+#endif
+
+
+#if 0
+/* Disabled (inside #if 0), so not currently used; the original note said it was used
+   for insertions and deletions.  Pushes the leading H/S tokens when firstp is true,
+   then the <n><type> token for the part of the segment that remains after clamping
+   against the hard-clip bounds, then the trailing S/H tokens when lastp is true.
+   Returns the updated token list. */
+static List_T
+compute_cigar_old (List_T tokens, char type, int stringlength, int querypos, int querylength,
+		   int hardclip_low, int hardclip_high, bool plusp, bool firstp, bool lastp) {
+  char token[10];	/* NOTE(review): room for 9 characters plus NUL; the sprintf calls below are unbounded */
+  
+  debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plusp %d\n",
+		type,stringlength,querypos,querylength,hardclip_low,hardclip_high,plusp));
+
+  if (firstp == true) {
+    debug1(printf("firstp is true\n"));
+    if (plusp == true) {
+      if (hardclip_low > 0) {
+	sprintf(token,"%dH",hardclip_low);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+      if (querypos > hardclip_low) {
+	sprintf(token,"%dS",querypos - hardclip_low);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    } else {
+      if (hardclip_high > 0) {
+	sprintf(token,"%dH",hardclip_high);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+      if (querypos < querylength - hardclip_high) {
+	sprintf(token,"%dS",querypos - hardclip_high);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    }
+  }
+
+  if (type == 'D' || type == 'N') {
+    if (querypos < hardclip_low || querypos >= querylength - hardclip_high) {
+      stringlength = 0;
+    }
+
+  } else if (plusp == true) {
+    debug1(printf("Comparing querypos %d..%d against %d..%d\n",
+		  querypos,querypos + stringlength,hardclip_low,querylength - hardclip_high));
+    if (/* querypos < hardclip_low && */querypos + stringlength < hardclip_low) {
+      /* Print nothing */
+      stringlength = 0;
+      debug1(printf("Case 1: stringlength 0\n"));
+    } else if (querypos < hardclip_low) {
+      if (querypos + stringlength < querylength - hardclip_high) {
+	/* Print part after hardclip_low */
+	stringlength = (querypos + stringlength) - hardclip_low;
+	debug1(printf("Case 2: stringlength %d\n",stringlength));
+      } else {
+	/* Print part between hardclip_low and hardclip_high */
+	stringlength = (querylength - hardclip_high) - hardclip_low;
+	debug1(printf("Case 3: stringlength %d\n",stringlength));
+      }
+    } else if (querypos < querylength - hardclip_high) {
+      if (querypos + stringlength >= querylength - hardclip_high) {
+	/* Print up to hardclip_high */
+	stringlength = (querylength - hardclip_high) - querypos;
+	debug1(printf("Case 4: stringlength %d\n",stringlength));
+      } else {
+	/* Print full stringlength */
+	debug1(printf("Case 5: stringlength %d\n",stringlength));
+      }
+    } else {
+      /* Print nothing */
+      stringlength = 0;
+      debug1(printf("Case 6: stringlength 0\n"));
+    }
+
+  } else {
+    debug1(printf("Comparing querypos %d..%d against %d..%d\n",
+		  querypos,querypos - stringlength,hardclip_low,querylength - hardclip_high));
+    if (/* querypos >= querylength - hardclip_high && */ querypos - stringlength >= querylength - hardclip_high) {
+      /* Print nothing */
+      stringlength = 0;
+      debug1(printf("Case 1: stringlength 0\n"));
+    } else if (querypos >= querylength - hardclip_high) {
+      if (querypos - stringlength >= hardclip_low) {
+	/* Print part after hardclip_high */
+	stringlength = (querylength - hardclip_high) - (querypos - stringlength);
+	debug1(printf("Case 2: stringlength %d\n",stringlength));
+      } else {
+	/* Print part between hardclip_low and hardclip_high */
+	stringlength = (querylength - hardclip_high) - hardclip_low;
+	debug1(printf("Case 3: stringlength %d\n",stringlength));
+      }
+    } else if (querypos >= hardclip_low) {
+      if (querypos - stringlength < hardclip_low) {
+	/* Print up to hardclip_low */
+	stringlength = querypos - hardclip_low;
+	debug1(printf("Case 4: stringlength %d\n",stringlength));
+      } else {
+	/* Print full stringlength */
+	debug1(printf("Case 5: stringlength %d\n",stringlength));
+      }
+    } else {
+      /* Print nothing */
+      stringlength = 0;
+      debug1(printf("Case 5: stringlength 0\n"));
+    }
+  }
+
+  if (stringlength > 0) {
+    sprintf(token,"%d%c",stringlength,type);
+    debug1(printf("Pushing token %s\n",token));
+    tokens = push_token(tokens,token);
+  }
+
+  if (lastp == true) {
+    debug1(printf("lastp is true\n"));
+    if (plusp == true) {
+      querypos += stringlength;
+      if (querypos < querylength - 1 - hardclip_high) {
+	sprintf(token,"%dS",querylength - 1 - hardclip_high - querypos);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+      if (hardclip_high > 0) {
+	sprintf(token,"%dH",hardclip_high);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    } else {
+      querypos -= stringlength;
+      if (querypos > hardclip_low) {
+	sprintf(token,"%dS",hardclip_low - querypos);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+      if (hardclip_low > 0) {
+	sprintf(token,"%dH",hardclip_low);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    }
+  }
+
+  return tokens;
+}
+#endif
+
+
+#if 0
+/* Disabled (inside #if 0), so not currently used; the original note said it was used
+   for insertions and deletions.  Token-list counterpart of print_cigar: clamps the
+   segment against the hard-clip bounds and pushes an initial H token if the start was
+   clamped, the <n><type> token if any length remains, and a final H token when lastp
+   is true.  Returns the updated token list. */
+static List_T
+compute_cigar (List_T tokens, char type, int stringlength, int querypos, int querylength,
+	       int hardclip_low, int hardclip_high, bool plusp, int lastp) {
+  int matchlength = 0;
+  int startpos, endpos;
+  int cliplength = 0;
+  char token[10];	/* NOTE(review): room for 9 characters plus NUL; the sprintf calls below are unbounded */
+  
+  if (plusp == true) {
+    debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
+		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+    if (hardclip_low > querypos) { /* > not >= */
+      startpos = hardclip_low;
+      cliplength = hardclip_low;
+    } else {
+      startpos = querypos;
+    }
+
+    if (querylength - hardclip_high < querypos + stringlength) {
+      endpos = querylength - hardclip_high;
+      debug1(printf("  endpos %d = querylength %d - hardclip_high %d\n",endpos,querylength,hardclip_high));
+    } else {
+      endpos = querypos + stringlength;
+      debug1(printf("  endpos %d = querypos %d + stringlength %d\n",endpos,querypos,stringlength));
+    }
+
+    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
+
+    if (endpos >= startpos) {
+      if (cliplength > 0) {
+	debug1(printf("  Pushing initial %dH\n",cliplength));
+	sprintf(token,"%dH",cliplength);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+      matchlength = endpos - startpos;
+      if (matchlength > 0) {
+	debug1(printf("  Pushing %d%c\n",matchlength,type));
+	sprintf(token,"%d%c",matchlength,type);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    }
+
+
+    if (lastp == true) {
+      /* cliplength = querypos + stringlength - endpos; */
+      cliplength = querylength - endpos;
+      if (cliplength > 0) {
+	debug1(printf("  Pushing final %dH\n",cliplength));
+	sprintf(token,"%dH",cliplength);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    }
+
+  } else {
+    debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
+		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+
+    if (querylength - hardclip_low < querypos) {
+      startpos = querylength - hardclip_low;
+      cliplength = hardclip_low;
+    } else {
+      startpos = querypos;
+    }
+
+    if (hardclip_high >= querypos - stringlength) {
+      endpos = hardclip_high;
+      debug1(printf("  endpos %d = hardclip_high %d\n",endpos,hardclip_high));
+    } else {
+      endpos = querypos - stringlength;
+      debug1(printf("  endpos %d = querypos %d - stringlength %d\n",endpos,querypos,stringlength));
+    }
+
+    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
+
+    if (endpos <= startpos) {
+      if (cliplength > 0) {
+	debug1(printf("  Pushing initial %dH\n",cliplength));
+	sprintf(token,"%dH",cliplength);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+      matchlength = startpos - endpos;
+      if (matchlength > 0) {
+	debug1(printf("  Pushing %d%c\n",matchlength,type));
+	sprintf(token,"%d%c",matchlength,type);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    }
+
+
+    if (lastp == true) {
+      cliplength = endpos;
+      if (cliplength > 0) {
+	debug1(printf("  Pushing final %dH\n",cliplength));
+	sprintf(token,"%dH",cliplength);
+	debug1(printf("Pushing token %s\n",token));
+	tokens = push_token(tokens,token);
+      }
+    }
+  }
+
+  return tokens;
+}
+#endif
+
+
+#if 0
+/* Modified from compute_cigar.  Disabled (inside #if 0).  Applies the same clamping
+   against the hard-clip bounds, but pushes only the operation codes ('H' or type)
+   onto an Intlist_T, without their lengths.  Returns the updated list. */
+static Intlist_T
+compute_cigar_types_only (Intlist_T types, char type, int stringlength, int querypos, int querylength,
+			  int hardclip_low, int hardclip_high, bool plusp, int lastp) {
+  int matchlength = 0;
+  int startpos, endpos;
+  int cliplength = 0;
+  
+  if (plusp == true) {
+    debug1(printf("\nEntering compute_cigar_types_only with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
+		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+    if (hardclip_low > querypos) { /* > not >= */
+      startpos = hardclip_low;
+      cliplength = hardclip_low;
+    } else {
+      startpos = querypos;
+    }
+
+    if (querylength - hardclip_high < querypos + stringlength) {
+      endpos = querylength - hardclip_high;
+      debug1(printf("  endpos %d = querylength %d - hardclip_high %d\n",endpos,querylength,hardclip_high));
+    } else {
+      endpos = querypos + stringlength;
+      debug1(printf("  endpos %d = querypos %d + stringlength %d\n",endpos,querypos,stringlength));
+    }
+
+    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
+
+    if (endpos >= startpos) {
+      if (cliplength > 0) {
+	debug1(printf("  Pushing initial %dH\n",cliplength));
+	types = Intlist_push(types,'H');
+      }
+      matchlength = endpos - startpos;
+      if (matchlength > 0) {
+	debug1(printf("  Pushing %d%c\n",matchlength,type));
+	types = Intlist_push(types,type);
+      }
+    }
+
+
+    if (lastp == true) {
+      /* cliplength = querypos + stringlength - endpos; */
+      cliplength = querylength - endpos;
+      if (cliplength > 0) {
+	debug1(printf("  Pushing final %dH\n",cliplength));
+	types = Intlist_push(types,'H');
+      }
+    }
+
+  } else {
+    debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
+		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+
+    if (querylength - hardclip_low < querypos) {
+      startpos = querylength - hardclip_low;
+      cliplength = hardclip_low;
+    } else {
+      startpos = querypos;
+    }
+
+    if (hardclip_high >= querypos - stringlength) {
+      endpos = hardclip_high;
+      debug1(printf("  endpos %d = hardclip_high %d\n",endpos,hardclip_high));
+    } else {
+      endpos = querypos - stringlength;
+      debug1(printf("  endpos %d = querypos %d - stringlength %d\n",endpos,querypos,stringlength));
+    }
+
+    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
+
+    if (endpos <= startpos) {
+      if (cliplength > 0) {
+	debug1(printf("  Pushing initial %dH\n",cliplength));
+	types = Intlist_push(types,'H');
+      }
+      matchlength = startpos - endpos;
+      if (matchlength > 0) {
+	debug1(printf("  Pushing %d%c\n",matchlength,type));
+	types = Intlist_push(types,type);
+      }
+    }
+
+
+    if (lastp == true) {
+      cliplength = endpos;
+      if (cliplength > 0) {
+	debug1(printf("  Pushing final %dH\n",cliplength));
+	types = Intlist_push(types,'H');
+      }
+    }
+  }
+
+  return types;
+}
+#endif
+
+
+/* Writes the CIGAR operation(s) for one segment of the read to fp.
+ *
+ * The segment has operation code `type` and spans `stringlength` query positions
+ * from `querypos`: upward when plusp is true, downward otherwise.  It is first
+ * clamped against the hard-clip bounds.  If the clamp on the starting side took
+ * effect, an initial <hardclip_low>H is printed.  Any remaining length is printed
+ * as <n><type>, except that type 'E' is printed as S when the remaining length
+ * equals trimlength and as H otherwise.  When lastp is true, a final H is printed
+ * for whatever lies beyond the clamped end (querylength - end on plus, end on minus).
+ */
+static void
+print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int querylength,
+	     int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
+  int seg_begin, seg_end;	/* Clamped ends of the segment, in the direction of travel */
+  int seg_length;
+  int leading_clip = 0;
+  int trailing_clip;
+  bool printable_p;
+
+  if (plusp == true) {
+    debug1(printf("\nEntering print_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
+		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+
+    seg_begin = querypos;
+    if (hardclip_low > querypos) { /* > not >= */
+      seg_begin = hardclip_low;
+      leading_clip = hardclip_low;
+    }
+
+    seg_end = querypos + stringlength;
+    if (querylength - hardclip_high < seg_end) {
+      seg_end = querylength - hardclip_high;
+      debug1(printf("  endpos %d = querylength %d - hardclip_high %d\n",seg_end,querylength,hardclip_high));
+    } else {
+      debug1(printf("  endpos %d = querypos %d + stringlength %d\n",seg_end,querypos,stringlength));
+    }
+
+    printable_p = (seg_end >= seg_begin) ? true : false;
+
+  } else {
+    debug1(printf("\nEntering print_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
+		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
+
+    seg_begin = querypos;
+    if (querylength - hardclip_low < querypos) {
+      seg_begin = querylength - hardclip_low;
+      leading_clip = hardclip_low;
+    }
+
+    seg_end = querypos - stringlength;
+    if (hardclip_high >= seg_end) {
+      seg_end = hardclip_high;
+      debug1(printf("  endpos %d = hardclip_high %d\n",seg_end,hardclip_high));
+    } else {
+      debug1(printf("  endpos %d = querypos %d - stringlength %d\n",seg_end,querypos,stringlength));
+    }
+
+    printable_p = (seg_end <= seg_begin) ? true : false;
+  }
+
+  debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",seg_begin,seg_end,leading_clip));
+
+  if (printable_p == true) {
+    if (leading_clip > 0) {
+      debug1(printf("  Pushing initial %dH\n",leading_clip));
+      FPRINTF(fp,"%dH",leading_clip);
+    }
+
+    /* Non-negative here, because printable_p guarantees the ends are ordered */
+    seg_length = (plusp == true) ? seg_end - seg_begin : seg_begin - seg_end;
+    if (seg_length > 0) {
+      if (type != 'E') {
+	debug1(printf("  Pushing %d%c\n",seg_length,type));
+	FPRINTF(fp,"%d%c",seg_length,type);
+      } else if (seg_length == trimlength) {
+	debug1(printf("  Pushing %dS\n",seg_length));
+	FPRINTF(fp,"%dS",seg_length);
+      } else {
+	debug1(printf("  Pushing %dH because matchlength %d != trimlength %d\n",
+		      seg_length,seg_length,trimlength));
+	FPRINTF(fp,"%dH",seg_length);
+      }
+    }
+  }
+
+  if (lastp == true) {
+    trailing_clip = (plusp == true) ? querylength - seg_end : seg_end;
+    if (trailing_clip > 0) {
+      debug1(printf("  Pushing final %dH\n",trailing_clip));
+      FPRINTF(fp,"%dH",trailing_clip);
+    }
+  }
+
+  return;
+}
+
+
+
+
+
+/* Based on print_md_string.
+ *
+ * Writes the extended-CIGAR runs ('=' for matches, 'X' for mismatches) of one aligned
+ * segment to fp.  genomicfwd_refdiff points at the segment's reference-difference
+ * string: uppercase characters count as matches and all other characters as
+ * mismatches; a NULL pointer means every position counts as a match.  The segment
+ * covers stringlength query positions starting at querypos; for the minus strand,
+ * querypos is first mirrored to querylength - querypos - stringlength.  Positions
+ * inside the hard-clipped ends (hardclip_low, hardclip_high) are left out, and when
+ * lastp is true a final H operation is appended.
+ *
+ * The md_lowercase_variant_p setting does not influence the output: the only branch
+ * that distinguished it (a test against a second, "bothdiff" string) was disabled,
+ * which left two identical loops.  They are merged here.
+ *
+ * NOTE(review): when a refdiff string is given and the segment starts at a hard-clip
+ * boundary (or hardclip_low is 0), the first run is preceded by a zero-length run of
+ * the other kind ("0X" or "0="), as in an MD string.  That behavior is preserved;
+ * confirm that it is intended for CIGAR output.
+ */
+static void
+print_extended_cigar (Filestring_T fp, char *genomicfwd_refdiff,
+		      int stringlength, int querypos, int querylength,
+		      int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
+  int nmatches = 0, nmismatches = 0;
+  int starti, endi, i;
+  bool hardclip_end_p = false;
+  int cliplength, endpos;
+
+  /* The debug messages formerly referenced genomicfwd_bothdiff, which is not declared
+     in this function, and passed a possibly NULL pointer to %s */
+  if (plusp == true) {
+    debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref\n",
+		  querypos,querylength,hardclip_low,hardclip_high,
+		  (genomicfwd_refdiff == NULL) ? "(null)" : genomicfwd_refdiff));
+  } else {
+    debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref\n",
+		  querypos,querylength,hardclip_low,hardclip_high,
+		  (genomicfwd_refdiff == NULL) ? "(null)" : genomicfwd_refdiff));
+    querypos = querylength - querypos - stringlength;
+    debug2(printf("  Revising querypos to be %d\n",querypos));
+  }
+
+  /* Low end: skip positions below hardclip_low */
+  if (hardclip_low == 0) {
+    starti = 0;
+    hardclip_end_p = true;
+  } else if (hardclip_low > querypos) {
+    /* startpos = hardclip_low; */
+    starti = hardclip_low - querypos;
+    hardclip_end_p = true;
+    debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
+		  starti,hardclip_low,querypos));
+  } else {
+    /* startpos = querypos; */
+    starti = 0;
+  }
+
+  /* High end: stop at querylength - hardclip_high */
+  if (querylength - hardclip_high < querypos + stringlength) {
+    endpos = querylength - hardclip_high;
+    endi = (querylength - hardclip_high) - querypos;
+    debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+		  endi,querylength,hardclip_high,querypos));
+  } else {
+    endpos = querypos + stringlength;
+    endi = stringlength;
+  }
+
+  debug2(printf("  Counting matches from %d to %d\n",starti,endi));
+
+  if (genomicfwd_refdiff == NULL) {
+    if (endi > starti) {
+      nmatches += (endi - starti);
+    }
+
+  } else {
+    for (i = starti; i < endi; i++) {
+      /* Cast required: isupper is undefined for negative char values */
+      if (isupper((unsigned char) genomicfwd_refdiff[i])) {
+	if (nmismatches > 0 || hardclip_end_p == true) {
+	  FPRINTF(fp,"%dX",nmismatches);
+	  nmismatches = 0;
+	  hardclip_end_p = false;
+	}
+	nmatches++;
+
+      } else {
+	/* A true mismatch against both variants */
+	if (nmatches > 0 || hardclip_end_p == true) {
+	  FPRINTF(fp,"%d=",nmatches);
+	  nmatches = 0;
+	  hardclip_end_p = false;
+	}
+	nmismatches++;
+      }
+    }
+  }
+
+  /* Flush the run still being counted; at most one counter is nonzero here */
+  if (nmatches > 0) {
+    FPRINTF(fp,"%d=",nmatches);
+  } else if (nmismatches > 0) {
+    FPRINTF(fp,"%dX",nmismatches);
+  }
+
+  if (lastp == true) {
+    /* cliplength = querypos + stringlength - endpos; */
+    cliplength = (plusp == true) ? querylength - endpos : endpos;
+    if (cliplength > 0) {
+      debug1(printf("  Pushing final %dH\n",cliplength));
+      FPRINTF(fp,"%dH",cliplength);
+    }
+  }
+
+  return;
+}
+
+
+static char complCode[128] = COMPLEMENT_LC;	/* Complement lookup table, indexed by character code */
+
+/* Fills complement with the reverse complement of the first length characters of
+   sequence and NUL-terminates it.  The caller provides the buffer, which must hold
+   length+1 characters.  NOTE(review): the table has 128 entries and is indexed with
+   a plain char, so this presumably relies on sequence being 7-bit ASCII -- confirm. */
+static void
+make_complement_buffered (char *complement, char *sequence, unsigned int length) {
+  char *out = complement;
+  int src;
+
+  /* Walk the input from its last character to its first while filling the output forward */
+  for (src = length-1; src >= 0; src--) {
+    *out++ = complCode[(int) sequence[src]];
+  }
+  complement[length] = '\0';
+
+  return;
+}
+
+
+/* Writes the match portion of one substring to fp.
+ *
+ * When cigar_extended_p is false, this is a plain M operation of stringlength at
+ * querypos, delegated to print_cigar.  Otherwise it is a series of =/X runs over
+ * substring_length positions starting at substring_start, produced by
+ * print_extended_cigar from the substring's refdiff string (NULL is passed through
+ * if the substring has none).  On the minus strand, print_extended_cigar receives
+ * the output of make_complement_buffered on that stretch, held in a temporary buffer.
+ */
+static void
+print_cigar_M (Filestring_T fp, Substring_T substring, int substring_length, int substring_start,
+	       int stringlength, int querypos, int querylength,
+	       int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
+  char *genomicfwd_refdiff, *genomicdir_refdiff;
+  char *refdiff_arg;
+  bool buffer_p = false;	/* true if genomicfwd_refdiff was allocated below */
+
+  if (cigar_extended_p == false) {
+    print_cigar(fp,/*type*/'M',stringlength,querypos,querylength,
+		hardclip_low,hardclip_high,plusp,lastp,trimlength);
+    return;
+  }
+
+  genomicdir_refdiff = Substring_genomic_refdiff(substring);
+  if (genomicdir_refdiff == NULL) {
+    refdiff_arg = NULL;
+  } else if (plusp == true) {
+    refdiff_arg = &(genomicdir_refdiff[substring_start]);
+  } else {
+    genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
+    make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
+    refdiff_arg = genomicfwd_refdiff;
+    buffer_p = true;
+  }
+
+  print_extended_cigar(fp,refdiff_arg,/*stringlength*/substring_length,
+		       /*querypos*/substring_start,querylength,
+		       hardclip_low,hardclip_high,plusp,lastp);
+
+  if (buffer_p == true) {
+    FREEA(genomicfwd_refdiff);
+  }
+}
+
+
+#if 0
+/* Copy also in pair.c for GMAP.  Disabled (inside #if 0).  Returns true if the list of
+   CIGAR operation codes contains at least one 'M'.  The rejection of adjacent S and H
+   operations is itself disabled by the inner #if 0. */
+static bool
+check_cigar_types (Intlist_T cigar_types) {
+  Intlist_T p;
+  int type;
+  bool M_present_p = false;
+
+  for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
+    type = Intlist_head(p);
+    if (type == 'M') {
+      M_present_p = true;
+#if 0
+    } else if (type == 'H' && last_type == 'S') {
+      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+      return false;
+    } else if (type == 'S' && last_type == 'H') {
+      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+      return false;
+#endif
+    }
+  }
+
+  return M_present_p;
+}
+#endif
+
+
+
+void
+Cigar_print_substrings (int *nindels, List_T *startp, List_T *startq, List_T *prevp, List_T *nextp, List_T *finalp, List_T *endp,
+			Filestring_T fp, Stage3end_T stage3end,
+			int querylength, int hardclip_low, int hardclip_high) {
+  Substring_T substring, substringL, substringH;
+  Junction_T post_junction;
+  int type;
+
+  List_T substrings_LtoH, junctions_LtoH;
+  List_T p, q;
+  int substring_start, substring_length;
+
+  bool plusp;
+#ifdef PRINT_AMBIG_COORDS
+  Univcoord_T chroffset;
+#endif
+
+  
+  *nindels = 0;
+  if ((substrings_LtoH = Stage3end_substrings_LtoH(stage3end)) == NULL) {
+    FPRINTF(fp,"*");
+    return;
+  } else {
+    plusp = Stage3end_plusp(stage3end);
+    substrings_LtoH = Stage3end_substrings_LtoH(stage3end);
+    junctions_LtoH = Stage3end_junctions_LtoH(stage3end);
+    substringL = (Substring_T) List_head(substrings_LtoH);
+    substringH = (Substring_T) List_last_value(substrings_LtoH);
+  }
+
+
+  if (Substring_ambiguous_p(substringL) == true) {
+    *prevp = substrings_LtoH;
+    *startp = List_next(substrings_LtoH);
+    *startq = List_next(junctions_LtoH);
+  } else {
+    *prevp = (List_T) NULL;
+    *startp = substrings_LtoH;
+    *startq = junctions_LtoH;
+  }
+  if (Substring_ambiguous_p(substringH) == true) {
+    *endp = List_last_item(substrings_LtoH);
+  } else {
+    *endp = (List_T) NULL;
+  }
+
+  debug(printf("End has %d substrings\n",List_length(substrings_LtoH)));
+
+  p = *startp;
+  q = *startq;
+  if (plusp == true) {
+    /* Plus */
+    while (p != *endp && Substring_queryend((Substring_T) List_head(p)) < hardclip_low) {
+      /* Skip, because substring entirely in hard-clipped region */
+      debug(printf("Skipping %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		   Substring_queryend((Substring_T) List_head(p))));
+      *prevp = p;
+      p = List_next(p);
+      q = List_next(q);
+    }
+
+    substring = (Substring_T) List_head(p);
+    if (List_next(p) == *endp ||	Substring_queryend(substring) >= querylength - hardclip_high) {
+      /* Single substring */
+      debug(printf("Single substring %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		   Substring_queryend((Substring_T) List_head(p))));
+
+      if (hide_soft_clips_p == true) {
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_querystart(substring) + Substring_match_length(substring) +
+		      (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+      } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
+	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_querystart(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+		    /*querypos*/Substring_queryend(substring),querylength,
+		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+      }
+      *finalp = p;
+      *nextp = List_next(p);
+
+    } else {
+      /* First substring, plus */
+      debug(printf("First substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		   Substring_queryend((Substring_T) List_head(p))));
+
+      post_junction = (Junction_T) List_head(q);
+
+      if (hide_soft_clips_p == true) {
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_querystart(substring) +
+		      Substring_match_length(substring),
+		      /*querypos*/0,querylength,hardclip_low,hardclip_high,
+		      /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+      } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
+	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_querystart(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+      }
+      p = List_next(p);
+      
+      while (p != *endp && Substring_queryend((Substring_T) List_head(p)) < querylength - hardclip_high) {
+	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+	  debug1(printf("1. Pushing %dD\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == INS_JUNCTION) {
+	  debug1(printf("1. Pushing %dI\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == SPLICE_JUNCTION) {
+	  debug1(printf("1. Pushing %dN\n",Junction_splice_distance(post_junction)));
+	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+	}
+	q = List_next(q);
+	if (q == NULL) {
+	} else {
+	  post_junction = (Junction_T) List_head(q);
+	}
+
+	substring = (Substring_T) List_head(p);
+	if (List_next(p) == *endp) {
+	  /* Last substring, plus, not hard-clipped */
+	  debug(printf("Last substring, plus, not hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		       Substring_queryend((Substring_T) List_head(p))));
+	  
+	  if (hide_soft_clips_p == true) {
+	    substring_start = Substring_querystart_orig(substring);
+	    substring_length = Substring_match_length_orig(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,
+			  Substring_match_length(substring) +
+			  (querylength - Substring_queryend(substring)),
+			  /*querypos*/Substring_querystart(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	  } else {
+	    substring_start = Substring_querystart(substring);
+	    substring_length = Substring_match_length(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,Substring_match_length(substring),
+			  /*querypos*/Substring_querystart(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	    print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	  }
+	  *finalp = p;
+	  *nextp = List_next(p);
+
+	} else {
+	  /* Middle substring, plus */
+	  debug(printf("Middle substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)), 
+		       Substring_queryend((Substring_T) List_head(p))));
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
+
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_querystart(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	}
+	p = List_next(p);
+      }
+      
+      if (p != *endp) {
+	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+	  debug1(printf("2. Pushing %dD\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == INS_JUNCTION) {
+	  debug1(printf("2. Pushing %dI\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == SPLICE_JUNCTION) {
+	  debug1(printf("2. Pushing %dN\n",Junction_splice_distance(post_junction)));
+	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+	}
+
+	/* Last substring, plus, hard-clipped */
+	substring = (Substring_T) List_head(p);
+	debug(printf("Last substring, plus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		     Substring_queryend((Substring_T) List_head(p))));
+	if (hide_soft_clips_p == true) {
+	  substring_start = Substring_querystart_orig(substring);
+	  substring_length = Substring_match_length_orig(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring) +
+			(querylength - Substring_queryend(substring)),
+			/*querypos*/Substring_querystart(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	} else {
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_querystart(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	  print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+		      /*querypos*/Substring_queryend(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
+	}
+	*finalp = p;
+	*nextp = List_next(p);
+
+      }
+    }
+
+  } else {
+    /* Minus */
+    while (p != *endp && Substring_querystart((Substring_T) List_head(p)) >= querylength - hardclip_low) {
+      /* Skip, because substring entirely in hard-clipped region */
+      debug(printf("Skipping %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		   Substring_queryend((Substring_T) List_head(p))));
+      *prevp = p;
+      p = List_next(p);
+      q = List_next(q);
+    }
+
+    substring = (Substring_T) List_head(p);
+    if (List_next(p) == *endp || Substring_querystart(substring) < hardclip_high) {
+      /* Single substring */
+      debug(printf("Single substring %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		   Substring_queryend((Substring_T) List_head(p))));
+
+      if (hide_soft_clips_p == true) {
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      (querylength - Substring_queryend(substring)) + 
+		      Substring_match_length(substring) + Substring_querystart(substring),
+		      /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+		      /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+      } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+		    /*querypos*/querylength,querylength,
+		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_queryend(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+		    /*querypos*/Substring_querystart(substring),querylength,
+		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+      }
+      *finalp = p;
+      *nextp = List_next(p);
+
+    } else {
+      /* First substring, minus */
+      debug(printf("First substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		   Substring_queryend((Substring_T) List_head(p))));
+    
+      post_junction = (Junction_T) List_head(q);
+
+      if (hide_soft_clips_p == true) {
+	substring_start = Substring_querystart_orig(substring);
+	substring_length = Substring_match_length_orig(substring);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      (querylength - Substring_queryend(substring)) +
+		      Substring_match_length(substring),
+		      /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
+		      /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+      } else {
+	substring_start = Substring_querystart(substring);
+	substring_length = Substring_match_length(substring);
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
+		    /*querypos*/querylength,querylength,
+		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar_M(fp,substring,substring_length,substring_start,
+		      Substring_match_length(substring),
+		      /*querypos*/Substring_queryend(substring),querylength,
+		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+      }
+      p = List_next(p);
+
+      while (p != *endp && Substring_querystart((Substring_T) List_head(p)) >= hardclip_high) {
+	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+	  debug1(printf("3. Pushing %dD\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == INS_JUNCTION) {
+	  debug1(printf("3. Pushing %dI\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == SPLICE_JUNCTION) {
+	  debug1(printf("3. Pushing %dN\n",Junction_splice_distance(post_junction)));
+	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+	}
+	q = List_next(q);
+	if (q == NULL) {
+	} else {
+	  post_junction = (Junction_T) List_head(q);
+	}
+
+	substring = (Substring_T) List_head(p);
+	if (List_next(p) == *endp) {
+	  /* Last substring, minus, not hard-clipped */
+	  debug(printf("Last substring, minus, not hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		       Substring_queryend((Substring_T) List_head(p))));
+
+	  if (hide_soft_clips_p == true) {
+	    substring_start = Substring_querystart_orig(substring);
+	    substring_length = Substring_match_length_orig(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,
+			  Substring_match_length(substring) +
+			  Substring_querystart(substring),
+			  /*querypos*/Substring_queryend(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	  } else {
+	    substring_start = Substring_querystart(substring);
+	    substring_length = Substring_match_length(substring);
+	    print_cigar_M(fp,substring,substring_length,substring_start,
+			  Substring_match_length(substring),
+			  /*querypos*/Substring_queryend(substring),querylength,
+			  hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	    print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+			/*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
+			/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	  }
+	  *finalp = p;
+	  *nextp = List_next(p);
+
+	} else {
+	  /* Middle substring, minus */
+	  debug(printf("Middle substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		       Substring_queryend((Substring_T) List_head(p))));
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
+
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	}
+	p = List_next(p);
+      }
+
+      if (p != *endp) {
+	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
+	  debug1(printf("4. Pushing %dD\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == INS_JUNCTION) {
+	  debug1(printf("4. Pushing %dI\n",Junction_nindels(post_junction)));
+	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
+	  *nindels += Junction_nindels(post_junction);
+	} else if (type == SPLICE_JUNCTION) {
+	  debug1(printf("4. Pushing %dN\n",Junction_splice_distance(post_junction)));
+	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
+	}
+
+	/* Last substring, minus, hard-clipped */
+	substring = (Substring_T) List_head(p);
+	debug(printf("Last substring, minus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
+		     Substring_queryend((Substring_T) List_head(p))));
+
+	if (hide_soft_clips_p == true) {
+	  substring_start = Substring_querystart_orig(substring);
+	  substring_length = Substring_match_length_orig(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring) +
+			Substring_querystart(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	} else {
+	  substring_start = Substring_querystart(substring);
+	  substring_length = Substring_match_length(substring);
+	  print_cigar_M(fp,substring,substring_length,substring_start,
+			Substring_match_length(substring),
+			/*querypos*/Substring_queryend(substring),querylength,
+			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	  print_cigar(fp,/*type*/'S',Substring_querystart(substring),
+		      /*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
+		      /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+	}
+	*finalp = p;
+	*nextp = List_next(p);
+
+      }
+    }
+  }
+
+  return;
+}
+
+
+void
+Cigar_print_halfdonor (Filestring_T fp, Substring_T donor, Stage3end_T this,
+		       int querylength, int *hardclip_low, int *hardclip_high,
+		       bool use_hardclip_p) {
+  bool sensep;  /* false only when Stage3end_sensedir(this) == SENSE_ANTI */
+  int transloc_hardclip_low, transloc_hardclip_high;  /* read bases lying beyond one end of the donor */
+  bool plusp;  /* Substring_plusp(donor) */
+#ifdef PRINT_AMBIG_COORDS
+  Univcoord_T chroffset;
+#endif
+  /* Prints the CIGAR for the donor half of a splice as a series of print_cigar segments. */
+  /* With use_hardclip_p, *hardclip_low / *hardclip_high are first raised to cover the read bases on one side of the donor. */
+  plusp = Substring_plusp(donor);
+
+  if (Stage3end_sensedir(this) == SENSE_ANTI) {
+    sensep = false;
+  } else {
+    sensep = true;
+  }
+
+  if (use_hardclip_p == true) {
+    if (sensep == true) {
+      if (plusp == true) {
+	transloc_hardclip_low = 0;
+	transloc_hardclip_high = querylength - Substring_queryend(donor);
+      } else {
+	transloc_hardclip_high = 0;
+	transloc_hardclip_low = querylength - Substring_queryend(donor);
+      }
+
+    } else {
+      if (plusp == true) {
+	transloc_hardclip_high = 0;
+	transloc_hardclip_low = Substring_querystart(donor);
+      } else {
+	transloc_hardclip_low = 0;
+	transloc_hardclip_high = Substring_querystart(donor);
+      }
+    }
+    /* Only ever widen the caller's hard clips, never shrink them */
+    if (transloc_hardclip_low > *hardclip_low) {
+      *hardclip_low = transloc_hardclip_low;
+    }
+    if (transloc_hardclip_high > *hardclip_high) {
+      *hardclip_high = transloc_hardclip_high;
+    }
+  }
+
+  /* Segment types passed to print_cigar: 'S' and 'M', plus 'E' for the read bases beyond the donor (see print_cigar) */
+  if (sensep == true) {
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteD_pos(donor) == Substring_queryend(donor)); */
+    if (plusp == true) {
+      /* sensep true, plusp true */
+      /* FPRINTF(fp,"donor sensep true, plusp true\n"); */
+      if (hide_soft_clips_p == true) {  /* the leading soft clip is folded into the M length */
+	print_cigar(fp,/*type*/'M',
+		    Substring_querystart(donor) + 
+		    Substring_match_length(donor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+		    /*querypos*/Substring_queryend(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/Substring_trim_right(donor));
+
+      } else {
+	print_cigar(fp,/*type*/'S',Substring_querystart(donor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+		    /*querypos*/Substring_querystart(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+		    /*querypos*/Substring_queryend(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/Substring_trim_right(donor));
+      }
+
+    } else {
+      /* sensep true, plusp false: segments run from querylength downward */
+      /* FPRINTF(fp,"donor sensep true, plusp false\n"); */
+      if (hide_soft_clips_p == true) {
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+		    /*querypos*/querylength,querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/Substring_trim_right(donor));
+	print_cigar(fp,/*type*/'M',
+		    Substring_match_length(donor) +
+		    Substring_querystart(donor),
+		    /*querypos*/Substring_queryend(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/true,
+		    /*trimlength*/0);
+      } else {
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
+		    /*querypos*/querylength,querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/Substring_trim_right(donor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+		    /*querypos*/Substring_queryend(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'S',Substring_querystart(donor),
+		    /*querypos*/Substring_querystart(donor),querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+      }
+    }
+
+  } else {
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteD_pos(donor) == Substring_querystart(donor)); */
+    if (plusp == true) {
+      /* sensep false, plusp true */
+      /* FPRINTF(fp,"donor sensep false, plusp true\n"); */
+      if (hide_soft_clips_p == true) {  /* the trailing soft clip is folded into the M length */
+	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(donor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(donor) + (querylength - Substring_queryend(donor)),
+		    /*querypos*/Substring_querystart(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/0);
+      } else {
+	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(donor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+		    /*querypos*/Substring_querystart(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
+		    /*querypos*/Substring_queryend(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/0);
+      }
+
+    } else {
+      /* sensep false, plusp false: segments run from querylength downward */
+      /* FPRINTF(fp,"donor sensep false, plusp false\n"); */
+      if (hide_soft_clips_p == true) {
+	print_cigar(fp,/*type*/'M',(querylength - Substring_queryend(donor)) + Substring_match_length(donor),
+		    /*querypos*/querylength,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+		    /*querypos*/Substring_querystart(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/true,
+		    /*trimlength*/Substring_trim_left(donor));
+
+      } else {
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
+		    /*querypos*/querylength,querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
+		    /*querypos*/Substring_queryend(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
+		    /*querypos*/Substring_querystart(donor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/true,
+		    /*trimlength*/Substring_trim_left(donor));
+      }
+    }
+  }
+
+  return;
+}
+
+
+void
+Cigar_print_halfacceptor (Filestring_T fp, Substring_T acceptor, Stage3end_T this,
+			  int querylength, int *hardclip_low, int *hardclip_high,
+			  bool use_hardclip_p) {
+  bool sensep;  /* false only when Stage3end_sensedir(this) == SENSE_ANTI */
+  int transloc_hardclip_low, transloc_hardclip_high;  /* read bases lying beyond one end of the acceptor */
+  bool plusp;  /* Substring_plusp(acceptor) */
+#ifdef PRINT_AMBIG_COORDS
+  Univcoord_T chroffset;
+#endif
+  /* Prints the CIGAR for the acceptor half of a splice as a series of print_cigar segments. */
+  /* With use_hardclip_p, *hardclip_low / *hardclip_high are first raised to cover the read bases on one side of the acceptor. */
+  plusp = Substring_plusp(acceptor);
+
+  if (Stage3end_sensedir(this) == SENSE_ANTI) {
+    sensep = false;
+  } else {
+    sensep = true;
+  }
+
+  if (use_hardclip_p == true) {
+    if (sensep == true) {
+      if (plusp == true) {
+	transloc_hardclip_high = 0;
+	transloc_hardclip_low = Substring_querystart(acceptor);
+      } else {
+	transloc_hardclip_low = 0;
+	transloc_hardclip_high = Substring_querystart(acceptor);
+      }
+
+    } else {
+      if (plusp == true) {
+	transloc_hardclip_low = 0;
+	transloc_hardclip_high = querylength - Substring_queryend(acceptor);
+      } else {
+	transloc_hardclip_high = 0;
+	transloc_hardclip_low = querylength - Substring_queryend(acceptor);
+      }
+    }
+    /* Only ever widen the caller's hard clips, never shrink them */
+    if (transloc_hardclip_low > *hardclip_low) {
+      *hardclip_low = transloc_hardclip_low;
+    }
+    if (transloc_hardclip_high > *hardclip_high) {
+      *hardclip_high = transloc_hardclip_high;
+    }
+  }
+
+  /* Segment types passed to print_cigar: 'S' and 'M', plus 'E' for read bases on one side of the acceptor (see print_cigar) */
+  if (sensep == true) {
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor)); */
+    if (plusp == true) {
+      /* sensep true, plusp true */
+      /* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */
+      if (hide_soft_clips_p == true) {  /* the leading soft clip is folded into the M length */
+	print_cigar(fp,/*type*/'M',Substring_querystart(acceptor) + Substring_match_length(acceptor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+		    /*querypos*/Substring_queryend(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/Substring_trim_right(acceptor));
+      } else {
+	print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+		    /*querypos*/Substring_querystart(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+		    /*querypos*/Substring_queryend(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/Substring_trim_right(acceptor));
+      }
+
+    } else {
+      /* sensep true, plusp false: segments run from querylength downward */
+      /* FPRINTF(fp,"acceptor sensep true, plusp false\n"); */
+      if (hide_soft_clips_p == true) {
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+		    /*querypos*/querylength,querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/Substring_trim_right(acceptor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor) + Substring_querystart(acceptor),
+		    /*querypos*/Substring_queryend(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/true,
+		    /*trimlength*/0);
+      } else {
+	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
+		    /*querypos*/querylength,querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/Substring_trim_right(acceptor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+		    /*querypos*/Substring_queryend(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
+		    /*querypos*/Substring_querystart(acceptor),querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/false,/*lastp*/true,/*trimlength*/0);
+      }
+    }
+
+  } else {
+    /* sensep false */
+    /* Doesn't hold for DNA-Seq chimeras */
+    /* assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor)); */
+    if (plusp == true) {
+      /* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */
+      if (hide_soft_clips_p == true) {  /* the trailing soft clip is folded into the M length */
+	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(acceptor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor) + (querylength - Substring_queryend(acceptor)),
+		    /*querypos*/Substring_querystart(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/0);
+      } else {
+	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+		    /*querypos*/0,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(acceptor));
+	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+		    /*querypos*/Substring_querystart(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
+		    /*querypos*/Substring_queryend(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/true,/*lastp*/true,
+		    /*trimlength*/Substring_trim_right(acceptor));  /* NOTE(review): every other 'S' call here passes trimlength 0 -- confirm intended */
+      }
+
+    } else {
+      /* sensep false, plusp false: segments run from querylength downward */
+      /* FPRINTF(fp,"acceptor sensep false, plusp false\n"); */
+      if (hide_soft_clips_p == true) {
+	print_cigar(fp,/*type*/'M',(querylength - Substring_queryend(acceptor)) + Substring_match_length(acceptor),
+		    /*querypos*/querylength,querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/false,/*lastp*/false,/*trimlength*/0);
+	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+		    /*querypos*/Substring_querystart(acceptor),querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/false,/*lastp*/true,/*trimlength*/Substring_trim_left(acceptor));
+      } else {
+	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
+		    /*querypos*/querylength,querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
+		    /*querypos*/Substring_queryend(acceptor),querylength,
+		    *hardclip_low,*hardclip_high,/*plusp*/false,/*lastp*/false,
+		    /*trimlength*/0);
+	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
+		    /*querypos*/Substring_querystart(acceptor),querylength,*hardclip_low,*hardclip_high,
+		    /*plusp*/false,/*lastp*/true,/*trimlength*/Substring_trim_left(acceptor));
+      }
+    }
+  }
+
+  return;
+}
+
+
+
+
+static void
+print_exon_exon_cigar (Filestring_T fp, Stage3end_T this, int querylength) {
+  /* Prints the CIGAR for one half of a distant splice: the donor half when */
+  /* the sense is SENSE_FORWARD, the acceptor half for any other sense. */
+  Substring_T half;
+  int clip_low, clip_high;
+
+  /* Shouldn't have any overlap on a distant splice, so begin unclipped; */
+  /* the half printers may raise these (use_hardclip_p is true). */
+  clip_low = 0;
+  clip_high = 0;
+
+  if (Stage3end_sensedir(this) != SENSE_FORWARD) {
+    /* SENSE_ANTI, or SENSE_NULL (DNA distant chimera) */
+    half = Stage3end_substring_acceptor(this);
+    Cigar_print_halfacceptor(fp,half,this,querylength,
+			     &clip_low,&clip_high,/*use_hardclip_p*/true);
+
+  } else {
+    half = Stage3end_substring_donor(this);
+    Cigar_print_halfdonor(fp,half,this,querylength,
+			  &clip_low,&clip_high,/*use_hardclip_p*/true);
+  }
+
+  return;
+}
+
+
+
+void
+Cigar_print_mate (Filestring_T fp, Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high) {
+  /* Writes the mate's CIGAR to fp, choosing the printer by hit type; "*" when there is no mate. */
+  Hittype_T kind;
+  int indel_count;		/* results of Cigar_print_substrings are not read here */
+  List_T p_start, q_start, p_prev, p_next, p_final, p_end;
+
+  if (mate == NULL) {
+    FPRINTF(fp,"*");		/* CIGAR for nomapping */
+    return;
+  }
+
+  kind = Stage3end_hittype(mate);
+  if (kind == GMAP) {
+    Pair_print_tokens(fp,Stage3end_cigar_tokens(mate));
+  } else if (kind == TRANSLOC_SPLICE || (merge_samechr_p == false && kind == SAMECHR_SPLICE)) {
+    /* Translocation, or same-chromosome splice that is not being merged */
+    print_exon_exon_cigar(fp,mate,mate_querylength);
+  } else {
+    Cigar_print_substrings(&indel_count,&p_start,&q_start,&p_prev,&p_next,&p_final,&p_end,
+			   fp,mate,mate_querylength,mate_hardclip_low,mate_hardclip_high);
+  }
+}
+
+
+
+
diff --git a/src/cigar.h b/src/cigar.h
new file mode 100644
index 0000000..2015330
--- /dev/null
+++ b/src/cigar.h
@@ -0,0 +1,33 @@
+/* $Id: cigar.h 206761 2017-05-30 17:39:28Z twu $ */
+#ifndef CIGAR_INCLUDED
+#define CIGAR_INCLUDED
+
+#include "bool.h"
+#include "filestring.h"
+#include "list.h"
+#include "substring.h"
+#include "stage3hr.h"
+
+/* CIGAR routines; the Cigar_print_* functions write their output to a Filestring_T */
+extern int
+Cigar_length (List_T tokens);
+extern void
+Cigar_print_tokens (Filestring_T fp, List_T tokens);
+extern void
+Cigar_print_substrings (int *nindels, List_T *startp, List_T *startq, List_T *prevp, List_T *nextp, List_T *finalp, List_T *endp,
+			Filestring_T fp, Stage3end_T stage3end,
+			int querylength, int hardclip_low, int hardclip_high);  /* nindels and the List_T pointers receive results; callers may pass uninitialized locals */
+extern void
+Cigar_print_halfdonor (Filestring_T fp, Substring_T donor, Stage3end_T this,
+		       int querylength, int *hardclip_low, int *hardclip_high,
+		       bool use_hardclip_p);  /* if use_hardclip_p, may raise *hardclip_low / *hardclip_high */
+extern void
+Cigar_print_halfacceptor (Filestring_T fp, Substring_T acceptor, Stage3end_T this,
+			  int querylength, int *hardclip_low, int *hardclip_high,
+			  bool use_hardclip_p);  /* if use_hardclip_p, may raise *hardclip_low / *hardclip_high */
+extern void
+Cigar_print_mate (Filestring_T fp, Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high);  /* prints "*" when mate is NULL */
+
+
+#endif /* CIGAR_INCLUDED */
+
diff --git a/src/config.h.in b/src/config.h.in
index 815b1a6..66205b3 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -126,9 +126,6 @@
    */
 #undef HAVE_MM_POPCNT
 
-/* Define if you have the MPI library. */
-#undef HAVE_MPI
-
 /* Define to 1 if you have the `munmap' function. */
 #undef HAVE_MUNMAP
 
diff --git a/src/cpuid.c b/src/cpuid.c
index eadd351..efcdfb9 100644
--- a/src/cpuid.c
+++ b/src/cpuid.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: cpuid.c 200231 2016-11-08 00:55:17Z twu $";
+static char rcsid[] = "$Id: cpuid.c 201743 2016-12-16 16:41:11Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -35,7 +35,7 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
   *sse4_1_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_1);
   *sse4_2_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_2);
   *avx2_support_p = _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE);
-  *avx512_support_p = _may_i_use_cpu_feature(_FEATURE_512F);
+  *avx512_support_p = _may_i_use_cpu_feature(_FEATURE_AVX512F | _FEATURE_AVX512CD);
 
   return;
 }
@@ -73,14 +73,28 @@ run_cpuid (uint32_t eax, uint32_t ecx, uint32_t *abcd) {
 static int
 check_xcr0_ymm () {
   uint32_t xcr0;
+  uint32_t ymm_xmm = ((1 << 2) | (1 << 1));
 #if defined(_MSC_VER)
   xcr0 = (uint32_t)_xgetbv(0);	/* min VS2010 SP1 compiler is required */
 #else
   __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
 #endif
-  return ((xcr0 & 6) == 6);	/* checking if xmm and ymm state are enabled in XCR0 */
+  return ((xcr0 & ymm_xmm) == ymm_xmm);	/* checking if xmm and ymm state are enabled in XCR0 */
 }
 
+static int
+check_xcr0_zmm () {
+  const uint32_t needed = ((1 << 1) | (1 << 2) | (7 << 5)); /* XCR0 bits 1 (xmm), 2 (ymm), 5-7 (opmask and zmm) */
+  uint32_t xcr0_low;
+#if !defined(_MSC_VER)
+  __asm__ ("xgetbv" : "=a" (xcr0_low) : "c" (0) : "%edx");
+#else
+  xcr0_low = (uint32_t)_xgetbv(0);	/* min VS2010 SP1 compiler is required */
+#endif
+  return ((~xcr0_low & needed) == 0);	/* 1 only when every needed state bit is enabled in XCR0 */
+}
+
+
 void
 CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p,
 	       bool *avx2_support_p, bool *avx512_support_p) {
@@ -95,12 +109,14 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
   uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); /* ecx */
   uint32_t avx2_bmi12_mask = ((1 << 5) | (1 << 3) | (1 << 8)); /* ebx */
   uint32_t lzcnt_mask = (1 << 5); /* ecx */
-  uint32_t avx512_mask = (1 << 16);
+
+  uint32_t osxsave_mask = (1 << 27); /* ecx */
+  uint32_t avx512_mask = ((1 << 16) | (1 << 28));  /* ebx */
 
 
   run_cpuid(1, 0, abcd);
 #ifdef MAIN
-  printf("CPUID          1, 0 returns %08X %08X %08X %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]);
+  printf("CPUID          1, 0 returns EAX %08X EBX %08X ECX %08X EDX %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]);
 #endif
 
   *sse2_support_p = ((abcd[EDX] & sse2_mask) == sse2_mask) ? true : false;
@@ -111,36 +127,40 @@ CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support
 
   if ((abcd[ECX] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) {
     *avx2_support_p = false;
-    *avx512_support_p = false;
   } else if (!check_xcr0_ymm()) {
     *avx2_support_p = false;
-    *avx512_support_p = false;
   } else {
     run_cpuid(7, 0, abcd);
 #ifdef MAIN
-    printf("CPUID          7, 0 returns %08X %08X %08X %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]);
+    printf("CPUID          7, 0 returns EAX %08X EBX %08X ECX %08X EDX %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]);
 #endif
 
     if ((abcd[EBX] & avx2_bmi12_mask) != avx2_bmi12_mask) {
       *avx2_support_p = false;
-      *avx2_support_p = false;
     } else {
       run_cpuid(0x80000001, 0, abcd);
 #ifdef MAIN
-      printf("CPUID 0x80000001, 0 returns %08X %08X %08X %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]);
+      printf("CPUID 0x80000001, 0 returns EAX %08X EBX %08X ECX %08X EDX %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]);
 #endif
 
       if ((abcd[ECX] & lzcnt_mask) != lzcnt_mask) {
 	*avx2_support_p = false;
-	*avx512_support_p = false;
       } else {
 	*avx2_support_p = true;
-	*avx512_support_p = ((abcd[ECX] & avx512_mask) == avx512_mask) ? true : false;
       }
     }
   }
 
-  
+  run_cpuid(1, 0, abcd);	/* Leaf 1 is needed again: abcd was overwritten by the AVX2 checks above */
+  if ((abcd[ECX] & osxsave_mask) != osxsave_mask) {
+    *avx512_support_p = false;	/* xgetbv cannot be used without OSXSAVE */
+  } else if (!check_xcr0_zmm()) {
+    *avx512_support_p = false;	/* zmm state is not enabled in XCR0 */
+  } else {
+    /* avx512_mask holds EBX bits of leaf 7, subleaf 0, so that leaf must be queried; leaf 1 EBX is unrelated */
+    run_cpuid(7, 0, abcd);
+    *avx512_support_p = ((abcd[EBX] & avx512_mask) == avx512_mask) ? true : false;
+  }
 
   return;
 }
diff --git a/src/datadir.c b/src/datadir.c
index 1d461c1..8c4284d 100644
--- a/src/datadir.c
+++ b/src/datadir.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: datadir.c 188038 2016-04-20 17:47:47Z twu $";
+static char rcsid[] = "$Id: datadir.c 207315 2017-06-14 19:28:58Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -85,12 +85,13 @@ find_fileroot (char *genomesubdir, char *genomedir, char *dbroot) {
   if ((dp = opendir(genomesubdir)) == NULL) {
     /* Problem found.  Try to diagnose */
     if ((dp = opendir(genomedir)) == NULL) {
-      fprintf(stderr,"Unable to find genome directory %s.  Either recompile the GMAP package\n",genomedir);
-      fprintf(stderr,"to have the correct default directory (seen by doing gmap --version),\n");
+      fprintf(stderr,"Unable to find genome directory %s\n",genomedir);
+      fprintf(stderr,"Either recompile the GMAP package to have the ");
+      fprintf(stderr,"correct default directory (seen by doing gmap --version),\n");
       fprintf(stderr,"or use the -D flag to gmap to specify the correct genome directory.\n");
       exit(9);
     } else {
-      fprintf(stderr,"Unable to find genome %s in directory %s.\n",dbroot,genomedir);
+      fprintf(stderr,"Unable to find genome %s in directory %s\n",dbroot,genomedir);
       fprintf(stderr,"Make sure you have typed the genome correctly, or use the -D flag\n");
       fprintf(stderr,"(or the -F flag for cmetindex or atoiindex) to specify a directory.\n");
       fprintf(stderr,"For example, '-D .' specifies this directory.\n");
diff --git a/src/diag.c b/src/diag.c
index 6c26a2a..f4e3e11 100644
--- a/src/diag.c
+++ b/src/diag.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: diag.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: diag.c 207199 2017-06-12 18:31:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -442,7 +442,9 @@ compute_dominance (int *nunique, T *array, int ndiagonals) {
     superend = super->queryend;
 
     expected_nconsecutive = superend + 1 - superstart;
-    if (super->nconsecutive > expected_nconsecutive - 10) {
+    debug(printf("expected_nconsecutive is %d.  actual is %d\n",expected_nconsecutive,super->nconsecutive));
+    if (expected_nconsecutive < 100 && super->nconsecutive > expected_nconsecutive - 10) {
+      /* Short diagonal.  Rely on arithmetic differences */
       threshold = super->nconsecutive - DOMINANCE_END_EQUIV;
       for (j = i+1; j < *nunique; j++) {
 	sub = array[j];
@@ -460,7 +462,29 @@ compute_dominance (int *nunique, T *array, int ndiagonals) {
 	}
       }
       *nunique = k;
+
+    } else if (expected_nconsecutive >= 100 && super->nconsecutive > expected_nconsecutive * 0.90) {
+      /* Long diagonal.  Rely on factor differences */
+      /* Needed for very long diagonals, such as in genome-genome alignment */
+      threshold = super->nconsecutive * 0.80;
+      for (j = i+1; j < *nunique; j++) {
+	sub = array[j];
+	if (sub->querystart >= superstart && sub->queryend <= superend && sub->nconsecutive < threshold) {
+	  sub->dominatedp = true;
+	}
+      }
+
+      /* Shift array to contain non-dominated diagonals */
+      k = i+1;
+      for (j = i+1; j < *nunique; j++) {
+	sub = array[j];
+	if (sub->dominatedp == false) {
+	  array[k++] = array[j];
+	}
+      }
+      *nunique = k;
     }
+
     i++;
   }
 
@@ -468,6 +492,34 @@ compute_dominance (int *nunique, T *array, int ndiagonals) {
 }
   
 
+/* Compacts array in place, keeping (in their original order) only the
+   diagonals within 10000 of center_diagonal, inclusive.  Sets *nunique
+   to the number kept and returns array itself.  Dropped elements are
+   not freed here, and slots past the new *nunique are left untouched. */
+static T *
+keep_center_diagonal (int *nunique, T *array, Chrpos_T center_diagonal) {
+  int nkeep = 0;
+  int i;
+
+  /* One pass suffices: the write index nkeep never passes the read
+     index i, so counting and compacting can be done together */
+  for (i = 0; i < *nunique; i++) {
+    /* NOTE(review): comparing sums avoids wraparound at the low end,
+       presuming Chrpos_T is an unsigned type -- confirm */
+    if (array[i]->diagonal + 10000 < center_diagonal) {
+      /* Skip: too far below the center */
+    } else if (array[i]->diagonal > center_diagonal + 10000) {
+      /* Skip: too far above the center */
+    } else {
+      array[nkeep++] = array[i];
+    }
+  }
+
+  *nunique = nkeep;
+  return array;
+}
+
+
 static void
 assign_scores (List_T diagonals, int querylength) {
   int querypos;
@@ -559,7 +611,9 @@ Diag_compute_bounds (int *diag_querystart, int *diag_queryend,
 		     Chrpos_T chrstart, Chrpos_T chrend,
 		     Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp) {
   int nunique, ndiagonals, ngooddiagonals, i, j;
-  Chrpos_T diagonal;
+  Chrpos_T diagonal, min_diagonal, max_diagonal, center_diagonal;
+  int *bins, maxcount;
+  int nbins, bini;
   Chrpos_T genomiclength, position, chrinit, chrterm;
   int activestart, activeend;
   int querypos;
@@ -636,6 +690,42 @@ Diag_compute_bounds (int *diag_querystart, int *diag_queryend,
     printf("End of diagonals\n\n");
 #endif
 
+    if (nunique > 100) {
+      min_diagonal = array[0]->diagonal;	/* NOTE(review): presumes array is sorted by diagonal -- confirm */
+      max_diagonal = array[nunique-1]->diagonal;
+      nbins = (max_diagonal - min_diagonal)/10000 + 1;
+      bins = (int *) CALLOC(nbins,sizeof(int));
+      for (i = 0; i < nunique; i++) {
+	bini = (array[i]->diagonal - min_diagonal)/10000;
+	bins[bini] += 1;
+      }
+      maxcount = 0;
+      /* Seed center_diagonal with the first bin so that it is never read unset */
+      diagonal = center_diagonal = min_diagonal;
+      for (bini = 0; bini < nbins; bini++) {
+	if (bins[bini] > maxcount) {
+	  maxcount = bins[bini];
+	  /* Strict > means the lowest bin wins among ties */
+	  center_diagonal = diagonal;
+	}
+	diagonal += 10000;
+      }
+      center_diagonal += 5000;
+      debug0(printf("max bin is %d with count %d => center diagonal %u\n",
+		    (int) ((center_diagonal - 5000 - min_diagonal)/10000),maxcount,center_diagonal));
+      /* Bin index above is derived from center_diagonal; no max_bini variable exists */
+      array = keep_center_diagonal(&nunique,array,center_diagonal);
+      FREE(bins);
+
+#ifdef DEBUG0
+      printf("Start of center diagonals:\n");
+      for (i = 0; i < nunique; i++) {
+	print_segment(array[i],chrinit,/*queryseq_ptr*/NULL,/*genomicseg_ptr*/NULL);
+      }
+      printf("End of center diagonals\n\n");
+#endif
+    }
+
     /* Find end regions */
 #ifdef ACTIVE_BUFFER
     /* Allow buffer on 5' end to make sure we identify the best initial exon */
diff --git a/src/dynprog.c b/src/dynprog.c
index 21cb6c0..5b41b60 100644
--- a/src/dynprog.c
+++ b/src/dynprog.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog.c 188752 2016-05-01 17:28:22Z twu $";
+static char rcsid[] = "$Id: dynprog.c 203538 2017-02-15 00:45:08Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -134,7 +134,7 @@ Dynprog_score (int matches, int mismatches, int qopens, int qindels, int topens,
 }
 
 
-#if !defined(HAVE_SSE2) || defined(DEBUG_SIMD)
+#if !defined(HAVE_SSE2) || defined(DEBUG2) || defined(DEBUG_SIMD)
 static char complCode[128] = COMPLEMENT_LC;
 
 static char
@@ -809,6 +809,7 @@ Dynprog_free (T *old) {
 /************************************************************************/
 
 /* These are extern arrays, used by all dynprog procedures */
+int use8p_size[NMISMATCHTYPES];
 Pairdistance_T **pairdistance_array[NMISMATCHTYPES];
 #ifndef HAVE_SSE4_1
 Pairdistance_T **pairdistance_array_plus_128[NMISMATCHTYPES];
@@ -915,6 +916,12 @@ Dynprog_init (Mode_T mode) {
   nt_to_int_array['T'] = nt_to_int_array['t'] = 3;
 
 
+  use8p_size[HIGHQ] = NEG_INFINITY_8 / MISMATCH_HIGHQ - 1;
+  use8p_size[MEDQ] = NEG_INFINITY_8 / MISMATCH_MEDQ - 1;
+  use8p_size[LOWQ] = NEG_INFINITY_8 / MISMATCH_LOWQ - 1;
+  use8p_size[ENDQ] = NEG_INFINITY_8 / MISMATCH_ENDQ - 1;
+  /* printf("use8p_sizes: %d %d %d %d\n",use8p_size[HIGHQ],use8p_size[MEDQ],use8p_size[LOWQ],use8p_size[ENDQ]); */
+
   consistent_array = (bool **) CALLOC(128,sizeof(bool *));
   consistent_array[0] = (bool *) CALLOC(128*128,sizeof(bool));
   ptr = 0;
@@ -1128,6 +1135,7 @@ Dynprog_standard (Direction32_T ***directions_nogap, Direction32_T ***directions
 
   pairdistance_array_type = pairdistance_array[mismatchtype];
 
+  debug2(printf("Dynprog_standard.  jump_late_p %d, open %d, extend %d\n",jump_late_p,open,extend));
   debug(printf("compute_scores_standard: "));
   debug(printf("Lengths are %d and %d, so bands are %d on left and %d on right\n",rlength,glength,lband,uband));
 
@@ -1608,6 +1616,7 @@ Dynprog_standard (Direction32_T ***directions_nogap, Direction32_T ***directions
     }
   }
 
+  debug2(printf("STD: Dynprog_standard\n"));
   debug2(Dynprog_Matrix32_print(matrix,rlength,glength,rsequence,gsequence,gsequence_alt,
 				goffset,chroffset,chrhigh,watsonp,revp,lband,uband));
   debug2(Dynprog_Directions32_print(*directions_nogap,*directions_Egap,*directions_Fgap,
diff --git a/src/dynprog.h b/src/dynprog.h
index 3b43485..17763ae 100644
--- a/src/dynprog.h
+++ b/src/dynprog.h
@@ -1,4 +1,4 @@
-/* $Id: dynprog.h 188752 2016-05-01 17:28:22Z twu $ */
+/* $Id: dynprog.h 202041 2017-01-01 15:40:08Z twu $ */
 #ifndef DYNPROG_INCLUDED
 #define DYNPROG_INCLUDED
 #ifdef HAVE_CONFIG_H
@@ -150,6 +150,7 @@ typedef int Direction32_T;
    might be lower-case */
 #define PREUC 1			
 
+extern int use8p_size[NMISMATCHTYPES];
 extern Pairdistance_T **pairdistance_array[NMISMATCHTYPES];
 #ifndef HAVE_SSE4_1
 extern Pairdistance_T **pairdistance_array_plus_128[NMISMATCHTYPES];
diff --git a/src/dynprog_cdna.c b/src/dynprog_cdna.c
index 7e0ee13..8db0b1b 100644
--- a/src/dynprog_cdna.c
+++ b/src/dynprog_cdna.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_cdna.c 184458 2016-02-18 00:06:33Z twu $";
+static char rcsid[] = "$Id: dynprog_cdna.c 202042 2017-01-01 15:40:38Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -923,7 +923,7 @@ Dynprog_cdna_gap (int *dynprogindex, int *finalscore, bool *incompletep,
 
 #if defined(HAVE_SSE2)
   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-  if (glength <= SIMD_MAXLENGTH_EPI8 || (rlengthL <= SIMD_MAXLENGTH_EPI8 && rlengthR <= SIMD_MAXLENGTH_EPI8)) {
+  if (glength < use8p_size[mismatchtype] || (rlengthL < use8p_size[mismatchtype] && rlengthR < use8p_size[mismatchtype])) {
     use8p = true;
   } else {
     use8p = false;
diff --git a/src/dynprog_end.c b/src/dynprog_end.c
index 34361e3..bf1740f 100644
--- a/src/dynprog_end.c
+++ b/src/dynprog_end.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_end.c 184458 2016-02-18 00:06:33Z twu $";
+static char rcsid[] = "$Id: dynprog_end.c 205259 2017-04-12 23:55:04Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1272,7 +1272,8 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 		  int rlength, int glength, int rev_roffset, int rev_goffset, 
 		  Univcoord_T chroffset, Univcoord_T chrhigh,
 		  bool watsonp, bool jump_late_p, Pairpool_T pairpool,
-		  int extraband_end, double defect_rate, Endalign_T endalign) {
+		  int extraband_end, double defect_rate, Endalign_T endalign,
+		  bool require_pos_score_p) {
   List_T pairs = NULL;
   char *rev_gsequence, *rev_gsequence_alt;
   Pair_T pair;
@@ -1369,7 +1370,7 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
     Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true);
 #if defined(HAVE_SSE2)
     /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-    if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) {
+    if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) {
       use8p = true;
       matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog,
 					   rev_rsequence,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]),
@@ -1426,10 +1427,11 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 #endif
 
   } else if (endalign == QUERYEND_INDELS) {
-    Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true);
+    /* Wideband needs to be false, or else we will rediscover the end exon as an indel */
+    Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/false);
 #if defined(HAVE_SSE2)
     /* Use || because we want the minimum length (which determines the diagonal length) to achive a score less than 128 */
-    if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) {
+    if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) {
 
 
       use8p = true;
@@ -1492,7 +1494,7 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 
   } else if (endalign == QUERYEND_NOGAPS) {
     find_best_endpoint_to_queryend_nogaps(&bestr,&bestc,rlength,glength);
-    /* *finalscore = 0;	-- Splicetrie procedures need to know finalscore */
+    /* *finalscore = 0; -- Splicetrie procedures need to know finalscore */
 
   } else {
     fprintf(stderr,"Unexpected endalign value %d\n",endalign);
@@ -1524,6 +1526,10 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 			     /*revp*/true,*dynprogindex);
     *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ;
 
+  } else if (require_pos_score_p == true && *finalscore <= 0) {
+    /* Can skip traceback */
+    pairs = (List_T) NULL;
+
 #if defined(HAVE_SSE2)
   } else if (use8p == true) {
     if (bestc >= bestr) {
@@ -1574,7 +1580,7 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
   if ((endalign == QUERYEND_GAP || endalign == BEST_LOCAL) && (*nmatches + 1) < *nmismatches) {
     *finalscore = 0;
     /* No need to free pairs */
-    pairs = NULL;
+    pairs = (List_T) NULL;
   } else {
     /* Add 1 to count the match already in the alignment */
     pairs = List_reverse(pairs); /* Look at 5' end to remove excess gaps */
@@ -1685,7 +1691,7 @@ Dynprog_end5_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
   Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true);
 #if defined(HAVE_SSE2)
   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-  if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) {
+  if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) {
     use8p = true;
     matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog,
 					 rev_rsequence,rev_gsequence,rev_gsequence_alt,
@@ -1866,7 +1872,8 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 		  int rlength, int glength, int roffset, int goffset, 
 		  Univcoord_T chroffset, Univcoord_T chrhigh,
 		  bool watsonp, bool jump_late_p, Pairpool_T pairpool,
-		  int extraband_end, double defect_rate, Endalign_T endalign) {
+		  int extraband_end, double defect_rate, Endalign_T endalign,
+		  bool require_pos_score_p) {
   List_T pairs = NULL;
   char *gsequence, *gsequence_alt;
   Pair_T pair;
@@ -1959,7 +1966,7 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
     Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true);
 #if defined(HAVE_SSE2)
     /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-    if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) {
+    if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) {
       use8p = true;
       matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog,
 					   rsequenceuc,gsequence,gsequence_alt,rlength,glength,
@@ -2009,10 +2016,11 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 #endif
 
   } else if (endalign == QUERYEND_INDELS) {
-    Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true);
+    /* Wideband needs to be false, or else we will rediscover the end exon as an indel */
+    Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/false);
 #if defined(HAVE_SSE2)
     /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-    if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) {
+    if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) {
       use8p = true;
       matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog,
 					   rsequenceuc,gsequence,gsequence_alt,rlength,glength,
@@ -2096,6 +2104,10 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 			     /*revp*/false,*dynprogindex);
     *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ;
 
+  } else if (require_pos_score_p == true && *finalscore <= 0) {
+    /* Can skip traceback */
+    pairs = (List_T) NULL;
+    
 #if defined(HAVE_SSE2)
   } else if (use8p == true) {
     if (bestc >= bestr) {
@@ -2255,7 +2267,7 @@ Dynprog_end3_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
   Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true);
 #if defined(HAVE_SSE2)
   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-  if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) {
+  if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) {
     use8p = true;
     matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog,
 					 rsequenceuc,gsequence,gsequence_alt,rlength,glength,
@@ -2718,8 +2730,9 @@ Dynprog_end5_known (bool *knownsplicep, int *dynprogindex, int *finalscore,
 				&(*nopens),&(*nindels),dynprog,rev_rsequence,rev_rsequenceuc,
 				rlength,glength,rev_roffset,rev_goffset,chroffset,chrhigh,
 				watsonp,jump_late_p,pairpool,
-				extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS);
-  if (*finalscore < 0) {
+				extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,
+				/*require_pos_score_p*/true);
+  if (*finalscore <= 0) {
     orig_score = 0;
     orig_pairs = best_pairs = (List_T) NULL;
   } else {
@@ -2878,7 +2891,7 @@ Dynprog_end5_known (bool *knownsplicep, int *dynprogindex, int *finalscore,
 				    &(*nopens),&(*nindels),dynprog,rev_rsequence,rev_rsequenceuc,
 				    rlength,glength,rev_roffset,rev_goffset,chroffset,chrhigh,
 				    watsonp,jump_late_p,pairpool,
-				    extraband_end,defect_rate,/*endalign*/BEST_LOCAL);
+				    extraband_end,defect_rate,/*endalign*/BEST_LOCAL,/*require_pos_score_p*/false);
       debug7(Pair_dump_list(orig_pairs,/*zerobasedp*/true));
       debug7(printf("End of dynprog end5 known\n"));
       *knownsplicep = false;
@@ -2975,8 +2988,9 @@ Dynprog_end3_known (bool *knownsplicep, int *dynprogindex, int *finalscore,
 				&(*nopens),&(*nindels),dynprog,rsequence,rsequenceuc,
 				rlength,glength,roffset,goffset,chroffset,chrhigh,
 				watsonp,jump_late_p,pairpool,
-				extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS);
-  if (*finalscore < 0) {
+				extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,
+				/*require_pos_score_p*/true);
+  if (*finalscore <= 0) {
     orig_score = 0;
     orig_pairs = best_pairs = (List_T) NULL;
   } else {
@@ -3135,7 +3149,8 @@ Dynprog_end3_known (bool *knownsplicep, int *dynprogindex, int *finalscore,
 				    &(*nopens),&(*nindels),dynprog,rsequence,rsequenceuc,
 				    rlength,glength,roffset,goffset,chroffset,chrhigh,
 				    watsonp,jump_late_p,pairpool,
-				    extraband_end,defect_rate,/*endalign*/BEST_LOCAL);
+				    extraband_end,defect_rate,/*endalign*/BEST_LOCAL,
+				    /*require_pos_score_p*/false);
       debug7(Pair_dump_list(orig_pairs,/*zerobasedp*/true));
       *knownsplicep = false;
       debug7(printf("End of dynprog end5 known\n"));
diff --git a/src/dynprog_end.h b/src/dynprog_end.h
index 72380fb..b686fb2 100644
--- a/src/dynprog_end.h
+++ b/src/dynprog_end.h
@@ -1,4 +1,4 @@
-/* $Id: dynprog_end.h 184458 2016-02-18 00:06:33Z twu $ */
+/* $Id: dynprog_end.h 202043 2017-01-01 15:41:51Z twu $ */
 #ifndef DYNPROG_END_INCLUDED
 #define DYNPROG_END_INCLUDED
 
@@ -27,7 +27,8 @@ Dynprog_end5_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 		  int length1, int length2, int revoffset1, int revoffset2, 
 		  Univcoord_T chroffset, Univcoord_T chrhigh,
 		  bool watsonp, bool jump_late_p, Pairpool_T pairpool,
-		  int extraband_end, double defect_rate, Endalign_T endalign);
+		  int extraband_end, double defect_rate, Endalign_T endalign,
+		  bool require_pos_score_p);
 
 extern List_T
 Dynprog_end5_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
@@ -46,7 +47,8 @@ Dynprog_end3_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmisma
 		  int length1, int length2, int offset1, int offset2, 
 		  Univcoord_T chroffset, Univcoord_T chrhigh,
 		  bool watsonp, bool jump_late_p, Pairpool_T pairpool,
-		  int extraband_end, double defect_rate, Endalign_T endalign);
+		  int extraband_end, double defect_rate, Endalign_T endalign,
+		  bool require_pos_score_p);
 
 extern List_T
 Dynprog_end3_splicejunction (int *dynprogindex, int *finalscore, int *missscore,
diff --git a/src/dynprog_genome.c b/src/dynprog_genome.c
index 8ce3097..d2223ed 100644
--- a/src/dynprog_genome.c
+++ b/src/dynprog_genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_genome.c 198278 2016-09-24 00:47:16Z twu $";
+static char rcsid[] = "$Id: dynprog_genome.c 207852 2017-06-29 20:32:58Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -77,7 +77,7 @@ static char rcsid[] = "$Id: dynprog_genome.c 198278 2016-09-24 00:47:16Z twu $";
 
 
 #define USE_SCOREI 1
-#define USE_WEAK_SCOREI 1
+/* #define USE_WEAK_SCOREI 1 */
 
 #define PROB_CEILING 0.85
 #define PROB_FLOOR 0.50
@@ -92,24 +92,28 @@ static char rcsid[] = "$Id: dynprog_genome.c 198278 2016-09-24 00:47:16Z twu $";
 #define FINAL_GCAG_INTRON 4    /* Amount above regular should approximately
 				   match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
 #define FINAL_ATAC_INTRON 2
+
 #else
-#define GCAG_INTRON 15
-#define ATAC_INTRON 12
-#define FINAL_GCAG_INTRON 20    /* Amount above regular should approximately
+/* Values were 15, 12, 20, and 12 */
+#define GCAG_INTRON 8
+#define ATAC_INTRON 4
+#define FINAL_GCAG_INTRON 10    /* Amount above regular should approximately
 				   match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */
-#define FINAL_ATAC_INTRON 12
+#define FINAL_ATAC_INTRON 8
 #endif
 
 
 /* Don't want to make too high, otherwise we will harm evaluation of
    dual introns vs. single intron */
-#define CANONICAL_INTRON_HIGHQ 10 /* GT-AG */
-#define CANONICAL_INTRON_MEDQ  16
+/* Values were 10, 16, 22 */
+#define CANONICAL_INTRON_HIGHQ 14 /* GT-AG */
+#define CANONICAL_INTRON_MEDQ  18
 #define CANONICAL_INTRON_LOWQ  22
 
-#define FINAL_CANONICAL_INTRON_HIGHQ 30 /* GT-AG */
-#define FINAL_CANONICAL_INTRON_MEDQ  36
-#define FINAL_CANONICAL_INTRON_LOWQ  42
+/* Values were 30, 36, 42 */
+#define FINAL_CANONICAL_INTRON_HIGHQ 16 /* GT-AG */
+#define FINAL_CANONICAL_INTRON_MEDQ  20
+#define FINAL_CANONICAL_INTRON_LOWQ  24
 
 #define KNOWN_SPLICESITE_REWARD 20
 
@@ -562,10 +566,11 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
   int rL, rR, cL, cR;
   int cloL, chighL;
   int cloR, chighR;
-  int bestscore = NEG_INFINITY_8, score, scoreL, scoreR;
+  int bestscore = NEG_INFINITY_8, score, scoreL, scoreI, scoreR;
   Univcoord_T splicesitepos1, splicesitepos2;
   bool bestp;
 
+  scoreI = 0;			/* Because we constrain splices to given introns */
 
   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
     debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
@@ -607,7 +612,7 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) { /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -651,7 +656,7 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -710,7 +715,7 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -754,7 +759,7 @@ bridge_intron_gap_8_intron_level (int *bestrL, int *bestrR, int *bestcL, int *be
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -833,8 +838,9 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
     left1_alt = gsequenceL_alt[cL];
     left2 = gsequenceL[cL+1];
     left2_alt = gsequenceL_alt[cL+1];
-    assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp));
-    assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp));
+    /* Assertions may not hold for transcriptome alignment */
+    /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
+    /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
 
     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
       leftdi[cL] = LEFT_GT;
@@ -857,8 +863,9 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
     right2_alt = rev_gsequenceR_alt[-cR-1];
     right1 = rev_gsequenceR[-cR];
     right1_alt = rev_gsequenceR_alt[-cR];
-    assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp));
-    assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp));
+    /* Assertions may not hold for transcriptome alignment */
+    /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
+    /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
 
     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
       rightdi[cR] = RIGHT_AG;
@@ -1004,7 +1011,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
     scoreI = 0;
 #endif
 
-    if ((score = scoreL + scoreR) > bestscore) {
+    if ((score = scoreL + scoreI + scoreR) > bestscore) {
       debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1033,7 +1040,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
 		     cL,cR,scoreI,probL,probR,probL+probR));
       if (probL + probR > bestprob_with_dinucl) {
-	bestscore_with_dinucl = scoreL + scoreR;
+	bestscore_with_dinucl = scoreL + scoreI + scoreR;
 	bestcL_with_dinucl = cL;
 	bestcR_with_dinucl = cR;
 	bestrL_with_dinucl = rL;
@@ -1043,24 +1050,28 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
     }
 
 
-    debug3(printf("B. Test indel on right\n"));
+    debug3(printf("B. Test indel on right (1)\n"));
     /* Test indel on right */
     cL = rL;
     probL = left_probabilities[cL];
     scoreL = (int) matrixL_upper[cL][rL];
+#if 0
     if (directionsL_upper_nogap[cL][rL] != DIAG) {
       /* Favor gaps away from intron if possible */
       scoreL -= 100;
     }
+#endif
 
     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
     for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
       probR = right_probabilities[cR];
       scoreR = (int) matrixR_lower[rR][cR];
+#if 0
       if (directionsR_lower_nogap[rR][cR] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreR -= 100;
       }
+#endif
 	
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1068,7 +1079,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       scoreI = 0;
 #endif
 	
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1090,6 +1101,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       } else {
 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
       }
     }
 
@@ -1097,10 +1109,12 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
     for (/*skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
       probR = right_probabilities[cR];
       scoreR = (int) matrixR_upper[cR][rR];
+#if 0
       if (directionsR_upper_nogap[cR][rR] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreR -= 100;
       }
+#endif
 	
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1108,7 +1122,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       scoreI = 0;
 #endif
 	
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1130,27 +1144,32 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       } else {
 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
       }
     }
 
-    debug3(printf("C. Test indel on left\n"));
+    debug3(printf("C. Test indel on left (1)\n"));
     /* Test indel on left */
     cR = rR;
     probR = right_probabilities[cR];
     scoreR = (int) matrixR_upper[cR][rR];
+#if 0
     if (directionsR_upper_nogap[cR][rR] != DIAG) {
       /* Favor gaps away from intron if possible */
       scoreR -= 100;
     }
+#endif
 
     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
     for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
       probL = left_probabilities[cL];
       scoreL = (int) matrixL_lower[rL][cL];
+#if 0
       if (directionsL_lower_nogap[rL][cL] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreL -= 100;
       }
+#endif
 
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1158,7 +1177,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       scoreI = 0;
 #endif
 
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1180,6 +1199,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       } else {
 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
       }
     }
 
@@ -1187,10 +1207,12 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
     for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
       probL = left_probabilities[cL];
       scoreL = (int) matrixL_upper[cL][rL];
+#if 0
       if (directionsL_upper_nogap[cL][rL] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreL -= 100;
       }
+#endif
 
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1198,7 +1220,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       scoreI = 0;
 #endif
 
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1220,6 +1242,7 @@ bridge_intron_gap_8_site_level (int *bestrL, int *bestrR, int *bestcL, int *best
       } else {
 	debug3a(printf("Not best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		       cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
+	debug3a(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
       }
     }
   }
@@ -1386,10 +1409,11 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
   int rL, rR, cL, cR;
   int cloL, chighL;
   int cloR, chighR;
-  int bestscore = NEG_INFINITY_16, score, scoreL, scoreR;
+  int bestscore = NEG_INFINITY_16, score, scoreL, scoreI, scoreR;
   Univcoord_T splicesitepos1, splicesitepos2;
   bool bestp;
 
+  scoreI = 0;			/* Because we constrain splices to given introns */
 
   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
     debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
@@ -1431,7 +1455,7 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -1475,7 +1499,7 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -1534,7 +1558,7 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -1578,7 +1602,7 @@ bridge_intron_gap_16_intron_level (int *bestrL, int *bestrR, int *bestcL, int *b
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -1657,8 +1681,9 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     left1_alt = gsequenceL_alt[cL];
     left2 = gsequenceL[cL+1];
     left2_alt = gsequenceL_alt[cL+1];
-    assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp));
-    assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp));
+    /* Assertions may not hold for transcriptome alignment */
+    /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
+    /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
 
     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
       leftdi[cL] = LEFT_GT;
@@ -1681,8 +1706,9 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     right2_alt = rev_gsequenceR_alt[-cR-1];
     right1 = rev_gsequenceR[-cR];
     right1_alt = rev_gsequenceR_alt[-cR];
-    assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp));
-    assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp));
+    /* Assertions may not hold for transcriptome alignment */
+    /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
+    /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
 
     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
       rightdi[cR] = RIGHT_AG;
@@ -1828,7 +1854,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     scoreI = 0;
 #endif
 	
-    if ((score = scoreL + scoreR) > bestscore) {
+    if ((score = scoreL + scoreI + scoreR) > bestscore) {
       debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1857,7 +1883,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
       debug3(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
 		    cL,cR,scoreI,probL,probR,probL+probR));
       if (probL + probR > bestprob_with_dinucl) {
-	bestscore_with_dinucl = scoreL + scoreR;
+	bestscore_with_dinucl = scoreL + scoreI + scoreR;
 	bestcL_with_dinucl = cL;
 	bestcR_with_dinucl = cR;
 	bestrL_with_dinucl = rL;
@@ -1872,19 +1898,23 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     cL = rL;
     probL = left_probabilities[cL];
     scoreL = (int) matrixL_upper[cL][rL];
+#if 0
     if (directionsL_upper_nogap[cL][rL] != DIAG) {
       /* Favor gaps away from intron if possible */
       scoreL -= 100;
     }
+#endif
 
     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
     for (cR = cloR; cR < /*to main diagonal*/rR && cR < rightoffset-leftoffset-cL; cR++) {
       probR = right_probabilities[cR];
       scoreR = (int) matrixR_lower[rR][cR];
+#if 0
       if (directionsR_lower_nogap[rR][cR] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreR -= 100;
       }
+#endif
 	
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1892,7 +1922,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
       scoreI = 0;
 #endif
 	
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1921,10 +1951,12 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     for (/*Skip main diagonal*/cR++; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
       probR = right_probabilities[cR];
       scoreR = (int) matrixR_upper[cR][rR];
+#if 0
       if (directionsR_upper_nogap[cR][rR] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreR -= 100;
       }
+#endif
 	
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1932,7 +1964,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
       scoreI = 0;
 #endif
 	
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -1958,24 +1990,28 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     }
 
 
-    debug3(printf("C. Test indel on left\n"));
+    debug3(printf("C. Test indel on left (2)\n"));
     /* Test indel on left */
     cR = rR;
     probR = right_probabilities[cR];
     scoreR = (int) matrixR_upper[cR][rR];
+#if 0
     if (directionsR_upper_nogap[cR][rR] != DIAG) {
       /* Favor gaps away from intron if possible */
       scoreR -= 100;
     }
+#endif
 
     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
     for (cL = cloL; cL < /*to main diagonal*/rL && cL < rightoffset-leftoffset-cR; cL++) {
       probL = left_probabilities[cL];
       scoreL = (int) matrixL_lower[rL][cL];
+#if 0
       if (directionsL_lower_nogap[rL][cL] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreL -= 100;
       }
+#endif
 
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -1983,7 +2019,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
       scoreI = 0;
 #endif
 
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2012,10 +2048,12 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
     for (/*Skip main diagonal*/cL++; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
       probL = left_probabilities[cL];
       scoreL = (int) matrixL_upper[cL][rL];
+#if 0
       if (directionsL_upper_nogap[cL][rL] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreL -= 100;
       }
+#endif
 
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -2023,7 +2061,7 @@ bridge_intron_gap_16_site_level (int *bestrL, int *bestrR, int *bestcL, int *bes
       scoreI = 0;
 #endif
 
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2212,10 +2250,11 @@ bridge_intron_gap_intron_level (int *bestrL, int *bestrR, int *bestcL, int *best
   int rL, rR, cL, cR;
   int cloL, chighL;
   int cloR, chighR;
-  int bestscore = NEG_INFINITY_32, score, scoreL, scoreR;
+  int bestscore = NEG_INFINITY_32, score, scoreL, scoreI, scoreR;
   Univcoord_T splicesitepos1, splicesitepos2;
   bool bestp;
 
+  scoreI = 0;			/* Because we constrain splices to given introns */
 
   for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) {
     debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR));
@@ -2257,7 +2296,7 @@ bridge_intron_gap_intron_level (int *bestrL, int *bestrR, int *bestcL, int *best
 	    }
 #endif
 
-	    if ((score = scoreL + scoreR) > bestscore ||
+	    if ((score = scoreL + scoreI + scoreR) > bestscore ||
 		(score >= bestscore && jump_late_p)) {  /* Use >= for jump late */
 	      bestp = false;
 	      if (watsonp == true) {
@@ -2334,8 +2373,9 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
     left1_alt = gsequenceL_alt[cL];
     left2 = gsequenceL[cL+1];
     left2_alt = gsequenceL_alt[cL+1];
-    assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp));
-    assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp));
+    /* Assertions may not hold for transcriptome alignment */
+    /* assert(left1 == get_genomic_nt(&left1_alt,goffsetL+cL,chroffset,chrhigh,watsonp)); */
+    /* assert(left2 == get_genomic_nt(&left2_alt,goffsetL+cL+1,chroffset,chrhigh,watsonp)); */
 
     if ((left1 == 'G' || left1_alt == 'G') && (left2 == 'T' || left2_alt == 'T')) {
       leftdi[cL] = LEFT_GT;
@@ -2358,8 +2398,9 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
     right2_alt = rev_gsequenceR_alt[-cR-1];
     right1 = rev_gsequenceR[-cR];
     right1_alt = rev_gsequenceR_alt[-cR];
-    assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp));
-    assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp));
+    /* Assertions may not hold for transcriptome alignment */
+    /* assert(right2 == get_genomic_nt(&right2_alt,rev_goffsetR-cR-1,chroffset,chrhigh,watsonp)); */
+    /* assert(right1 == get_genomic_nt(&right1_alt,rev_goffsetR-cR,chroffset,chrhigh,watsonp)); */
 
     if ((right2 == 'A' || right2_alt == 'A') && (right1 == 'G' || right1_alt == 'G')) {
       rightdi[cR] = RIGHT_AG;
@@ -2505,7 +2546,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
     scoreI = 0;
 #endif
 
-    if ((score = scoreL + scoreR) > bestscore) {
+    if ((score = scoreL + scoreI + scoreR) > bestscore) {
       debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		    cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
       debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2534,7 +2575,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
       debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n",
 		     cL,cR,scoreI,probL,probR,probL+probR));
       if (probL + probR > bestprob_with_dinucl) {
-	bestscore_with_dinucl = scoreL + scoreR;
+	bestscore_with_dinucl = scoreL + scoreI + scoreR;
 	bestcL_with_dinucl = cL;
 	bestcR_with_dinucl = cR;
 	bestrL_with_dinucl = rL;
@@ -2549,19 +2590,23 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
     cL = rL;
     probL = left_probabilities[cL];
     scoreL = (int) matrixL[cL][rL];
+#if 0
     if (directionsL_nogap[cL][rL] != DIAG) {
       /* Favor gaps away from intron if possible */
       scoreL -= 100;
     }
+#endif
 
     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
     for (cR = cloR; cR < chighR && cR < rightoffset-leftoffset-cL; cR++) {
       probR = right_probabilities[cR];
       scoreR = (int) matrixR[cR][rR];
+#if 0
       if (directionsR_nogap[cR][rR] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreR -= 100;
       }
+#endif
 	      
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -2569,7 +2614,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
       scoreI = 0;
 #endif
 	
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -2594,24 +2639,28 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
       }
     }
 
-    debug3(printf("C. Test indel on left\n"));
+    debug3(printf("C. Test indel on left (3)\n"));
     /* Test indel on left */
     cR = rR;
     probR = right_probabilities[cR];
     scoreR = (int) matrixR[cR][rR];
+#if 0
     if (directionsR_nogap[cR][rR] != DIAG) {
       /* Favor gaps away from intron if possible */
       scoreR -= 100;
     }
+#endif
 
     /* Disallow leftoffset + cL >= rightoffset - cR, or cR >= rightoffset - leftoffset - cL */
     for (cL = cloL; cL < chighL && cL < rightoffset-leftoffset-cR; cL++) {
       probL = left_probabilities[cL];
       scoreL = (int) matrixL[cL][rL];
+#if 0
       if (directionsL_nogap[cL][rL] != DIAG) {
 	/* Favor gaps away from intron if possible */
 	scoreL -= 100;
       }
+#endif
 
 #ifdef USE_SCOREI
       scoreI = intron_score(&introntype,leftdi[cL],rightdi[cR],cdna_direction,canonical_reward,finalp);
@@ -2619,7 +2668,7 @@ bridge_intron_gap_site_level (int *bestrL, int *bestrR, int *bestcL, int *bestcR
       scoreI = 0;
 #endif
 	
-      if ((score = scoreL + scoreR) > bestscore) {
+      if ((score = scoreL + scoreI + scoreR) > bestscore) {
 	debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n",
 		      cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR));
 	debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR]));
@@ -3277,7 +3326,7 @@ Dynprog_genome_gap (int *dynprogindex, int *finalscore, int *new_leftgenomepos,
 
 #if defined(HAVE_SSE2)
   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
-  if (rlength <= SIMD_MAXLENGTH_EPI8 || (glengthL <= SIMD_MAXLENGTH_EPI8 && glengthR <= SIMD_MAXLENGTH_EPI8)) {
+  if (rlength < use8p_size[mismatchtype] || (glengthL < use8p_size[mismatchtype] && glengthR < use8p_size[mismatchtype])) {
     use8p = true;
   } else {
     use8p = false;
diff --git a/src/dynprog_simd.c b/src/dynprog_simd.c
index 91e8a50..7ff8cf3 100644
--- a/src/dynprog_simd.c
+++ b/src/dynprog_simd.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_simd.c 189207 2016-05-06 23:16:32Z twu $";
+static char rcsid[] = "$Id: dynprog_simd.c 202044 2017-01-01 15:43:24Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -216,7 +216,9 @@ Matrix8_print (Score8_T **matrix, int rlength, int glength, char *rsequence,
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -291,7 +293,9 @@ Matrix8_print_ud (Score8_T **matrix, int rlength, int glength, char *rsequence,
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -380,7 +384,9 @@ Matrix16_print (Score16_T **matrix, int rlength, int glength, char *rsequence,
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   if (rlength >= 100) {
@@ -516,7 +522,9 @@ Matrix16_print_ud (Score16_T **matrix, int rlength, int glength, char *rsequence
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -606,7 +614,9 @@ Directions8_print (Direction8_T **directions_nogap, Direction8_T **directions_Eg
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -700,7 +710,9 @@ Directions8_print_ud (Direction8_T **directions_nogap, Direction8_T **directions
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -809,7 +821,9 @@ Directions16_print (Direction16_T **directions_nogap, Direction16_T **directions
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -903,7 +917,9 @@ Directions16_print_ud (Direction16_T **directions_nogap, Direction16_T **directi
   int i, j;
   char g, g_alt;
 
+#ifdef HAVE_SSE2
   _mm_lfence();
+#endif
 
   /* j */
   printf("   ");		/* For i */
@@ -9086,11 +9102,11 @@ Dynprog_traceback_8_upper (List_T pairs, int *nmatches, int *nmismatches, int *n
     if ((dir = directions_nogap[c][r]) != DIAG) {
       /* Must be HORIZ */
       dist = 1;
-      /* Should not need to check for c > 0 if the main diagonal is populated with DIAG */
-      while (/* c > 0 && */ directions_Egap[c--][r] != DIAG) {
+      /* Should not need to check for c > r if the Egap diagonal above the main is populated with DIAG */
+      while (/* c > r && */ directions_Egap[c--][r] != DIAG) {
 	dist++;
       }
-      /* assert(c != 0); */
+      assert(c >= r);
 
       debug(printf("H%d: ",dist));
       pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,/*genomesequence*/NULL,
@@ -9197,11 +9213,11 @@ Dynprog_traceback_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *n
     if ((dir = directions_nogap[r][c]) != DIAG) {
       /* Must be VERT */
       dist = 1;
-      /* Should not need to check for r > 0 if the main diagonal is populated with DIAG */
-      while (/* r > 0 && */ directions_Egap[r--][c] != DIAG) {
+      /* Should not need to check for r > c if the Egap diagonal below the main is populated with DIAG */
+      while (/* r > c && */ directions_Egap[r--][c] != DIAG) {
 	dist++;
       }
-      /* assert(r != 0); */
+      assert(r >= c);
 
       debug(printf("V%d: ",dist));
       pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
@@ -9236,8 +9252,8 @@ Dynprog_traceback_8_lower (List_T pairs, int *nmatches, int *nmismatches, int *n
 
       if (c2 == '*') {
 	/* Don't push pairs past end of chromosome */
-	debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
-		     genomeoffset,genomecoord,chroffset,chrhigh,watsonp));
+	debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u\n",
+		     genomeoffset,genomecoord));
 	
       } else if (/*querysequenceuc[querycoord]*/c1_uc == c2 || c1_uc == c2_alt) {
 	debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - match\n",
@@ -9461,11 +9477,11 @@ Dynprog_traceback_16_upper (List_T pairs, int *nmatches, int *nmismatches, int *
     if ((dir = directions_nogap[c][r]) != DIAG) {
       /* Must be HORIZ */
       dist = 1;
-      /* Should not need to check for c > 0 if the main diagonal is populated with DIAG */
-      while (/* c > 0 && */ directions_Egap[c--][r] != DIAG) {
+      /* Should not need to check for c > r if the Egap diagonal above the main is populated with DIAG */
+      while (/* c > r && */ directions_Egap[c--][r] != DIAG) {
 	dist++;
       }
-      /* assert(c != 0); */
+      assert(c >= r);
 
       debug(printf("H%d: ",dist));
       pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,/*genomesequence*/NULL,
@@ -9572,11 +9588,11 @@ Dynprog_traceback_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *
     if ((dir = directions_nogap[r][c]) != DIAG) {
       /* Must be VERT */
       dist = 1;
-      /* Should not need to check for r > 0 if the main diagonal is populated with DIAG */
-      while (/* r > 0 && */ directions_Egap[r--][c] != DIAG) {
+      /* Should not need to check for r > c if the Egap diagonal below the main is populated with DIAG */
+      while (/* r > c && */ directions_Egap[r--][c] != DIAG) {
 	dist++;
       }
-      /* assert(r != 0); */
+      assert(r >= c);
 
       debug(printf("V%d: ",dist));
       pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence,
@@ -9611,8 +9627,8 @@ Dynprog_traceback_16_lower (List_T pairs, int *nmatches, int *nmismatches, int *
 
       if (c2 == '*') {
 	/* Don't push pairs past end of chromosome */
-	debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n",
-		     genomeoffset,genomecoord,chroffset,chrhigh,watsonp));
+	debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u\n",
+		     genomeoffset,genomecoord));
 
       } else if (/*querysequenceuc[querycoord]*/c1_uc == c2 || c1_uc == c2_alt) {
 	debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - match\n",
diff --git a/src/dynprog_simd.h b/src/dynprog_simd.h
index 8d2dd06..1e961a9 100644
--- a/src/dynprog_simd.h
+++ b/src/dynprog_simd.h
@@ -3,7 +3,11 @@
 
 #include "dynprog.h"
 
+#if 0
+/* Now determined by mismatchtype: highq 41, medq 63, lowq 127, endq 24 */
 #define SIMD_MAXLENGTH_EPI8 30  /* Previously had 40 = 128/3, but have seen 7-bit overflow empirically at matrices of size 30 */
+#endif
+
 
 /* Define DEBUG_SIMD and DEBUG_AVX2 in dynprog.h */
 
diff --git a/src/dynprog_single.c b/src/dynprog_single.c
index f06f844..fc7a0ad 100644
--- a/src/dynprog_single.c
+++ b/src/dynprog_single.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_single.c 184458 2016-02-18 00:06:33Z twu $";
+static char rcsid[] = "$Id: dynprog_single.c 203539 2017-02-15 00:46:36Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -500,7 +500,8 @@ Dynprog_single_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmis
 #endif
   assert(glength > 0);
 
-  if (rlength > dynprog->max_rlength || glength > dynprog->max_glength) {
+  if (rlength <= 0 || glength <= 0 ||
+      rlength > dynprog->max_rlength || glength > dynprog->max_glength) {
     debug(printf("rlength %d or glength %d is too long.  Returning NULL\n",rlength,glength));
     *finalscore = NEG_INFINITY_32;
     *nmatches = *nmismatches = *nopens = *nindels = 0;
@@ -586,7 +587,7 @@ Dynprog_single_gap (int *dynprogindex, int *finalscore, int *nmatches, int *nmis
 #if defined(HAVE_SSE2)
   /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */
   /* Use && because we don't want to overflow in either direction */
-  if (rlength <= SIMD_MAXLENGTH_EPI8 && glength <= SIMD_MAXLENGTH_EPI8) {
+  if (rlength < use8p_size[mismatchtype] && glength < use8p_size[mismatchtype]) {
     matrix8 = Dynprog_simd_8(&directions8_nogap,&directions8_Egap,&directions8_Fgap,dynprog,
 			     rsequence,gsequence,gsequence_alt,rlength,glength,
 #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD)
diff --git a/src/genome.c b/src/genome.c
index 8695454..ebf9e56 100644
--- a/src/genome.c
+++ b/src/genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome.c 196872 2016-08-24 22:41:33Z twu $";
+static char rcsid[] = "$Id: genome.c 207383 2017-06-15 20:58:03Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -10695,7 +10695,7 @@ Genome_fill_buffer_simple (T this, Univcoord_T left, Chrpos_T length, char *gbuf
 
   /* Fix out of bounds resulting from negative numbers */
   if (left + length < left) {
-    fprintf(stderr,"left %llu + length %u < left %llu\n",(unsigned long long) left,length,(unsigned long long) left);
+    /* fprintf(stderr,"left %llu + length %u < left %llu\n",(unsigned long long) left,length,(unsigned long long) left); */
     delta = -left;
     length -= delta;
     for (i = 0; i < delta; i++) {
@@ -10815,7 +10815,7 @@ Genome_fill_buffer_simple_alt (T genome, T genomealt, Univcoord_T left, Chrpos_T
 
   /* Fix out of bounds resulting from negative numbers */
   if (left + length < left) {
-    fprintf(stderr,"left %llu + length %u < left %llu\n",(unsigned long long) left,length,(unsigned long long) left);
+    /* fprintf(stderr,"left %llu + length %u < left %llu\n",(unsigned long long) left,length,(unsigned long long) left); */
     delta = -left;
     length -= delta;
     for (i = 0; i < delta; i++) {
@@ -11206,7 +11206,7 @@ Genome_get_segment (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T chromo
   int nunknowns;
   char *gbuffer;
   
-  gbuffer = (char *) CALLOC(length+1,sizeof(char));
+  gbuffer = (char *) MALLOC_IN((length+1)*sizeof(char));
 
   fill_buffer(&chrnum,&nunknowns,this,left,length,gbuffer,chromosome_iit,
 	      /*bitbybitp*/false,DEFAULT_CHARS,DEFAULT_FLAGS);
@@ -11233,7 +11233,7 @@ Genome_get_segment_alt (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T ch
   int nunknowns;
   char *gbuffer;
   
-  gbuffer = (char *) CALLOC(length+1,sizeof(char));
+  gbuffer = (char *) MALLOC_IN((length+1)*sizeof(char));
   
   fill_buffer(&chrnum,&nunknowns,this,left,length,gbuffer,chromosome_iit,
 	      /*bitbybitp*/true,DEFAULT_CHARS,SNP_FLAGS);
@@ -11258,7 +11258,7 @@ Genome_get_segment_snp (T this, Univcoord_T left, Chrpos_T length, Univ_IIT_T ch
   int nunknowns;
   char *gbuffer;
   
-  gbuffer = (char *) CALLOC(length+1,sizeof(char));
+  gbuffer = (char *) MALLOC_IN((length+1)*sizeof(char));
 
   fill_buffer(&chrnum,&nunknowns,this,left,length,gbuffer,chromosome_iit,
 	      /*bitbybitp*/true,SNP_CHARS,SNP_FLAGS);
diff --git a/src/genome128_hr.c b/src/genome128_hr.c
index 73bc84e..ca3c7b6 100644
--- a/src/genome128_hr.c
+++ b/src/genome128_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: genome128_hr.c 184459 2016-02-18 00:06:56Z twu $";
+static char rcsid[] = "$Id: genome128_hr.c 203519 2017-02-14 18:34:42Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -30,6 +30,44 @@ static char rcsid[] = "$Id: genome128_hr.c 184459 2016-02-18 00:06:56Z twu $";
 #include "littleendian.h"
 #endif
 
+
+/* Consecutive_matches_rightward and leftward */
+/* Slower with shift and wrap, perhaps because we need to extract integers from the SIMD object */
+/* #define USE_SHIFT_FIRST_MISMATCH 1 */
+/* #define USE_WRAP_FIRST_MISMATCH 1 */
+
+/* Genome_mismatches_right and left */
+/* Slower with shift and wrap, probably because we need to loop over the SIMD object */
+/* #define USE_SHIFT_MISMATCH_POSITIONS 1 */
+/* #define USE_WRAP_MISMATCH_POSITIONS 1 */
+
+/* Genome_count_mismatches_substring */
+/* Faster with shift and wrap.  Does not involve any loops. */
+#define USE_SHIFT_POPCOUNT 1
+#define USE_WRAP_POPCOUNT 1
+
+/* Genome_mismatches_right_trim and left_trim */
+/* Slower with shift and wrap */
+/* #define USE_SHIFT_TRIM 1 */
+/* #define USE_WRAP_TRIM 1 */
+
+
+/* Faster to use a straight shift, and _mm_bsrli_si128 is not defined in gcc 4.7 */
+/* #define USE_SHIFT_HILO 1 */
+
+
+#ifdef HAVE_SSE2
+#define QUERY_NEXTCOL 1		/* high0, high1, high2, high3 */
+#define QUERY_NEXTROW 8
+#else
+#define QUERY_NEXTCOL 3		/* high, low, flags */
+/* #define QUERY_NEXTROW 0 */
+#endif
+
+#define GENOME_NEXTCOL 1
+#define GENOME_NEXTROW 8
+
+
 #ifdef WORDS_BIGENDIAN
 /* Do not use SIMD */
 #elif defined(HAVE_SSE2)
@@ -38,6 +76,13 @@ static char rcsid[] = "$Id: genome128_hr.c 184459 2016-02-18 00:06:56Z twu $";
 #ifdef HAVE_SSE4_1
 #include <smmintrin.h>
 #endif
+#ifdef HAVE_AVX2
+#include <immintrin.h>
+#endif
+#ifdef HAVE_AVX512
+#include <immintrin.h>
+#endif
+
 #if !defined(HAVE_SSE4_2)
 /* Skip popcnt, which comes after SSE4.2 */
 #elif defined(HAVE_POPCNT)
@@ -16512,7 +16557,8 @@ print_vector_dec (__m128i x) {
 	 _mm_extract_epi32(x,0),_mm_extract_epi32(x,1),_mm_extract_epi32(x,2),_mm_extract_epi32(x,3));
   return;
 }
-#else
+
+#elif defined(HAVE_SSE2)
 static void
 print_vector_hex (__m128i x) {
   printf("%08X %08X %08X %08X\n",
@@ -16533,6 +16579,49 @@ print_vector_dec (__m128i x) {
   return;
 }
 #endif
+
+#ifdef HAVE_AVX2
+static void
+print_vector_256_hex (__m256i x) {
+  printf("%08X %08X %08X %08X %08X %08X %08X %08X\n",
+	 _mm256_extract_epi32(x,0),_mm256_extract_epi32(x,1),_mm256_extract_epi32(x,2),_mm256_extract_epi32(x,3),
+	 _mm256_extract_epi32(x,4),_mm256_extract_epi32(x,5),_mm256_extract_epi32(x,6),_mm256_extract_epi32(x,7));
+  return;
+}
+
+static void
+print_vector_256_dec (__m256i x) {
+  printf("%u %u %u %u %u %u %u %u\n",
+	 _mm256_extract_epi32(x,0),_mm256_extract_epi32(x,1),_mm256_extract_epi32(x,2),_mm256_extract_epi32(x,3),
+	 _mm256_extract_epi32(x,4),_mm256_extract_epi32(x,5),_mm256_extract_epi32(x,6),_mm256_extract_epi32(x,7));
+  return;
+}
+#endif
+
+#ifdef HAVE_AVX512
+static void
+print_vector_512_hex (__m512i x) {
+  unsigned int array[16];	/* plain stack array: only 4-byte alignment is guaranteed */
+  /* Debug helper: spill the sixteen 32-bit lanes of x to memory and print them in hex, array[0] first. */
+  _mm512_storeu_si512((__m512i *) array,x);	/* unaligned store; the aligned form requires a 64-byte boundary */
+  printf("%08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	 array[0],array[1],array[2],array[3],array[4],array[5],array[6],array[7],
+	 array[8],array[9],array[10],array[11],array[12],array[13],array[14],array[15]);
+  return;
+}
+
+static void
+print_vector_512_dec (__m512i x) {
+  unsigned int array[16];	/* plain stack array: only 4-byte alignment is guaranteed */
+  /* Debug helper: spill the sixteen 32-bit lanes of x to memory and print them as unsigned decimals, array[0] first. */
+  _mm512_storeu_si512((__m512i *) array,x);	/* unaligned store; the aligned form requires a 64-byte boundary */
+  printf("%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+	 array[0],array[1],array[2],array[3],array[4],array[5],array[6],array[7],
+	 array[8],array[9],array[10],array[11],array[12],array[13],array[14],array[15]);
+  return;
+}
+#endif
+
 #endif
 
 
@@ -16636,14 +16725,14 @@ Genome_print_blocks (Genomecomp_T *blocks, Univcoord_T startpos, Univcoord_T end
     write_chars(high,low,flags);
     printf("\n");
 
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+    ptr += GENOME_NEXTCOL; if (++startcolumni == 4) {ptr += GENOME_NEXTROW; startcolumni = 0;}
 #elif !defined(HAVE_SSE2)
     high = ptr[0]; low = ptr[4]; flags = ptr[8];
     printf("high: %08X  low: %08X  flags: %08X\t",high,low,flags);
     write_chars(high,low,flags);
     printf("\n");
 
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+    ptr += GENOME_NEXTCOL; if (++startcolumni == 4) {ptr += GENOME_NEXTROW; startcolumni = 0;}
 
 #else
     if (startcolumni == 0) {
@@ -16808,7 +16897,7 @@ Genome_print_blocks_snp (Genomecomp_T *blocks, Genomecomp_T *snp_blocks, Univcoo
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
     high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8];
     printf("high: %08X  low: %08X  flags: %08X  snpmask: %08X\n",high,low,flags,snpmask);
-    ref_ptr += 1; snp_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; snp_ptr += 8; startcolumni = 0;}
+    ref_ptr += GENOME_NEXTCOL; snp_ptr += GENOME_NEXTCOL; if (++startcolumni == 4) {ref_ptr += GENOME_NEXTROW; snp_ptr += GENOME_NEXTROW; startcolumni = 0;}
 #else
     high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8];
     printf("high: %08X  low: %08X  flags: %08X  snpmask: %08X\n",high,low,flags,snpmask);
@@ -16834,6 +16923,201 @@ Genome_print_blocks_snp (Genomecomp_T *blocks, Genomecomp_T *snp_blocks, Univcoo
 static Genomecomp_T *ref_blocks;
 static Genomecomp_T *snp_blocks;
 
+#if defined(USE_SHIFT_HILO) && defined(HAVE_SSE2)
+static inline void
+read_128_shift_lo (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr,
+		    int startcolumni) {
+  __m128i a, b, c;
+  
+  ptr -= startcolumni;
+  a = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  b = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  c = _mm_load_si128((__m128i *) ptr); ptr += 4;
+
+  switch (startcolumni) {
+  case 0:
+    *high = _mm_bsrli_si128(a, 0);
+    *low = _mm_bsrli_si128(b, 0);
+    *flags = _mm_bsrli_si128(c, 0);
+    break;
+  case 1:
+    *high = _mm_bsrli_si128(a, 4);
+    *low = _mm_bsrli_si128(b, 4);
+    *flags = _mm_bsrli_si128(c, 4);
+    break;
+  case 2:
+    *high = _mm_bsrli_si128(a, 8);
+    *low = _mm_bsrli_si128(b, 8);
+    *flags = _mm_bsrli_si128(c, 8);
+    break;
+  default:
+    *high = _mm_bsrli_si128(a, 12);
+    *low = _mm_bsrli_si128(b, 12);
+    *flags = _mm_bsrli_si128(c, 12);
+    break;
+  }
+
+  return;
+}
+
+static inline void
+read_128_shift_hi (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr,
+		   int endcolumni) {
+  __m128i a, b, c;
+  
+  ptr -= endcolumni;
+  a = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  b = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  c = _mm_load_si128((__m128i *) ptr); ptr += 4;
+
+  switch (endcolumni) {
+  case 0:
+    *high = _mm_bslli_si128(a, 12);
+    *low = _mm_bslli_si128(b, 12);
+    *flags = _mm_bslli_si128(c, 12);
+    break;
+  case 1:
+    *high = _mm_bslli_si128(a, 8);
+    *low = _mm_bslli_si128(b, 8);
+    *flags = _mm_bslli_si128(c, 8);
+    break;
+  case 2:
+    *high = _mm_bslli_si128(a, 4);
+    *low = _mm_bslli_si128(b, 4);
+    *flags = _mm_bslli_si128(c, 4);
+    break;
+  default:
+    *high = _mm_bslli_si128(a, 0);
+    *low = _mm_bslli_si128(b, 0);
+    *flags = _mm_bslli_si128(c, 0);
+    break;
+  }
+
+  return;
+}
+#endif
+
+
+#ifdef HAVE_SSSE3
+static inline void
+read_128_wrap_lo (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr,
+		  int startcolumni) {
+  __m128i a, b, c, d, e, f;
+  /* Loads six consecutive 128-bit vectors beginning startcolumni words before ptr; outputs pair them three apart (a/d, b/e, c/f). */
+  ptr -= startcolumni;	/* NOTE(review): presumably rewinds to a 16-byte boundary for the aligned loads -- confirm with callers */
+  a = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  b = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  c = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  d = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  e = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  f = _mm_load_si128((__m128i *) ptr);
+  /* alignr(hi,lo,n) shifts the 32-byte pair hi:lo right by n bytes: each result is lo's words from startcolumni up, then hi's leading words. */
+  switch (startcolumni) {	/* the byte count must be an immediate, hence one case per column */
+  case 0:
+    *high = _mm_alignr_epi8(d, a, 0);
+    *low = _mm_alignr_epi8(e, b, 0);
+    *flags = _mm_alignr_epi8(f, c, 0);
+    break;
+  case 1:
+    *high = _mm_alignr_epi8(d, a, 4);
+    *low = _mm_alignr_epi8(e, b, 4);
+    *flags = _mm_alignr_epi8(f, c, 4);
+    break;
+  case 2:
+    *high = _mm_alignr_epi8(d, a, 8);
+    *low = _mm_alignr_epi8(e, b, 8);
+    *flags = _mm_alignr_epi8(f, c, 8);
+    break;
+  default:
+    *high = _mm_alignr_epi8(d, a, 12);
+    *low = _mm_alignr_epi8(e, b, 12);
+    *flags = _mm_alignr_epi8(f, c, 12);
+    break;
+  }
+
+  return;
+}
+
+static inline void
+read_128_wrap_hi (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr,
+		  int endcolumni) {
+  __m128i a, b, c, d, e, f;
+  /* Loads six consecutive 128-bit vectors: a,b,c are the three vectors before the group ptr falls in, d,e,f are that group itself. */
+  ptr -= endcolumni;	/* NOTE(review): presumably rewinds to a 16-byte boundary for the aligned loads -- confirm with callers */
+  ptr -= 12;		/* step back a further 12 words (three vectors) so a,b,c precede d,e,f */
+  a = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  b = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  c = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  d = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  e = _mm_load_si128((__m128i *) ptr); ptr += 4;
+  f = _mm_load_si128((__m128i *) ptr);
+    /* Shift is 4*(endcolumni+1) bytes: each result ends with words 0..endcolumni of d/e/f, preceded by the tail words of a/b/c. */
+  switch (endcolumni) {	/* the byte count must be an immediate, hence one case per column */
+  case 0:
+    *high = _mm_alignr_epi8(d, a, 4);
+    *low = _mm_alignr_epi8(e, b, 4);
+    *flags = _mm_alignr_epi8(f, c, 4);
+    break;
+  case 1:
+    *high = _mm_alignr_epi8(d, a, 8);
+    *low = _mm_alignr_epi8(e, b, 8);
+    *flags = _mm_alignr_epi8(f, c, 8);
+    break;
+  case 2:
+    *high = _mm_alignr_epi8(d, a, 12);
+    *low = _mm_alignr_epi8(e, b, 12);
+    *flags = _mm_alignr_epi8(f, c, 12);
+    break;
+  default:
+    *high = _mm_alignr_epi8(d, a, 16);	/* a shift of 16 bytes returns d unchanged */
+    *low = _mm_alignr_epi8(e, b, 16);	/* likewise e */
+    *flags = _mm_alignr_epi8(f, c, 16);	/* likewise f */
+    break;
+  }
+
+  return;
+}
+#endif
+
+
+#ifdef HAVE_AVX2
+static inline void
+read_256 (__m256i *__restrict__ high, __m256i *__restrict__ low, __m256i *__restrict__ flags, UINT4 *__restrict__ ptr) {
+  __m256i a, b, c;	/* three unaligned 256-bit loads covering ptr[0..23] */
+  a = _mm256_loadu_si256((__m256i *) ptr); /* query0_high, query0_low */
+  b = _mm256_loadu_si256((__m256i *) &(ptr[8])); /* query0_flags, query1_high */
+  c = _mm256_loadu_si256((__m256i *) &(ptr[16])); /* query1_low, query1_flags */
+  /* Regroup the 128-bit halves so each output holds the same field of block 0 (lower half) and block 1 (upper half). */
+  *high = _mm256_permute2x128_si256(a, b, 0x30);	/* lower half of a, upper half of b */
+  *low = _mm256_permute2x128_si256(a, c, 0x21);	/* upper half of a, lower half of c */
+  *flags = _mm256_permute2x128_si256(b, c, 0x30);	/* lower half of b, upper half of c */
+
+  return;
+}
+#endif
+
+#ifdef HAVE_AVX512
+static inline void
+read_512 (__m512i *__restrict__ high, __m512i *__restrict__ low, __m512i *__restrict__ flags, UINT4 *__restrict__ ptr) {
+  __m512i a, b, c, d, e, f;	/* a,b,c: raw loads of ptr[0..47]; d,e,f: intermediate shuffles */
+  a = _mm512_loadu_si512((__m512i *) ptr); /* query0_high, query0_low, query0_flags, query1_high */
+  b = _mm512_loadu_si512((__m512i *) &(ptr[16])); /* query1_low, query1_flags, query2_high, query2_low */
+  c = _mm512_loadu_si512((__m512i *) &(ptr[32])); /* query2_flags, query3_high, query3_low, query3_flags */
+  /* Stage 1: 32-bit shuffles; index values 0..15 pick from the first operand, 16+k picks word k of the last operand. */
+  d = _mm512_permutex2var_epi32(a, _mm512_setr_epi32(0, 1, 2, 3, 12, 13, 14, 15,
+						     4, 5, 6, 7, 16+0, 16+1, 16+2, 16+3), b);	/* query0_high, query1_high, query0_low, query1_low */
+  e = _mm512_permutex2var_epi32(b, _mm512_setr_epi32(8, 9, 10, 11, 16+4, 16+5, 16+6, 16+7,
+						     12, 13, 14, 15, 16+8, 16+9, 16+10, 16+11), c);	/* query2_high, query3_high, query2_low, query3_low */
+  f = _mm512_permutex2var_epi32(a, _mm512_setr_epi32(8, 9, 10, 11, 16+4, 16+5, 16+6, 16+7,
+						     12, 13, 14, 15, 16+8, 16+9, 16+10, 16+11), b);	/* query0_flags, query1_flags, then two groups not used below */
+  /* Stage 2: 64-bit shuffles; index values 0..7 pick from the first operand, 8+k picks qword k of the last operand. */
+  *high = _mm512_permutex2var_epi64(d, _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3), e);	/* lower halves of d and e */
+  *low = _mm512_permutex2var_epi64(d, _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7), e);	/* upper halves of d and e */
+  *flags = _mm512_permutex2var_epi64(f, _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+6, 8+7), c);	/* lower half of f, then query2_flags and query3_flags from c */
+
+  return;
+}
+#endif
 
 
 /* These are global values, used for alignment.  Previously for
@@ -16844,10 +17128,9 @@ static bool query_unk_mismatch_p = false;
 static bool genome_unk_mismatch_p = true;
 
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-typedef UINT4 Genomediff_T;
 #define STEP_SIZE 32
 #else
-typedef __m128i Genomediff_T;
+/* Holds for SSE2, AVX2, and AVX512 */
 #define STEP_SIZE 128
 #endif
 
@@ -16913,69 +17196,135 @@ block_diff_standard_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 }
 
 
-static Genomediff_T
-block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-  UINT4 diff;
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_standard_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-  debug(printf("Comparing high: query %08X with genome %08X ",query_shifted[0],ref_ptr[0]));
-  debug(printf("Comparing low: query %08X with genome %08X ",query_shifted[1],ref_ptr[4]));
+  _query_high = _mm_load_si128((__m128i *) query_shifted);
+  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
 
-#ifdef WORDS_BIGENDIAN
-  diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
-  diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[1] ^ ref_ptr[4]);
-#endif
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
 
-  /* Query Ns */
+  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
-    /* Query: Considering N as a mismatch */
-    diff |= query_shifted[2];
+    _diff = _mm_or_si128(_query_flags, _diff);
   } else {
-    /* Query: Considering N as a wildcard */
-    diff &= ~(query_shifted[2]);
+    _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  /* Genome Ns */
+  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
-    /* Genome: Considering N as a mismatch */
-#ifdef WORDS_BIGENDIAN
-    diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-    diff |= ref_ptr[8];
-#endif
+    _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
-    /* Genome: Considering N as a wildcard */
-#ifdef WORDS_BIGENDIAN
-    diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
-    diff &= ~(ref_ptr[8]);
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_standard_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				  int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_standard_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				  int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
 #endif
+#endif
+
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_standard_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  debug(printf(" => diff %08X\n",diff));
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
 
-  return diff;
+  return _diff;
+}
 
-#else
+static __m128i
+block_diff_standard_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int endcolumni) {
   __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-  _query_high = _mm_load_si128((__m128i *) query_shifted);
-  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
-  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
-  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+  read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
 
   _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
 
-  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
     _diff = _mm_or_si128(_query_flags, _diff);
   } else {
     _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
     _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
@@ -16983,9 +17332,62 @@ block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   }
 
   return _diff;
+}
 #endif
+
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_standard_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_256(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  _diff = _mm256_or_si256(_mm256_xor_si256(_query_high, _ref_high), _mm256_xor_si256(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm256_or_si256(_query_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm256_or_si256(_ref_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_ref_flags, _diff);
+  }
+
+  return _diff;
 }
+#endif
+
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_standard_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_512(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  _diff = _mm512_or_si512(_mm512_xor_si512(_query_high, _ref_high), _mm512_xor_si512(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm512_or_si512(_query_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm512_or_si512(_ref_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_ref_flags, _diff);
+  }
 
+  return _diff;
+}
+#endif
 
 
 static UINT4
@@ -17082,101 +17484,190 @@ block_diff_standard_wildcard_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_
 /* not wildcard if ref != alt || ref_flag == 1 || alt_flag == 0 */
 /* diffs are (query ^ ref) & (query ^ alt) & ~wildcard */
 /* snp_ptr here is alt_ptr */
-static Genomediff_T
-block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
-			      bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_standard_wildcard_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
+    _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-  UINT4 diff, non_wildcard;
+  _query_high = _mm_load_si128((__m128i *) query_shifted);
+  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
 
-  /* Taken from block_diff_standard */
-#ifdef WORDS_BIGENDIAN
-  diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
-  diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[1] ^ ref_ptr[4]);
-#endif
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
 
-  /* Query Ns */
+  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
-    /* Query: Considering N as a mismatch */
-    diff |= query_shifted[2];
+    _diff = _mm_or_si128(_query_flags, _diff);
   } else {
-    /* Query: Considering N as a wildcard */
-    diff &= ~(query_shifted[2]);
+    _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  /* Genome Ns */
+  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
-    /* Genome: Considering N as a mismatch */
-#ifdef WORDS_BIGENDIAN
-    diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-    diff |= ref_ptr[8];
-#endif
+    _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
-    /* Genome: Considering N as a wildcard */
-#ifdef WORDS_BIGENDIAN
-    diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
-    diff &= ~(ref_ptr[8]);
-#endif
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
   }
+  /* End of (query ^ ref) */
+
+
+  /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
+  _snp_high = _mm_load_si128((__m128i *) snp_ptr);
+  _snp_low = _mm_load_si128((__m128i *) &(snp_ptr[4]));
+
+  _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
 
-  /* Add difference relative to SNP */
-#ifdef WORDS_BIGENDIAN
-  diff &= (query_shifted[0] ^ Bigendian_convert_uint(snp_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(snp_ptr[4]));
-#else
-  diff &= (query_shifted[0] ^ snp_ptr[0]) | (query_shifted[1] ^ snp_ptr[4]);
-#endif
 
   /* Test for equality of ref and alt */
-  debug(printf("Equality high: ref genome %08X with alt genome %08X ",ref_ptr[0],snp_ptr[0]));
-#ifdef WORDS_BIGENDIAN
-  non_wildcard = (Bigendian_convert_uint(ref_ptr[0]) ^ Bigendian_convert_uint(snp_ptr[0])) |
-    (Bigendian_convert_uint(ref_ptr[4]) ^ Bigendian_convert_uint(snp_ptr[4]));
-#else
-  non_wildcard = (ref_ptr[0] ^ snp_ptr[0]) | (ref_ptr[4] ^ snp_ptr[4]);
+  _snp_flags = _mm_load_si128((__m128i *) &(snp_ptr[8]));
+  _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
+  _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
+
+  _diff = _mm_andnot_si128(_wildcard, _diff);
+
+  return _diff;
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_standard_wildcard_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+					   bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+					   int startcolumni) {
+  __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
+    _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
+
+  read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+  read_128_shift_lo(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,startcolumni);
+
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+  /* End of (query ^ ref) */
+
+  /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
+  _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
+
+  /* Test for equality of ref and alt */
+  _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
+  _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
+
+  _diff = _mm_andnot_si128(_wildcard, _diff);
+
+  return _diff;
+}
+
+static __m128i
+block_diff_standard_wildcard_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+					   bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+					   int endcolumni) {
+  __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
+    _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
+
+  read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+  read_128_shift_hi(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,endcolumni);
+
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+  /* End of (query ^ ref) */
+
+  /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
+  _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
+
+  /* Test for equality of ref and alt */
+  _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
+  _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
+
+  _diff = _mm_andnot_si128(_wildcard, _diff);
+
+  return _diff;
+}
 #endif
-  debug(printf(" => diff %08X\n",non_wildcard));
-  
-  /* Ref flags */
-  debug(printf("Wildcard add ref flags: ref genome %08X and alt genome %08X ",ref_ptr[8],snp_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-  non_wildcard |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-  non_wildcard |= ref_ptr[8];
 #endif
 
-  /* Alt flags */
-  debug(printf("Wildcard add alt flags: ref genome %08X and alt genome %08X ",ref_ptr[8],snp_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-  non_wildcard |= ~(Bigendian_convert_uint(snp_ptr[8]));
-#else
-  non_wildcard |= ~(snp_ptr[8]);
-#endif
-  debug(printf(" => non_wildcard %08X\n",non_wildcard));
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_standard_wildcard_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+					  bool plusp, int genestrand, bool query_unk_mismatch_local_p, int startcolumni) {
+  __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
+    _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
 
-  return diff & non_wildcard;
+  read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+  read_128_wrap_lo(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,startcolumni);
 
-#else
+  _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+  /* End of (query ^ ref) */
+
+
+  /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
+  _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
+
+
+  /* Test for equality of ref and alt */
+  _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
+  _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
+
+  _diff = _mm_andnot_si128(_wildcard, _diff);
+
+  return _diff;
+}
+
+static __m128i
+block_diff_standard_wildcard_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+					  bool plusp, int genestrand, bool query_unk_mismatch_local_p, int endcolumni) {
   __m128i _diff, _wildcard, _query_high, _query_low, _query_flags,
     _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
 
-  _query_high = _mm_load_si128((__m128i *) query_shifted);
-  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
-  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
-  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+  read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+  read_128_wrap_hi(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,endcolumni);
 
   _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low));
 
-  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
     _diff = _mm_or_si128(_query_flags, _diff);
   } else {
     _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
     _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
@@ -17186,23 +17677,106 @@ block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr
 
 
   /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
-  _snp_high = _mm_load_si128((__m128i *) snp_ptr);
-  _snp_low = _mm_load_si128((__m128i *) &(snp_ptr[4]));
-
   _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low)));
 
 
   /* Test for equality of ref and alt */
-  _snp_flags = _mm_load_si128((__m128i *) &(snp_ptr[8]));
   _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags);
   _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard);
 
   _diff = _mm_andnot_si128(_wildcard, _diff);
 
   return _diff;
+}
 #endif
+
+/* wildcard if ref == alt && ref_flag == 0 && alt_flag == 1 */
+/* not wildcard if ref != alt || ref_flag == 1 || alt_flag == 0 */
+/* diffs are (query ^ ref) & (query ^ alt) & ~wildcard */
+/* snp_ptr here is alt_ptr */
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_standard_wildcard_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  __m256i _diff, _wildcard, _query_high, _query_low, _query_flags,
+    _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
+
+  read_256(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  _diff = _mm256_or_si256(_mm256_xor_si256(_query_high, _ref_high), _mm256_xor_si256(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm256_or_si256(_query_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm256_or_si256(_ref_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_ref_flags, _diff);
+  }
+  /* End of (query ^ ref) */
+
+  /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
+  read_256(&_snp_high,&_snp_low,&_snp_flags,snp_ptr);
+
+  _diff = _mm256_and_si256(_diff, _mm256_or_si256(_mm256_xor_si256(_query_high, _snp_high), _mm256_xor_si256(_query_low, _snp_low)));
+
+  /* Test for equality of ref and alt */
+  _wildcard = _mm256_andnot_si256(_ref_flags, _snp_flags);
+  _wildcard = _mm256_andnot_si256(_mm256_or_si256(_mm256_xor_si256(_ref_high, _snp_high), _mm256_xor_si256(_ref_low, _snp_low)), _wildcard);
+
+  _diff = _mm256_andnot_si256(_wildcard, _diff);
+
+  return _diff;
 }
+#endif
+
+/* wildcard if ref == alt && ref_flag == 0 && alt_flag == 1 */
+/* not wildcard if ref != alt || ref_flag == 1 || alt_flag == 0 */
+/* diffs are (query ^ ref) & (query ^ alt) & ~wildcard */
+/* snp_ptr here is alt_ptr */
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_standard_wildcard_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  __m512i _diff, _wildcard, _query_high, _query_low, _query_flags,
+    _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags;
+
+  read_512(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  _diff = _mm512_or_si512(_mm512_xor_si512(_query_high, _ref_high), _mm512_xor_si512(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm512_or_si512(_query_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm512_or_si512(_ref_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_ref_flags, _diff);
+  }
+  /* End of (query ^ ref) */
+
+  /* Add (query ^ snp).  Don't need to recompute query flags or use SNP flags. */
+  read_512(&_snp_high,&_snp_low,&_snp_flags,snp_ptr);
+
+  _diff = _mm512_and_si512(_diff, _mm512_or_si512(_mm512_xor_si512(_query_high, _snp_high), _mm512_xor_si512(_query_low, _snp_low)));
+
+  /* Test for equality of ref and alt */
+  _wildcard = _mm512_andnot_si512(_ref_flags, _snp_flags);
+  _wildcard = _mm512_andnot_si512(_mm512_or_si512(_mm512_xor_si512(_ref_high, _snp_high), _mm512_xor_si512(_ref_low, _snp_low)), _wildcard);
 
+  _diff = _mm512_andnot_si512(_wildcard, _diff);
+
+  return _diff;
+}
+#endif
 
 
 /************************************************************************
@@ -17282,74 +17856,168 @@ block_diff_metct_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 }
 
 
+#ifdef HAVE_SSE2
 /* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 0 => 1 */
 /* new high = high | low */
-static Genomediff_T
-block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		  bool query_unk_mismatch_local_p, bool sarrayp) {
+static __m128i
+block_diff_metct_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-  UINT4 diff;
+  _query_high = _mm_load_si128((__m128i *) query_shifted);
+  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
 
   if (sarrayp == true) {
-    /* Convert everything to 3-nucleotide space */
-    diff = 0U;
+    /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
+    _diff = _mm_setzero_si128();
   } else {
     /* Mark genome-T to query-C mismatches */
-#ifdef WORDS_BIGENDIAN
-    diff = (~(query_shifted[0]) & query_shifted[1]) &
-      (Bigendian_convert_uint(ref_ptr[0]) & Bigendian_convert_uint(ref_ptr[4]));
-#else
-    diff = (~(query_shifted[0]) & query_shifted[1]) & (ref_ptr[0] & ref_ptr[4]);
-#endif
-    debug(printf(" => diff %08X\n",diff));
+    _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low));
   }
 
   /* Compare reduced C->T nts  */
-#ifdef WORDS_BIGENDIAN
-  diff |= ((query_shifted[0] | query_shifted[1]) ^ (Bigendian_convert_uint(ref_ptr[0]) | Bigendian_convert_uint(ref_ptr[4]))) |
-    (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
-  diff |= ((query_shifted[0] | query_shifted[1]) ^ (ref_ptr[0] | ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
-#endif
-  debug(printf(" => diff %08X\n",diff));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
+  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_metct_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-T to query-C mismatches */
+    _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced C->T nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-  /* Flags: Considering N as a mismatch */
   if (query_unk_mismatch_local_p) {
-    debug(printf("Marking query flags: query %08X ",query_shifted[2]));
-    diff |= query_shifted[2];
+    _diff = _mm_or_si128(_query_flags, _diff);
   } else {
-    debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
-    diff &= ~(query_shifted[2]);
+    _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
   if (genome_unk_mismatch_p) {
-    debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-    diff |= (ref_ptr[8]);
-#endif
+    _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
-    debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
-    diff &= ~(ref_ptr[8]);
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_metct_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-T to query-C mismatches */
+    _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced C->T nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
 #endif
+#endif
+
+#ifdef HAVE_SSSE3
+/* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 1 => 1 */
+/* new high = high | low */
+static __m128i
+block_diff_metct_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-T to query-C mismatches */
+    _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low));
   }
-  debug(printf(" => diff %08X\n",diff));
 
-  return diff;
+  /* Compare reduced C->T nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-#else
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_metct_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int endcolumni) {
   __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-  _query_high = _mm_load_si128((__m128i *) query_shifted);
-  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
-  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
-  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+  read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
 
   if (sarrayp == true) {
     /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
@@ -17363,14 +18031,12 @@ block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low)));
   _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
     _diff = _mm_or_si128(_query_flags, _diff);
   } else {
     _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
     _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
@@ -17378,8 +18044,87 @@ block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   }
 
   return _diff;
+}
+#endif
+
+
+#ifdef HAVE_AVX2
+/* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 1 => 1 */
+/* new high = high | low */
+static __m256i
+block_diff_metct_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_256(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
+    _diff = _mm256_setzero_si256();
+  } else {
+    /* Mark genome-T to query-C mismatches */
+    _diff = _mm256_and_si256(_mm256_andnot_si256(_query_high, _query_low), _mm256_and_si256(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced C->T nts  */
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_or_si256(_query_high, _query_low), _mm256_or_si256(_ref_high, _ref_low)));
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm256_or_si256(_query_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm256_or_si256(_ref_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
 #endif
+
+#ifdef HAVE_AVX512
+/* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 1 => 1 */
+/* new high = high | low */
+static __m512i
+block_diff_metct_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_512(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-T to query-C mismatches.  Convert everything to 3-nucleotide space */
+    _diff = _mm512_setzero_si512();
+  } else {
+    /* Mark genome-T to query-C mismatches */
+    _diff = _mm512_and_si512(_mm512_andnot_si512(_query_high, _query_low), _mm512_and_si512(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced C->T nts  */
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_or_si512(_query_high, _query_low), _mm512_or_si512(_ref_high, _ref_low)));
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm512_or_si512(_query_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm512_or_si512(_ref_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_ref_flags, _diff);
+  }
+
+  return _diff;
 }
+#endif
 
 
 static UINT4
@@ -17455,74 +18200,58 @@ block_diff_metga_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 }
 
 
+#ifdef HAVE_SSE2
 /* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 0 => 1 */
 /* new high = high & low */
-static Genomediff_T
-block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		  bool query_unk_mismatch_local_p, bool sarrayp) {
+static __m128i
+block_diff_metga_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-  UINT4 diff;
+  _query_high = _mm_load_si128((__m128i *) query_shifted);
+  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
 
   if (sarrayp == true) {
     /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
-    diff = 0U;
+    _diff = _mm_setzero_si128();
   } else {
     /* Mark genome-A to query-G mismatches */
-#ifdef WORDS_BIGENDIAN
-    diff = (query_shifted[0] & ~(query_shifted[1])) &
-      ~(Bigendian_convert_uint(ref_ptr[0]) | Bigendian_convert_uint(ref_ptr[4]));
-#else
-    diff = (query_shifted[0] & ~(query_shifted[1])) & ~(ref_ptr[0] | ref_ptr[4]);
-#endif
-    debug(printf(" => diff %08X\n",diff));
+    _diff = _mm_andnot_si128(_query_low, _query_high);
+    _diff = _mm_andnot_si128(_ref_high, _diff);
+    _diff = _mm_andnot_si128(_ref_low, _diff);
   }
 
   /* Compare reduced G->A nts  */
-#ifdef WORDS_BIGENDIAN
-  diff |= ((query_shifted[0] & query_shifted[1]) ^ (Bigendian_convert_uint(ref_ptr[0]) & Bigendian_convert_uint(ref_ptr[4]))) |
-    (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
-  diff |= ((query_shifted[0] & query_shifted[1]) ^ (ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
-#endif
-  debug(printf(" => diff %08X\n",diff));
-
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-  /* Flags: Considering N as a mismatch */
+  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
-    debug(printf("Marking query flags: query %08X ",query_shifted[2]));
-    diff |= query_shifted[2];
+    _diff = _mm_or_si128(_query_flags, _diff);
   } else {
-    debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
-    diff &= ~(query_shifted[2]);
+    _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
+  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
-    debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-    diff |= (ref_ptr[8]);
-#endif
+    _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
-    debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
-    diff &= ~(ref_ptr[8]);
-#endif
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
   }
-  debug(printf(" => diff %08X\n",diff));
 
-  return diff;
+  return _diff;
+}
 
-#else
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_metga_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) {
   __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-  _query_high = _mm_load_si128((__m128i *) query_shifted);
-  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
-  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
-  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+  read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
 
   if (sarrayp == true) {
     /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
@@ -17538,14 +18267,12 @@ block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
   _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
     _diff = _mm_or_si128(_query_flags, _diff);
   } else {
     _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
     _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
@@ -17553,16 +18280,217 @@ block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   }
 
   return _diff;
-#endif
 }
 
-static UINT4
-block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
-  if (genestrand == +2) {
-    if (plusp) {
-      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
-    } else {
+static __m128i
+block_diff_metga_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-A to query-G mismatches */
+    _diff = _mm_andnot_si128(_query_low, _query_high);
+    _diff = _mm_andnot_si128(_ref_high, _diff);
+    _diff = _mm_andnot_si128(_ref_low, _diff);
+  }
+
+  /* Compare reduced G->A nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+#endif
+
+#ifdef HAVE_SSSE3
+/* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 1 => 1 */
+/* new high = high & low */
+static __m128i
+block_diff_metga_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-A to query-G mismatches */
+    _diff = _mm_andnot_si128(_query_low, _query_high);
+    _diff = _mm_andnot_si128(_ref_high, _diff);
+    _diff = _mm_andnot_si128(_ref_low, _diff);
+  }
+
+  /* Compare reduced G->A nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_metga_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-A to query-G mismatches */
+    _diff = _mm_andnot_si128(_query_low, _query_high);
+    _diff = _mm_andnot_si128(_ref_high, _diff);
+    _diff = _mm_andnot_si128(_ref_low, _diff);
+  }
+
+  /* Compare reduced G->A nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+#ifdef HAVE_AVX2
+/* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 1 => 1 */
+/* new high = high & low */
+static __m256i
+block_diff_metga_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_256(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm256_setzero_si256();
+  } else {
+    /* Mark genome-A to query-G mismatches */
+    _diff = _mm256_andnot_si256(_query_low, _query_high);
+    _diff = _mm256_andnot_si256(_ref_high, _diff);
+    _diff = _mm256_andnot_si256(_ref_low, _diff);
+  }
+
+  /* Compare reduced G->A nts  */
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_and_si256(_query_high, _query_low), _mm256_and_si256(_ref_high, _ref_low)));
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm256_or_si256(_query_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm256_or_si256(_ref_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+#ifdef HAVE_AVX512
+/* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 1 => 1 */
+/* new high = high & low */
+static __m512i
+block_diff_metga_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_512(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-A to query-G mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm512_setzero_si512();
+  } else {
+    /* Mark genome-A to query-G mismatches */
+    _diff = _mm512_andnot_si512(_query_low, _query_high);
+    _diff = _mm512_andnot_si512(_ref_high, _diff);
+    _diff = _mm512_andnot_si512(_ref_low, _diff);
+  }
+
+  /* Compare reduced G->A nts  */
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_and_si512(_query_high, _query_low), _mm512_and_si512(_ref_high, _ref_low)));
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm512_or_si512(_query_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm512_or_si512(_ref_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+
+static UINT4
+block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
       return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   } else {
@@ -17574,189 +18502,527 @@ block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   }
 }
 
-static Genomediff_T
-block_diff_cmet (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_cmet_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   } else {
     if (plusp) {
-      return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   }
 }
 
-static UINT4
-block_diff_cmet_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-				       bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_cmet_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			      int startcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     } else {
-      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     } else {
-      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     }
   }
 }
 
-static Genomediff_T
-block_diff_cmet_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+static __m128i
+block_diff_cmet_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			      int endcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     } else {
-      return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     } else {
-      return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     }
   }
 }
+#endif
+#endif
 
-#ifdef GSNAP
-/* Ignores snp_ptr */
-static UINT4
-block_diff_cmet_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
-			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_cmet_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			     int startcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     } else {
-      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     } else {
-      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     }
   }
 }
-#endif
 
+static __m128i
+block_diff_cmet_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			     int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  }
+}
+#endif
 
-#ifdef GSNAP
-/* Ignores snp_ptr */
-static Genomediff_T
-block_diff_cmet_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_cmet_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   } else {
     if (plusp) {
-      return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   }
 }
 #endif
 
-
-/************************************************************************
- *   ATOI
- ************************************************************************/
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_cmet_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
 
 static UINT4
-block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		     bool query_unk_mismatch_local_p, bool sarrayp) {
-  UINT4 diff;
+block_diff_cmet_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			   bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
 
-  if (sarrayp == true) {
-    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
-    diff = 0U;
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_cmet_sarray_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
   } else {
-    /* Mark genome-G to query-A mismatches */
-#ifdef WORDS_BIGENDIAN
-    diff = ~(query_shifted[0] | query_shifted[1]) &
-      (Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4]));
-#elif !defined(HAVE_SSE2)
-    diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4]));
-#else
-    diff = ~(query_shifted[0] | query_shifted[4]) & (ref_ptr[0] & ~(ref_ptr[4]));
-#endif
-    debug(printf(" => diff %08X\n",diff));
+    if (plusp) {
+      return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
   }
+}
 
-  /* Compare reduced A->G nts  */
-#ifdef WORDS_BIGENDIAN
-  diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) |
-    (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#elif !defined(HAVE_SSE2)
-  diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
-  /* Because (a ^ b) = (~a ^ ~b), this is equivalent to 
-  diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
-  */
-#else
-  diff |= ((query_shifted[0] | ~(query_shifted[4])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]);
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_cmet_sarray_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				     int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    } else {
+      return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    } else {
+      return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_cmet_sarray_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				     int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    } else {
+      return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    } else {
+      return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    }
+  }
+}
+#endif
 #endif
-  debug(printf(" => diff %08X\n",diff));
 
-  /* Flags: Considering N as a mismatch */
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-  if (query_unk_mismatch_local_p) {
-    debug(printf("Marking query flags: query %08X ",query_shifted[2]));
-    diff |= query_shifted[2];
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_cmet_sarray_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				    bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				    int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    } else {
+      return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    }
   } else {
-    debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
-    diff &= ~(query_shifted[2]);
+    if (plusp) {
+      return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    } else {
+      return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    }
   }
-#else
-  if (query_unk_mismatch_local_p) {
-    debug(printf("Marking query flags: query %08X ",query_shifted[8]));
-    diff |= query_shifted[8];
+}
+
+static __m128i
+block_diff_cmet_sarray_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				    bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				    int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    } else {
+      return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    }
   } else {
-    debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
-    diff &= ~(query_shifted[8]);
+    if (plusp) {
+      return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    } else {
+      return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    }
   }
+}
 #endif
 
-  if (genome_unk_mismatch_p) {
-    debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-    diff |= (ref_ptr[8]);
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_cmet_sarray_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
 #endif
+
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_cmet_sarray_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
   } else {
-    debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
-    diff &= ~(ref_ptr[8]);
+    if (plusp) {
+      return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
 #endif
+
+
+#ifdef GSNAP
+/* Ignores snp_ptr */
+static UINT4
+block_diff_cmet_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
   }
-  debug(printf(" => diff %08X\n",diff));
+}
+#endif
 
-  return diff;
+
+#if defined(GSNAP) && defined(HAVE_SSE2)
+/* Ignores snp_ptr */
+static __m128i
+block_diff_cmet_snp_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
 }
 
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_cmet_snp_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				  int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  }
+}
 
-/* Convert A->G: new high = high | ~low */
-static Genomediff_T
-block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		  bool query_unk_mismatch_local_p, bool sarrayp) {
+static __m128i
+block_diff_cmet_snp_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				  int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  }
+}
+#endif
+#endif
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#if defined(GSNAP) && defined(HAVE_SSSE3)
+/* Ignores snp_ptr */
+static __m128i
+block_diff_cmet_snp_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    } else {
+      return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    } else {
+      return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_cmet_snp_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  }
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_AVX2)
+/* Ignores snp_ptr */
+static __m256i
+block_diff_cmet_snp_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_AVX512)
+/* Ignores snp_ptr */
+static __m512i
+block_diff_cmet_snp_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+
+/************************************************************************
+ *   ATOI
+ ************************************************************************/
+
+static UINT4
+block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool query_unk_mismatch_local_p, bool sarrayp) {
   UINT4 diff;
 
   if (sarrayp == true) {
@@ -17767,8 +19033,10 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 #ifdef WORDS_BIGENDIAN
     diff = ~(query_shifted[0] | query_shifted[1]) &
       (Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
     diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4]));
+#else
+    diff = ~(query_shifted[0] | query_shifted[4]) & (ref_ptr[0] & ~(ref_ptr[4]));
 #endif
     debug(printf(" => diff %08X\n",diff));
   }
@@ -17777,15 +19045,18 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 #ifdef WORDS_BIGENDIAN
   diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) |
     (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
+#elif !defined(HAVE_SSE2)
   diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
   /* Because (a ^ b) = (~a ^ ~b), this is equivalent to 
   diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]);
   */
+#else
+  diff |= ((query_shifted[0] | ~(query_shifted[4])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]);
 #endif
   debug(printf(" => diff %08X\n",diff));
 
   /* Flags: Considering N as a mismatch */
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
   if (query_unk_mismatch_local_p) {
     debug(printf("Marking query flags: query %08X ",query_shifted[2]));
     diff |= query_shifted[2];
@@ -17793,6 +19064,15 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
     debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
     diff &= ~(query_shifted[2]);
   }
+#else
+  if (query_unk_mismatch_local_p) {
+    debug(printf("Marking query flags: query %08X ",query_shifted[8]));
+    diff |= query_shifted[8];
+  } else {
+    debug(printf("Clearing query flags: query %08X ",query_shifted[8]));
+    diff &= ~(query_shifted[8]);
+  }
+#endif
 
   if (genome_unk_mismatch_p) {
     debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
@@ -17812,13 +19092,19 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   debug(printf(" => diff %08X\n",diff));
 
   return diff;
+}
 
-#else
+
+#ifdef HAVE_SSE2
+/* Convert A->G: new high = high | ~low */
+static __m128i
+block_diff_a2iag_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
   __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
   _query_high = _mm_load_si128((__m128i *) query_shifted);
-  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
   _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
   _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
 
   if (sarrayp == true) {
@@ -17848,20 +19134,243 @@ block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   }
 
   return _diff;
-#endif
 }
 
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_a2iag_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-static UINT4
-block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		     bool query_unk_mismatch_local_p, bool sarrayp) {
-  UINT4 diff;
+  read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
 
   if (sarrayp == true) {
-    /* Ignore genome-C to query-T mismatches */
-    diff = 0U;
+    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
   } else {
-    /* Mark genome-C to query-T mismatches */
+    /* Mark genome-G to query-A mismatches */
+    _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high));
+  }
+
+  /* Compare reduced A->G nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_a2iag_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-G to query-A mismatches */
+    _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high));
+  }
+
+  /* Compare reduced A->G nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+#endif
+
+#ifdef HAVE_SSSE3
+/* Convert A->G: new high = high | ~low */
+static __m128i
+block_diff_a2iag_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-G to query-A mismatches */
+    _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high));
+  }
+
+  /* Compare reduced A->G nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_a2iag_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-G to query-A mismatches */
+    _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high));
+  }
+
+  /* Compare reduced A->G nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+#ifdef HAVE_AVX2
+/* Convert A->G: new high = high | ~low */
+static __m256i
+block_diff_a2iag_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_256(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm256_setzero_si256();
+  } else {
+    /* Mark genome-G to query-A mismatches */
+    _diff = _mm256_andnot_si256(_mm256_or_si256(_query_high, _query_low), _mm256_andnot_si256(_ref_low, _ref_high));
+  }
+
+  /* Compare reduced A->G nts  */
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_andnot_si256(_query_high, _query_low), _mm256_andnot_si256(_ref_high, _ref_low)));
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm256_or_si256(_query_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm256_or_si256(_ref_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+#ifdef HAVE_AVX512
+/* Convert A->G: new high = high | ~low */
+static __m512i
+block_diff_a2iag_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_512(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-G to query-A mismatches.  Convert everything to 3-nucleotide space. */
+    _diff = _mm512_setzero_si512();
+  } else {
+    /* Mark genome-G to query-A mismatches */
+    _diff = _mm512_andnot_si512(_mm512_or_si512(_query_high, _query_low), _mm512_andnot_si512(_ref_low, _ref_high));
+  }
+
+  /* Compare reduced A->G nts  */
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_andnot_si512(_query_high, _query_low), _mm512_andnot_si512(_ref_high, _ref_low)));
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm512_or_si512(_query_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm512_or_si512(_ref_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+
+static UINT4
+block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool query_unk_mismatch_local_p, bool sarrayp) {
+  UINT4 diff;
+
+  if (sarrayp == true) {
+    /* Ignore genome-C to query-T mismatches */
+    diff = 0U;
+  } else {
+    /* Mark genome-C to query-T mismatches */
 #ifdef WORDS_BIGENDIAN
     diff = (query_shifted[0] & query_shifted[1]) &
       (~(Bigendian_convert_uint(ref_ptr[0])) & Bigendian_convert_uint(ref_ptr[4]));
@@ -17924,72 +19433,55 @@ block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 }
 
 
+#ifdef HAVE_SSE2
 /* Convert T->C: new high = high & ~low */
-static Genomediff_T
-block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-                  bool query_unk_mismatch_local_p, bool sarrayp) {
+static __m128i
+block_diff_a2itc_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-  UINT4 diff;
+  _query_high = _mm_load_si128((__m128i *) query_shifted);
+  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
+  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
+  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
 
   if (sarrayp == true) {
     /* Ignore genome-C to query-T mismatches */
-    diff = 0U;
+    _diff = _mm_setzero_si128();
   } else {
     /* Mark genome-C to query-T mismatches */
-#ifdef WORDS_BIGENDIAN
-    diff = (query_shifted[0] & query_shifted[1]) &
-      (~(Bigendian_convert_uint(ref_ptr[0])) & Bigendian_convert_uint(ref_ptr[4]));
-#else
-    diff = (query_shifted[0] & query_shifted[1]) & (~(ref_ptr[0]) & ref_ptr[4]);
-#endif
-    debug(printf(" => diff %08X\n",diff));
+    _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low));
   }
 
   /* Compare reduced T->C nts  */
-#ifdef WORDS_BIGENDIAN
-  diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) & ~(Bigendian_convert_uint(ref_ptr[4])))) |
-    (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4]));
-#else
-  diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (ref_ptr[0] & ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]);
-#endif
-  debug(printf(" => diff %08X\n",diff));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-  /* Flags: Considering N as a mismatch */
+  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
-    debug(printf("Marking query flags: query %08X ",query_shifted[2]));
-    diff |= query_shifted[2];
+    _diff = _mm_or_si128(_query_flags, _diff);
   } else {
-    debug(printf("Clearing query flags: query %08X ",query_shifted[2]));
-    diff &= ~(query_shifted[2]);
+    _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
+  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
-    debug(printf("Marking genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff |= Bigendian_convert_uint(ref_ptr[8]);
-#else
-    diff |= (ref_ptr[8]);
-#endif
+    _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
-    debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8]));
-#ifdef WORDS_BIGENDIAN
-    diff &= ~(Bigendian_convert_uint(ref_ptr[8]));
-#else
-    diff &= ~(ref_ptr[8]);
-#endif
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
   }
-  debug(printf(" => diff %08X\n",diff));
 
-  return diff;
+  return _diff;
+}
 
-#else
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_a2itc_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) {
   __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-  _query_high = _mm_load_si128((__m128i *) query_shifted);
-  _ref_high = _mm_load_si128((__m128i *) ref_ptr);
-  _query_low = _mm_load_si128((__m128i *) &(query_shifted[4]));
-  _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4]));
+  read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
 
   if (sarrayp == true) {
     /* Ignore genome-C to query-T mismatches */
@@ -18003,14 +19495,12 @@ block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high)));
   _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
 
-  _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8]));
   if (query_unk_mismatch_local_p) {
     _diff = _mm_or_si128(_query_flags, _diff);
   } else {
     _diff = _mm_andnot_si128(_query_flags, _diff);
   }
 
-  _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8]));
   if (genome_unk_mismatch_p) {
     _diff = _mm_or_si128(_ref_flags, _diff);
   } else {
@@ -18018,21 +19508,208 @@ block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
   }
 
   return _diff;
-#endif
 }
 
+static __m128i
+block_diff_a2itc_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			       bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
 
-static UINT4
-block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
-  if (genestrand == +2) {
-    if (plusp) {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
-    } else {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
-    }
+  read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-C to query-T mismatches */
+    _diff = _mm_setzero_si128();
   } else {
-    if (plusp) {
+    /* Mark genome-C to query-T mismatches */
+    _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced T->C nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+#endif
+
+#ifdef HAVE_SSSE3
+/* Convert T->C: new high = high & ~low */
+static __m128i
+block_diff_a2itc_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int startcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni);
+  read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-C to query-T mismatches */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-C to query-T mismatches */
+    _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced T->C nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+
+static __m128i
+block_diff_a2itc_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool query_unk_mismatch_local_p, bool sarrayp,
+			      int endcolumni) {
+  __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni);
+  read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni);
+
+  if (sarrayp == true) {
+    /* Ignore genome-C to query-T mismatches */
+    _diff = _mm_setzero_si128();
+  } else {
+    /* Mark genome-C to query-T mismatches */
+    _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced T->C nts  */
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high)));
+  _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm_or_si128(_query_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm_or_si128(_ref_flags, _diff);
+  } else {
+    _diff = _mm_andnot_si128(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+#ifdef HAVE_AVX2
+/* Convert T->C: new high = high & ~low */
+static __m256i
+block_diff_a2itc_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_256(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-C to query-T mismatches */
+    _diff = _mm256_setzero_si256();
+  } else {
+    /* Mark genome-C to query-T mismatches */
+    _diff = _mm256_and_si256(_mm256_and_si256(_query_high, _query_low), _mm256_andnot_si256(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced T->C nts  */
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_andnot_si256(_query_low, _query_high), _mm256_andnot_si256(_ref_low, _ref_high)));
+  _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm256_or_si256(_query_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm256_or_si256(_ref_flags, _diff);
+  } else {
+    _diff = _mm256_andnot_si256(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+#ifdef HAVE_AVX512
+/* Convert T->C: new high = high & ~low */
+static __m512i
+block_diff_a2itc_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		      bool query_unk_mismatch_local_p, bool sarrayp) {
+  __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags;
+
+  read_512(&_query_high,&_query_low,&_query_flags,query_shifted);
+  read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr);
+
+  if (sarrayp == true) {
+    /* Ignore genome-C to query-T mismatches */
+    _diff = _mm512_setzero_si512();
+  } else {
+    /* Mark genome-C to query-T mismatches */
+    _diff = _mm512_and_si512(_mm512_and_si512(_query_high, _query_low), _mm512_andnot_si512(_ref_high, _ref_low));
+  }
+
+  /* Compare reduced T->C nts  */
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_andnot_si512(_query_low, _query_high), _mm512_andnot_si512(_ref_low, _ref_high)));
+  _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low));
+
+  if (query_unk_mismatch_local_p) {
+    _diff = _mm512_or_si512(_query_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_query_flags, _diff);
+  }
+
+  if (genome_unk_mismatch_p) {
+    _diff = _mm512_or_si512(_ref_flags, _diff);
+  } else {
+    _diff = _mm512_andnot_si512(_ref_flags, _diff);
+  }
+
+  return _diff;
+}
+#endif
+
+
+static UINT4
+block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
       return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
       return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
@@ -18041,253 +19718,1146 @@ block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 }
 
 
-static Genomediff_T
-block_diff_atoi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_atoi_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   } else {
     if (plusp) {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   }
 }
 
-static UINT4
-block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-			   bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_atoi_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			      int startcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     } else {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     } else {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
     }
   }
 }
 
-static Genomediff_T
-block_diff_atoi_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+static __m128i
+block_diff_atoi_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			      int endcolumni) { /* hi-side dispatcher: endcolumni must go to the *_shift_hi kernels */
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     } else {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     } else {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
     }
   }
 }
+#endif
+#endif
 
-#ifdef GSNAP
-/* Ignores snp_ptr */
-static UINT4
-block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
-			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_atoi_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			     int startcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     } else {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     } else {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
     }
   }
 }
-#endif
 
-#ifdef GSNAP
-/* Ignores snp_ptr */
-static Genomediff_T
-block_diff_atoi_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
-		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+static __m128i
+block_diff_atoi_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			     int endcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
     } else {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
     } else {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
     }
   }
 }
 #endif
 
-
-/************************************************************************
- *  TTOC
- ************************************************************************/
-
-static UINT4
-block_diff_ttoc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_atoi_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   } else {
     if (plusp) {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   }
 }
+#endif
 
-
-static Genomediff_T
-block_diff_ttoc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-		 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_atoi_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   } else {
     if (plusp) {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     } else {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
     }
   }
 }
+#endif
 
 static UINT4
-block_diff_ttoc_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
 			   bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
   if (genestrand == +2) {
     if (plusp) {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
       return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
     } else {
       return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
     }
+  }
+}
+
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_atoi_sarray_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
   } else {
     if (plusp) {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
     } else {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
     }
   }
 }
 
-static Genomediff_T
-block_diff_ttoc_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
-			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_atoi_sarray_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				     int startcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
     } else {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
     } else {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
     }
   }
 }
 
-#ifdef GSNAP
-/* Ignores snp_ptr */
-static UINT4
-block_diff_ttoc_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
-			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+static __m128i
+block_diff_atoi_sarray_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				     int endcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
     } else {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
     } else {
-      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
     }
   }
 }
 #endif
+#endif
 
-#ifdef GSNAP
-/* Ignores snp_ptr */
-static Genomediff_T
-block_diff_ttoc_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
-		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_atoi_sarray_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				    bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				    int startcolumni) {
   if (genestrand == +2) {
     if (plusp) {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
     } else {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
     }
   } else {
     if (plusp) {
-      return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
     } else {
-      return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
     }
   }
 }
-#endif
-
-
-/* query_shifted, (snp_ptr,) ref_ptr, plusp, genestrand, query_unk_mismatch_local_p */
-typedef Genomediff_T (*Diffproc_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
-typedef Genomediff_T (*Diffproc_snp_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
-typedef UINT4 (*Diffproc_32_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
-typedef UINT4 (*Diffproc_snp_32_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
-
-static Diffproc_T block_diff;
-static Diffproc_snp_T block_diff_snp;
-static Diffproc_32_T block_diff_32;
-static Diffproc_snp_32_T block_diff_snp_32;
-
-/* For CMET and ATOI, ignores genome-to-query mismatches.  Used by
-   Genome_consecutive procedures, called only by sarray-read.c */
-static Diffproc_T block_diff_sarray; 
-static Diffproc_32_T block_diff_sarray_32; 
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-/* Skip */
-#else
-static __m128i _BOUND_HIGH;
-static __m128i _BOUND_LOW;
+static __m128i
+block_diff_atoi_sarray_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				    bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				    int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    }
+  }
+}
+#endif
+
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_atoi_sarray_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
+#endif
+
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_atoi_sarray_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
+#endif
+
+
+#ifdef GSNAP
+/* Ignores snp_ptr */
+static UINT4
+block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_SSE2)
+/* Ignores snp_ptr */
+static __m128i
+block_diff_atoi_snp_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_atoi_snp_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p, int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_atoi_snp_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p, int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  }
+}
+#endif
+#endif
+
+#if defined(GSNAP) && defined(HAVE_SSSE3)
+/* Ignores snp_ptr */
+static __m128i
+block_diff_atoi_snp_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_atoi_snp_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  }
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_AVX2)
+/* Ignores snp_ptr */
+static __m256i
+block_diff_atoi_snp_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_AVX512)
+/* Ignores snp_ptr */
+static __m512i
+block_diff_atoi_snp_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+
+/************************************************************************
+ *  TTOC
+ ************************************************************************/
+
+static UINT4
+block_diff_ttoc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+
+
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_ttoc_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_ttoc_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			      int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_ttoc_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			      bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			      int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  }
+}
+#endif
+#endif
+
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_ttoc_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			     int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_ttoc_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+			     int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+    }
+  }
+}
+#endif
+
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_ttoc_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_ttoc_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+		     bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+static UINT4
+block_diff_ttoc_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			   bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
+
+#ifdef HAVE_SSE2
+static __m128i
+block_diff_ttoc_sarray_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_ttoc_sarray_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				     int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_ttoc_sarray_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				     bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				     int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					   endcolumni);
+    }
+  }
+}
+#endif
+#endif
+
+#ifdef HAVE_SSSE3
+static __m128i
+block_diff_ttoc_sarray_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				    bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				    int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_ttoc_sarray_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+				    bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				    int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    } else {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    } else {
+      return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true,
+					  endcolumni);
+    }
+  }
+}
+#endif
+
+#ifdef HAVE_AVX2
+static __m256i
+block_diff_ttoc_sarray_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
+#endif
+
+#ifdef HAVE_AVX512
+static __m512i
+block_diff_ttoc_sarray_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr,
+			    bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    } else {
+      return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true);
+    }
+  }
+}
+#endif
+
+#ifdef GSNAP
+/* Ignores snp_ptr */
+static UINT4
+block_diff_ttoc_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_SSE2)
+/* Ignores snp_ptr */
+static __m128i
+block_diff_ttoc_snp_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    } else {
+      return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+    }
+  }
+}
+
+#ifdef USE_SHIFT_HILO
+static __m128i
+block_diff_ttoc_snp_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				  int startcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   startcolumni);
+    }
+  }
+}
+
+static __m128i
+block_diff_ttoc_snp_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				  bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				  int endcolumni) {
+  if (genestrand == +2) {
+    if (plusp) {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  } else {
+    if (plusp) {
+      return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    } else {
+      return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					   endcolumni);
+    }
+  }
+}
+#endif
+#endif
+
+#if defined(GSNAP) && defined(HAVE_SSSE3)
+/* wrap_lo form of the TTOC SNP diff.  Ignores snp_ptr: the call is forwarded,
+   with sarrayp false, to block_diff_a2iag_128_wrap_lo or
+   block_diff_a2itc_128_wrap_lo according to genestrand and plusp. */
+static __m128i
+block_diff_ttoc_snp_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int startcolumni) {
+  bool use_a2iag_p;
+
+  /* a2iag for (genestrand +2, plus) and (any other genestrand, minus); else a2itc */
+  if (genestrand == +2) {
+    use_a2iag_p = plusp;
+  } else {
+    use_a2iag_p = !plusp;
+  }
+
+  if (use_a2iag_p) {
+    return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  startcolumni);
+  }
+  return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+				      startcolumni);
+}
+
+/* wrap_hi form of the TTOC SNP diff.  Ignores snp_ptr: the call is forwarded,
+   with sarrayp false, to block_diff_a2iag_128_wrap_hi or
+   block_diff_a2itc_128_wrap_hi according to genestrand and plusp. */
+static __m128i
+block_diff_ttoc_snp_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+				 bool plusp, int genestrand, bool query_unk_mismatch_local_p,
+				 int endcolumni) {
+  bool use_a2iag_p;
+
+  /* a2iag for (genestrand +2, plus) and (any other genestrand, minus); else a2itc */
+  if (genestrand == +2) {
+    use_a2iag_p = plusp;
+  } else {
+    use_a2iag_p = !plusp;
+  }
+  if (use_a2iag_p) {
+    return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+					  endcolumni);
+  }
+  return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false,
+				      endcolumni);
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_AVX2)
+/* 256-bit form of the TTOC SNP diff.  Ignores snp_ptr: the call is forwarded,
+   with sarrayp false, to block_diff_a2iag_256 for (genestrand +2, plus) and
+   (any other genestrand, minus), and to block_diff_a2itc_256 otherwise. */
+static __m256i
+block_diff_ttoc_snp_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  bool use_a2iag_p;
+
+  if (genestrand == +2) {
+    use_a2iag_p = plusp;
+  } else {
+    use_a2iag_p = !plusp;
+  }
+  if (use_a2iag_p) {
+    return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+  }
+  return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+}
+#endif
+
+#if defined(GSNAP) && defined(HAVE_AVX512)
+/* 512-bit form of the TTOC SNP diff.  Ignores snp_ptr: the call is forwarded,
+   with sarrayp false, to block_diff_a2iag_512 for (genestrand +2, plus) and
+   (any other genestrand, minus), and to block_diff_a2itc_512 otherwise. */
+static __m512i
+block_diff_ttoc_snp_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr,
+			 bool plusp, int genestrand, bool query_unk_mismatch_local_p) {
+  bool use_a2iag_p;
+
+  if (genestrand == +2) {
+    use_a2iag_p = plusp;
+  } else {
+    use_a2iag_p = !plusp;
+  }
+  if (use_a2iag_p) {
+    return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+  }
+  return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false);
+}
+#endif
+
+
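+/* Argument order of the Diffproc types below: query_shifted, (snp_ptr,) ref_ptr, plusp, genestrand, query_unk_mismatch_local_p; the wrap and shift types take one further int (startcolumni or endcolumni) */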
+#ifdef HAVE_AVX512
+typedef __m512i (*Diffproc_512_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef __m512i (*Diffproc_snp_512_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+static Diffproc_512_T block_diff_512;
+static Diffproc_snp_512_T block_diff_snp_512;
+#endif
+
+#ifdef HAVE_AVX2
+typedef __m256i (*Diffproc_256_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef __m256i (*Diffproc_snp_256_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+static Diffproc_256_T block_diff_256;
+static Diffproc_snp_256_T block_diff_snp_256;
+#endif
+
+#ifdef HAVE_SSSE3
+typedef __m128i (*Diffproc_128_wrap_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool, int);
+typedef __m128i (*Diffproc_snp_128_wrap_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool, int);
+static Diffproc_128_wrap_T block_diff_128_wrap_lo;
+static Diffproc_snp_128_wrap_T block_diff_snp_128_wrap_lo;
+static Diffproc_128_wrap_T block_diff_128_wrap_hi;
+static Diffproc_snp_128_wrap_T block_diff_snp_128_wrap_hi;
+#endif
+
+#ifdef HAVE_SSE2
+#ifdef USE_SHIFT_HILO
+typedef __m128i (*Diffproc_128_shift_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool, int);
+typedef __m128i (*Diffproc_snp_128_shift_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool, int);
+static Diffproc_128_shift_T block_diff_128_shift_lo;
+static Diffproc_snp_128_shift_T block_diff_snp_128_shift_lo;
+static Diffproc_128_shift_T block_diff_128_shift_hi;
+static Diffproc_snp_128_shift_T block_diff_snp_128_shift_hi;
+#endif
+
+typedef __m128i (*Diffproc_128_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef __m128i (*Diffproc_snp_128_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+static Diffproc_128_T block_diff_128;
+static Diffproc_snp_128_T block_diff_snp_128;
+#endif
+
+typedef UINT4 (*Diffproc_32_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+typedef UINT4 (*Diffproc_snp_32_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool);
+
+static Diffproc_32_T block_diff_32;
+static Diffproc_snp_32_T block_diff_snp_32;
+
+/* For CMET and ATOI, ignores genome-to-query mismatches.  Used by
+   Genome_consecutive procedures, called only by sarray-read.c */
+#ifdef HAVE_AVX512
+static Diffproc_512_T block_diff_sarray_512;
+#endif
+#ifdef HAVE_AVX2
+static Diffproc_256_T block_diff_sarray_256; 
+#endif
+#ifdef HAVE_SSSE3
+static Diffproc_128_wrap_T block_diff_sarray_128_wrap_lo;
+static Diffproc_128_wrap_T block_diff_sarray_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+static Diffproc_128_T block_diff_sarray_128; 
+#ifdef USE_SHIFT_HILO
+static Diffproc_128_shift_T block_diff_sarray_128_shift_lo;
+static Diffproc_128_shift_T block_diff_sarray_128_shift_hi;
+#endif
+#endif
+static Diffproc_32_T block_diff_sarray_32; 
+
+#ifdef HAVE_AVX512
+static __m512i _BOUND_HIGH_512;
+static __m512i _BOUND_LOW_512;
+#endif
+#ifdef HAVE_AVX2
+static __m256i _BOUND_HIGH_256;
+static __m256i _BOUND_LOW_256;
+#endif
+#ifdef HAVE_SSE2
+static __m128i _BOUND_HIGH;
+static __m128i _BOUND_LOW;
 #endif
 
 void
 Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
 		 bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in,
 		 Mode_T mode) {
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-/* Skip */
-#else
+
+#ifdef HAVE_AVX512
+  _BOUND_HIGH_512 = _mm512_set_epi32(512,480,448,416, 384,352,320,288, 256,224,192,160, 128,96,64,32);
+  _BOUND_LOW_512 = _mm512_set_epi32(480,448,416,384, 352,320,288,256, 224,192,160,128, 96,64,32,0);
+#endif
+#ifdef HAVE_AVX2
+  _BOUND_HIGH_256 = _mm256_set_epi32(256,224,192,160,128,96,64,32);
+  _BOUND_LOW_256 = _mm256_set_epi32(224,192,160,128,96,64,32,0);
+#endif
+#ifdef HAVE_SSE2
   _BOUND_HIGH = _mm_set_epi32(128,96,64,32);
   _BOUND_LOW = _mm_set_epi32(96,64,32,0);
 #endif
@@ -18299,26 +20869,117 @@ Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
 
   switch (mode) {
   case STANDARD:
-    block_diff = block_diff_standard;
-    block_diff_sarray = block_diff_standard;
+#ifdef HAVE_AVX512
+    block_diff_512 = block_diff_standard_512;
+    block_diff_sarray_512 = block_diff_standard_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_256 = block_diff_standard_256;
+    block_diff_sarray_256 = block_diff_standard_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_128_wrap_lo = block_diff_standard_128_wrap_lo;
+    block_diff_sarray_128_wrap_lo = block_diff_standard_128_wrap_lo;
+    block_diff_128_wrap_hi = block_diff_standard_128_wrap_hi;
+    block_diff_sarray_128_wrap_hi = block_diff_standard_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_128 = block_diff_standard_128;
+    block_diff_sarray_128 = block_diff_standard_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_128_shift_lo = block_diff_standard_128_shift_lo;
+    block_diff_sarray_128_shift_lo = block_diff_standard_128_shift_lo;
+    block_diff_128_shift_hi = block_diff_standard_128_shift_hi;
+    block_diff_sarray_128_shift_hi = block_diff_standard_128_shift_hi;
+#endif
+#endif
     block_diff_32 = block_diff_standard_32;
     block_diff_sarray_32 = block_diff_standard_32;
     break;
+
   case CMET_STRANDED: case CMET_NONSTRANDED:
-    block_diff = block_diff_cmet;
-    block_diff_sarray = block_diff_cmet_sarray;
+#ifdef HAVE_AVX512
+    block_diff_512 = block_diff_cmet_512;
+    block_diff_sarray_512 = block_diff_cmet_sarray_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_256 = block_diff_cmet_256;
+    block_diff_sarray_256 = block_diff_cmet_sarray_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_128_wrap_lo = block_diff_cmet_128_wrap_lo;
+    block_diff_sarray_128_wrap_lo = block_diff_cmet_sarray_128_wrap_lo;
+    block_diff_128_wrap_hi = block_diff_cmet_128_wrap_hi;
+    block_diff_sarray_128_wrap_hi = block_diff_cmet_sarray_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_128 = block_diff_cmet_128;
+    block_diff_sarray_128 = block_diff_cmet_sarray_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_128_shift_lo = block_diff_cmet_128_shift_lo;
+    block_diff_sarray_128_shift_lo = block_diff_cmet_128_shift_lo;
+    block_diff_128_shift_hi = block_diff_cmet_128_shift_hi;
+    block_diff_sarray_128_shift_hi = block_diff_cmet_128_shift_hi;
+#endif
+#endif
     block_diff_32 = block_diff_cmet_32;
     block_diff_sarray_32 = block_diff_cmet_sarray_32;
     break;
+
   case ATOI_STRANDED: case ATOI_NONSTRANDED:
-    block_diff = block_diff_atoi;
-    block_diff_sarray = block_diff_atoi_sarray;
+#ifdef HAVE_AVX512
+    block_diff_512 = block_diff_atoi_512;
+    block_diff_sarray_512 = block_diff_atoi_sarray_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_256 = block_diff_atoi_256;
+    block_diff_sarray_256 = block_diff_atoi_sarray_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_128_wrap_lo = block_diff_atoi_128_wrap_lo;
+    block_diff_sarray_128_wrap_lo = block_diff_atoi_sarray_128_wrap_lo;
+    block_diff_128_wrap_hi = block_diff_atoi_128_wrap_hi;
+    block_diff_sarray_128_wrap_hi = block_diff_atoi_sarray_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_128 = block_diff_atoi_128;
+    block_diff_sarray_128 = block_diff_atoi_sarray_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_128_shift_lo = block_diff_atoi_128_shift_lo;
+    block_diff_sarray_128_shift_lo = block_diff_atoi_128_shift_lo;
+    block_diff_128_shift_hi = block_diff_atoi_128_shift_hi;
+    block_diff_sarray_128_shift_hi = block_diff_atoi_128_shift_hi;
+#endif
+#endif
     block_diff_32 = block_diff_atoi_32;
     block_diff_sarray_32 = block_diff_atoi_sarray_32;
     break;
+
   case TTOC_STRANDED: case TTOC_NONSTRANDED:
-    block_diff = block_diff_ttoc;
-    block_diff_sarray = block_diff_ttoc_sarray;
+#ifdef HAVE_AVX512
+    block_diff_512 = block_diff_ttoc_512;
+    block_diff_sarray_512 = block_diff_ttoc_sarray_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_256 = block_diff_ttoc_256;
+    block_diff_sarray_256 = block_diff_ttoc_sarray_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_128_wrap_lo = block_diff_ttoc_128_wrap_lo;
+    block_diff_sarray_128_wrap_lo = block_diff_ttoc_sarray_128_wrap_lo;
+    block_diff_128_wrap_hi = block_diff_ttoc_128_wrap_hi;
+    block_diff_sarray_128_wrap_hi = block_diff_ttoc_sarray_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_128 = block_diff_ttoc_128;
+    block_diff_sarray_128 = block_diff_ttoc_sarray_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_128_shift_lo = block_diff_ttoc_128_shift_lo;
+    block_diff_sarray_128_shift_lo = block_diff_ttoc_128_shift_lo;
+    block_diff_128_shift_hi = block_diff_ttoc_128_shift_hi;
+    block_diff_sarray_128_shift_hi = block_diff_ttoc_128_shift_hi;
+#endif
+#endif
     block_diff_32 = block_diff_ttoc_32;
     block_diff_sarray_32 = block_diff_ttoc_sarray_32;
     break;
@@ -18326,82 +20987,100 @@ Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
   }
 
 #ifndef GSNAP
-  block_diff_snp = block_diff_standard_wildcard;
+#ifdef HAVE_AVX512
+  block_diff_snp_512 = block_diff_standard_wildcard_512;
+#endif
+#ifdef HAVE_AVX2
+  block_diff_snp_256 = block_diff_standard_wildcard_256;
+#endif
+#ifdef HAVE_SSE2
+  block_diff_snp_128 = block_diff_standard_wildcard_128;
+#endif
   block_diff_snp_32 = block_diff_standard_wildcard_32;
+
 #else
   switch (mode) {
   case STANDARD:
-    block_diff_snp = block_diff_standard_wildcard;
-    block_diff_snp_32 = block_diff_standard_wildcard_32;
-    break;
-  case CMET_STRANDED: case CMET_NONSTRANDED:
-    block_diff_snp = block_diff_cmet_snp;
-    block_diff_snp_32 = block_diff_cmet_snp_32;
-    break;
-  case ATOI_STRANDED: case ATOI_NONSTRANDED:
-    block_diff_snp = block_diff_atoi_snp;
-    block_diff_snp_32 = block_diff_atoi_snp_32;
-    break;
-  case TTOC_STRANDED: case TTOC_NONSTRANDED:
-    block_diff_snp = block_diff_ttoc_snp;
-    block_diff_snp_32 = block_diff_ttoc_snp_32;
-    break;
-  default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
-  }
+#ifdef HAVE_AVX512
+    block_diff_snp_512 = block_diff_standard_wildcard_512;
 #endif
-
-  return;
-}
-
-/* genomebits available */
-void
-Genome_hr_user_setup (UINT4 *ref_blocks_in,
-		      bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in,
-		      Mode_T mode) {
-  ref_blocks = ref_blocks_in;
-  snp_blocks = (UINT4 *) NULL;
-  query_unk_mismatch_p = query_unk_mismatch_p_in;
-  genome_unk_mismatch_p = genome_unk_mismatch_p_in;
-
-  switch (mode) {
-  case STANDARD:
-    block_diff = block_diff_standard;
-    block_diff_32 = block_diff_standard_32;
-    break;
-  case CMET_STRANDED: case CMET_NONSTRANDED:
-    block_diff = block_diff_cmet;
-    block_diff_32 = block_diff_cmet_32;
-    break;
-  case ATOI_STRANDED: case ATOI_NONSTRANDED:
-    block_diff = block_diff_atoi;
-    block_diff_32 = block_diff_atoi_32;
-    break;
-  case TTOC_STRANDED: case TTOC_NONSTRANDED:
-    block_diff = block_diff_ttoc;
-    block_diff_32 = block_diff_ttoc_32;
+#ifdef HAVE_AVX2
+    block_diff_snp_256 = block_diff_standard_wildcard_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_snp_128_wrap_lo = block_diff_standard_wildcard_128_wrap_lo;
+    block_diff_snp_128_wrap_hi = block_diff_standard_wildcard_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_snp_128 = block_diff_standard_wildcard_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_snp_128_shift_lo = block_diff_standard_wildcard_128_shift_lo;
+    block_diff_snp_128_shift_hi = block_diff_standard_wildcard_128_shift_hi;
+#endif
+#endif
+    block_diff_snp_32 = block_diff_standard_wildcard_32;
     break;
-  default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
-  }
 
-#ifndef GSNAP
-  block_diff_snp = block_diff_standard_wildcard;
-  block_diff_snp_32 = block_diff_standard_wildcard_32;
-#else
-  switch (mode) {
-  case STANDARD:
-    block_diff_snp = block_diff_standard_wildcard; 
-    block_diff_snp_32 = block_diff_standard_wildcard_32; 
-    break;
   case CMET_STRANDED: case CMET_NONSTRANDED:
-    block_diff_snp = block_diff_cmet_snp;
+#ifdef HAVE_AVX512
+    block_diff_snp_512 = block_diff_cmet_snp_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_snp_256 = block_diff_cmet_snp_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_snp_128_wrap_lo = block_diff_cmet_snp_128_wrap_lo;
+    block_diff_snp_128_wrap_hi = block_diff_cmet_snp_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_snp_128 = block_diff_cmet_snp_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_snp_128_shift_lo = block_diff_cmet_snp_128_shift_lo;
+    block_diff_snp_128_shift_hi = block_diff_cmet_snp_128_shift_hi;
+#endif
+#endif
     block_diff_snp_32 = block_diff_cmet_snp_32;
     break;
+
   case ATOI_STRANDED: case ATOI_NONSTRANDED:
-    block_diff_snp = block_diff_atoi_snp;
+#ifdef HAVE_AVX512
+    block_diff_snp_512 = block_diff_atoi_snp_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_snp_256 = block_diff_atoi_snp_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_snp_128_wrap_lo = block_diff_atoi_snp_128_wrap_lo;
+    block_diff_snp_128_wrap_hi = block_diff_atoi_snp_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_snp_128 = block_diff_atoi_snp_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_snp_128_shift_lo = block_diff_atoi_snp_128_shift_lo;
+    block_diff_snp_128_shift_hi = block_diff_atoi_snp_128_shift_hi;
+#endif
+#endif
     block_diff_snp_32 = block_diff_atoi_snp_32;
     break;
+
   case TTOC_STRANDED: case TTOC_NONSTRANDED:
-    block_diff_snp = block_diff_ttoc_snp;
+#ifdef HAVE_AVX512
+    block_diff_snp_512 = block_diff_ttoc_snp_512;
+#endif
+#ifdef HAVE_AVX2
+    block_diff_snp_256 = block_diff_ttoc_snp_256;
+#endif
+#ifdef HAVE_SSSE3
+    block_diff_snp_128_wrap_lo = block_diff_ttoc_snp_128_wrap_lo;
+    block_diff_snp_128_wrap_hi = block_diff_ttoc_snp_128_wrap_hi;
+#endif
+#ifdef HAVE_SSE2
+    block_diff_snp_128 = block_diff_ttoc_snp_128;
+#ifdef USE_SHIFT_HILO
+    block_diff_snp_128_shift_lo = block_diff_ttoc_snp_128_shift_lo;
+    block_diff_snp_128_shift_hi = block_diff_ttoc_snp_128_shift_hi;
+#endif
+#endif
     block_diff_snp_32 = block_diff_ttoc_snp_32;
     break;
   default: fprintf(stderr,"Mode %d not recognized\n",mode); abort();
@@ -18412,108 +21091,152 @@ Genome_hr_user_setup (UINT4 *ref_blocks_in,
 }
 
 
-
 /************************************************************************/
 
-/*                 76543210 */
-#define HIGH_BIT 0x80000000
-
-#define clear_start_32(diff,startdiscard) (diff & (~0U << (startdiscard)))
-#define clear_end_32(diff,enddiscard) (diff & ~(~0U << (enddiscard)))
-
-/* Needed only for debugging */
-#define clear_start_mask(startdiscard) (~0U << (startdiscard))
-#define clear_end_mask(enddiscard) (~(~0U << (enddiscard)))
-
-/* Needed only for debugging */
-#define set_start_mask(startdiscard) (~(~0U << startdiscard))
-#define set_end_mask(enddiscard) (~0U << enddiscard)
+#ifdef HAVE_AVX512
+/* Need to implement.  Extract procedures not available. */
+#endif
 
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#ifdef HAVE_AVX2
+#define nonzero_p_256(diff) !_mm256_testz_si256(diff,diff)
 
-#define nonzero_p(diff) diff
 
-#define clear_start(diff,startdiscard) (diff & (~0U << (startdiscard)))
-#define clear_end(diff,enddiscard) (diff & ~(~0U << (enddiscard)))
+#if defined(HAVE_POPCNT)	/* popcount_ones_256(_diff): sum of the set-bit counts of the four 64-bit lanes of _diff (_diff is evaluated four times) */
+#define popcount_ones_256(_diff) (_popcnt64(_mm256_extract_epi64(_diff,0)) + _popcnt64(_mm256_extract_epi64(_diff,1)) + _popcnt64(_mm256_extract_epi64(_diff,2)) + _popcnt64(_mm256_extract_epi64(_diff,3)))
+#elif defined(HAVE_MM_POPCNT)
+#define popcount_ones_256(_diff) (_mm_popcnt_u64(_mm256_extract_epi64(_diff,0)) + _mm_popcnt_u64(_mm256_extract_epi64(_diff,1)) + _mm_popcnt_u64(_mm256_extract_epi64(_diff,2)) + _mm_popcnt_u64(_mm256_extract_epi64(_diff,3)))
+#else
+#define popcount_ones_256(_diff) (__builtin_popcountll(_mm256_extract_epi64(_diff,0)) + __builtin_popcountll(_mm256_extract_epi64(_diff,1)) + __builtin_popcountll(_mm256_extract_epi64(_diff,2)) + __builtin_popcountll(_mm256_extract_epi64(_diff,3)))
+#endif
 
-/* Same speed: clear_highbit(diff,relpos) (diff - (HIGH_BIT >> relpos)) */
-/* Note: xor assumes that bit at relpos was on */
-#define clear_highbit(diff,relpos) (diff ^ (HIGH_BIT >> relpos))
+static int
+count_leading_zeroes_256 (__m256i _diff) {	/* Counts the zero bits above the highest set bit of _diff, testing 64-bit lanes 3, 2, 1, 0 in that order */
+  debug4(printf("Entered count_leading_zeroes with "));
+  debug4(print_vector_256_hex(_diff));
+  UINT8 x;			/* The 64-bit lane currently being tested */
 
-/* Slower: clear_lowbit(diff,relpos) diff -= (1 << relpos) */
-#define clear_lowbit(diff,relpos) (diff & (diff - 1));
+#ifdef HAVE_LZCNT
+  if ((x = _mm256_extract_epi64(_diff,3)) != 0) {
+    return (int) _lzcnt_u64(x);
+  } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) {
+    return 64 + (int) _lzcnt_u64(x);	/* Lane 3 was all zero: 64 bits already counted */
+  } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) {
+    return 128 + (int) _lzcnt_u64(x);
+  } else {
+    return 192 + (int) _lzcnt_u64(_mm256_extract_epi64(_diff,0));	/* Lanes 3..1 were all zero; lane 0 is not tested for zero */
+  }
 
+#elif defined(HAVE_BUILTIN_CLZ)
+  if ((x = _mm256_extract_epi64(_diff,3)) != 0) {
+    return (int) __builtin_clzll(x);
+  } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) {
+    return 64 + (int) __builtin_clzll(x);
+  } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) {
+    return 128 + (int) __builtin_clzll(x);
+  } else {
+    return 192 + (int) __builtin_clzll(_mm256_extract_epi64(_diff,0));	/* NOTE(review): __builtin_clzll(0) is undefined, so this presumably relies on _diff being nonzero -- confirm against callers */
+  }
 
-#if !defined(HAVE_SSE4_2)
-#define popcount_ones(diff) (count_bits[diff & 0x0000FFFF] + count_bits[diff >> 16])
-#elif defined(HAVE_POPCNT)
-#define popcount_ones(diff) (_popcnt32(diff))
-#elif defined(HAVE_MM_POPCNT)
-#define popcount_ones(diff) (_mm_popcnt_u32(diff))
-#elif defined(HAVE_BUILTIN_POPCOUNT)
-#define popcount_ones(diff) (__builtin_popcount(diff))
 #else
-#define popcount_ones(diff) (count_bits[diff & 0x0000FFFF] + count_bits[diff >> 16])
+  abort();			/* Neither HAVE_LZCNT nor HAVE_BUILTIN_CLZ is defined: no implementation */
 #endif
+}
+
+static int
+count_trailing_zeroes_256 (__m256i _diff) {	/* Counts the zero bits below the lowest set bit of _diff, testing 64-bit lanes 0, 1, 2, 3 in that order */
+  debug4(printf("Entered count_trailing_zeroes with "));
+  debug4(print_vector_256_hex(_diff));
+  UINT8 x;			/* The 64-bit lane currently being tested */
+
+#ifdef HAVE_TZCNT
+  if ((x = _mm256_extract_epi64(_diff,0)) != 0) {
+    return (int) _tzcnt_u64(x);
+  } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) {
+    return 64 + (int) _tzcnt_u64(x);	/* Lane 0 was all zero: 64 bits already counted */
+  } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) {
+    return 128 + (int) _tzcnt_u64(x);
+  } else {
+    return 192 + (int) _tzcnt_u64(_mm256_extract_epi64(_diff,3));	/* Lanes 0..2 were all zero; lane 3 is not tested for zero */
+  }
 
+#elif defined(HAVE_BUILTIN_CTZ)
+  if ((x = _mm256_extract_epi64(_diff,0)) != 0) {
+    return (int) __builtin_ctzll(x);
+  } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) {
+    return 64 + (int) __builtin_ctzll(x);
+  } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) {
+    return 128 + (int) __builtin_ctzll(x);
+  } else {
+    return 192 + (int) __builtin_ctzll(_mm256_extract_epi64(_diff,3));	/* NOTE(review): __builtin_ctzll(0) is undefined, so this presumably relies on _diff being nonzero -- confirm against callers */
+  }
 
-#if !defined(HAVE_SSE4_2)
-#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff])
-#elif defined(HAVE_LZCNT)
-#define count_leading_zeroes(diff) _lzcnt_u32(diff)
-#elif defined(HAVE_BUILTIN_CLZ)
-#define count_leading_zeroes(diff) __builtin_clz(diff)
 #else
-#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff])
+  abort();			/* Neither HAVE_TZCNT nor HAVE_BUILTIN_CTZ is defined: no implementation */
 #endif
 
-#if !defined(HAVE_SSE4_2)
-#define count_trailing_zeroes(diff) mod_37_bit_position[(-diff & diff) % 37]
-#elif defined(HAVE_TZCNT)
-#define count_trailing_zeroes(diff) _tzcnt_u32(diff)
-#elif defined(HAVE_BUILTIN_CTZ)
-#define count_trailing_zeroes(diff) __builtin_ctz(diff)
+}
+
+static __m256i
+clear_highbit_256 (__m256i _diff, int leading_zeroes) {	/* Returns _diff with the single bit at position 255 - leading_zeroes flipped */
+  __m256i _subtract, _relpos;
+  int relpos;
+
+  relpos = 255 - leading_zeroes;	/* Bit position within the 256 bits, counted from bit 0 of lane 0 */
+  debug3(printf("Clearing high bit at relpos %d\n",relpos));
+
+  _subtract = _mm256_slli_epi32(_mm256_set1_epi32(1), relpos % 32);	/* 1 << (relpos % 32) in every 32-bit lane */
+  _relpos = _mm256_set1_epi32(relpos);
+  _subtract = _mm256_and_si256(_mm256_cmpgt_epi32(_BOUND_HIGH_256, _relpos), _subtract);	/* Keep only lanes whose upper bound exceeds relpos */
+  _subtract = _mm256_andnot_si256(_mm256_cmpgt_epi32(_BOUND_LOW_256, _relpos), _subtract);	/* Drop lanes whose lower bound exceeds relpos, leaving the one lane containing relpos */
+
+  debug3(printf("Subtract: "));
+  debug3(print_vector_256_hex(_subtract));
+#if 0
+  /* Disabled alternative: subtract the bit.  latency 1, throughput: 0.5 */
+  return _mm256_sub_epi32(_diff, _subtract);
 #else
-/* lowbit = -diff & diff */
-#define count_trailing_zeroes(diff) mod_37_bit_position[(-diff & diff) % 37]
+  /* _mm256_xor_si256 also works, provided the bit at relpos is set in _diff (xor flips it).  latency 1, throughput: 0.33 */
+  return _mm256_xor_si256(_diff, _subtract);
 #endif
+}
 
-/* For trimming */
-#define set_start(diff,startdiscard) (diff | ~(~0U << startdiscard))
-#define set_end(diff,enddiscard) (diff | (~0U << enddiscard))
+/* Returns _diff with the single bit at position relpos flipped.  relpos is equal to trailing_zeroes */
+static __m256i
+clear_lowbit_256 (__m256i _diff, int relpos) {
+  __m256i _subtract, _relpos;
 
-#if defined(DEBUG) || defined(DEBUG5)
-static void
-print_diff_popcount (UINT4 diff) {
-  printf("diff: %08X => nmismatches %d\n",diff,popcount_ones(diff));
-  return;
-}
+  debug3(printf("Clearing low bit at relpos %d\n",relpos));
 
-static void
-print_diff_trailing_zeroes (UINT4 diff, int offset) {
-  printf("diff: %08X => offset %d + trailing zeroes %d\n",diff,offset,count_trailing_zeroes(diff));
-  return;
-}
+  _subtract = _mm256_slli_epi32(_mm256_set1_epi32(1), relpos % 32);	/* 1 << (relpos % 32) in every 32-bit lane */
+  _relpos = _mm256_set1_epi32(relpos);
+  _subtract = _mm256_and_si256(_mm256_cmpgt_epi32(_BOUND_HIGH_256, _relpos), _subtract);	/* Keep only lanes whose upper bound exceeds relpos */
+  _subtract = _mm256_andnot_si256(_mm256_cmpgt_epi32(_BOUND_LOW_256, _relpos), _subtract);	/* Drop lanes whose lower bound exceeds relpos, leaving the one lane containing relpos */
 
-static void
-print_diff_leading_zeroes (UINT4 diff, int offset) {
-  printf("diff: %08X => offset %d - leading zeroes %d\n",diff,offset,count_leading_zeroes(diff));
-  return;
+  debug3(printf("Subtract: "));
+  debug3(print_vector_256_hex(_subtract));
+#if 0
+  /* Disabled alternative: subtract the bit.  latency 1, throughput: 0.5 */
+  return _mm256_sub_epi32(_diff, _subtract);
+#else
+  /* _mm256_xor_si256 also works, provided the bit at relpos is set in _diff (xor flips it).  latency 1, throughput: 0.33 */
+  return _mm256_xor_si256(_diff, _subtract);
+#endif
 }
+
 #endif
 
-#else  /* littleendian and SSE2 */
+
+#ifdef HAVE_SSE2
 
 #ifdef HAVE_SSE4_1
-#define nonzero_p(diff) !_mm_testz_si128(diff,diff)
+#define nonzero_p_128(diff) (!_mm_testz_si128((diff),(diff)))	/* True if any bit of diff is set; fully parenthesized so it composes with surrounding operators */
 #else
-#define nonzero_p(diff) _mm_movemask_epi8(_mm_cmpeq_epi8(diff,_mm_setzero_si128())) != 0xFFFF
+#define nonzero_p_128(diff) (_mm_movemask_epi8(_mm_cmpeq_epi8((diff),_mm_setzero_si128())) != 0xFFFF)	/* True unless all 16 bytes compare equal to zero */
 #endif
 
-
 static __m128i
-clear_start (__m128i _diff, int startdiscard) {
+clear_start_128 (__m128i _diff, int startdiscard) {
   __m128i _mask, _startdiscard;
 #ifdef DEBUG
   __m128i _result;
@@ -18542,7 +21265,7 @@ clear_start (__m128i _diff, int startdiscard) {
 }
 
 static __m128i
-clear_end (__m128i _diff, int enddiscard) {
+clear_end_128 (__m128i _diff, int enddiscard) {
   __m128i _mask, _enddiscard;
 #ifdef DEBUG
   __m128i _result;
@@ -18569,14 +21292,54 @@ clear_end (__m128i _diff, int enddiscard) {
 
   return _mm_andnot_si128(_mask, _diff);
 }
-  
+
+/* Returns _diff with every bit below position startdiscard set to 1 (positions count upward from bit 0 of lane 0).  Based on clear_end */
+static __m128i
+set_start_128 (__m128i _diff, int startdiscard) {
+  __m128i _mask, _startdiscard;
+
+  debug(printf("Setting start at startdiscard %d\n",startdiscard));
+
+#ifdef DEFECTIVE_SSE2_COMPILER
+  _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(startdiscard % 32,0,0,0));
+#else
+  _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), startdiscard % 32);	/* Every lane: ones from bit (startdiscard % 32) upward */
+#endif
+  _startdiscard = _mm_set1_epi32(startdiscard);
+  _mask = _mm_or_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_LOW), _mask);	/* Lanes lying wholly above startdiscard become all ones */
+  _mask = _mm_and_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_HIGH), _mask);	/* Lanes lying wholly below startdiscard become all zeros */
+
+  _mask = _mm_xor_si128(_mask, _mm_set1_epi32(~0U)); /* Take complement of _mask: ones now mark the positions below startdiscard */
+
+  return _mm_or_si128(_mask, _diff);
+}
+
+/* Returns _diff with every bit at position enddiscard and above set to 1 (positions count upward from bit 0 of lane 0).  Based on clear_start */
+static __m128i
+set_end_128 (__m128i _diff, int enddiscard) {
+  __m128i _mask, _enddiscard;
+
+  debug(printf("Setting end at enddiscard %d\n",enddiscard));
+
+#ifdef DEFECTIVE_SSE2_COMPILER
+  _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(enddiscard % 32,0,0,0));
+#else
+  _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), enddiscard % 32);	/* Every lane: ones from bit (enddiscard % 32) upward */
+#endif
+  _enddiscard = _mm_set1_epi32(enddiscard);
+  _mask = _mm_or_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_LOW));	/* Lanes lying wholly above enddiscard become all ones */
+  _mask = _mm_and_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_HIGH));	/* Lanes lying wholly below enddiscard become all zeros */
+
+  return _mm_or_si128(_mask, _diff);
+}
+
 
 #if !defined(HAVE_SSE4_2)
 
 #if 0
 /* Naive method for pre-SSE4.2.  Requires four popcount operations. */
 static int
-popcount_ones (__m128i _diff) {
+popcount_ones_128 (__m128i _diff) {
   UINT4 diff[4];
 
   _mm_store_si128((__m128i *) diff,_diff);
@@ -18600,7 +21363,7 @@ popcount_ones (__m128i _diff) {
 #define CSA(h,l, a,b,c, u,v) u = a ^ b; v = c; h = (a & b) | (u & v); l = u ^ v;
 
 static int
-popcount_ones (__m128i _diff) {
+popcount_ones_128 (__m128i _diff) {
   UINT4 ones, twos, u, v;
   UINT4 diff[4];
 
@@ -18613,17 +21376,17 @@ popcount_ones (__m128i _diff) {
 
 
 #elif defined(HAVE_POPCNT)
-#define popcount_ones(_diff) (_popcnt64(_mm_extract_epi64(_diff,0)) + _popcnt64(_mm_extract_epi64(_diff,1)))
+#define popcount_ones_128(_diff) (_popcnt64(_mm_extract_epi64(_diff,0)) + _popcnt64(_mm_extract_epi64(_diff,1)))
 #elif defined(HAVE_MM_POPCNT)
-#define popcount_ones(_diff) (_mm_popcnt_u64(_mm_extract_epi64(_diff,0)) + _mm_popcnt_u64(_mm_extract_epi64(_diff,1)))
+#define popcount_ones_128(_diff) (_mm_popcnt_u64(_mm_extract_epi64(_diff,0)) + _mm_popcnt_u64(_mm_extract_epi64(_diff,1)))
 #else
-#define popcount_ones(_diff) (__builtin_popcountll(_mm_extract_epi64(_diff,0)) + __builtin_popcountll(_mm_extract_epi64(_diff,1)))
+#define popcount_ones_128(_diff) (__builtin_popcountll(_mm_extract_epi64(_diff,0)) + __builtin_popcountll(_mm_extract_epi64(_diff,1)))
 #endif
 
 
 static int
-count_leading_zeroes (__m128i _diff) {
-  debug4(printf("Entered count_leading_zeroes with "));
+count_leading_zeroes_128 (__m128i _diff) {
+  debug4(printf("Entered count_leading_zeroes_128 with "));
   debug4(print_vector_hex(_diff));
 
 #if defined(HAVE_SSE4_2) && defined(HAVE_LZCNT)
@@ -18665,8 +21428,8 @@ count_leading_zeroes (__m128i _diff) {
 }
 
 static int
-count_trailing_zeroes (__m128i _diff) {
-  debug4(printf("Entered count_trailing_zeroes with "));
+count_trailing_zeroes_128 (__m128i _diff) {
+  debug4(printf("Entered count_trailing_zeroes_128 with "));
   debug4(print_vector_hex(_diff));
 
 #if defined(HAVE_SSE4_2) && defined(HAVE_TZCNT)
@@ -18708,7 +21471,7 @@ count_trailing_zeroes (__m128i _diff) {
 }
 
 static __m128i
-clear_highbit (__m128i _diff, int leading_zeroes) {
+clear_highbit_128 (__m128i _diff, int leading_zeroes) {
   __m128i _subtract, _relpos;
   int relpos;
 
@@ -18737,7 +21500,7 @@ clear_highbit (__m128i _diff, int leading_zeroes) {
 
 /* relpos is equal to trailing_zeroes */
 static __m128i
-clear_lowbit (__m128i _diff, int relpos) {
+clear_lowbit_128 (__m128i _diff, int relpos) {
   __m128i _subtract, _relpos;
 
   debug3(printf("Clearing low bit at relpos %d\n",relpos));
@@ -18762,80 +21525,26 @@ clear_lowbit (__m128i _diff, int relpos) {
 #endif
 }
 
-/* Based on clear_end */
-static __m128i
-set_start (__m128i _diff, int startdiscard) {
-  __m128i _mask, _startdiscard;
-
-  debug(printf("Setting start at startdiscard %d\n",startdiscard));
-
-#ifdef DEFECTIVE_SSE2_COMPILER
-  _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(startdiscard % 32,0,0,0));
-#else
-  _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), startdiscard % 32);
-#endif
-  _startdiscard = _mm_set1_epi32(startdiscard);
-  _mask = _mm_or_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_LOW), _mask);
-  _mask = _mm_and_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_HIGH), _mask);
-
-  _mask = _mm_xor_si128(_mask, _mm_set1_epi32(~0U)); /* Take complement of _mask */
-
-  return _mm_or_si128(_mask, _diff);
-}
-
-/* Based on clear_start */
-static __m128i
-set_end (__m128i _diff, int enddiscard) {
-  __m128i _mask, _enddiscard;
-
-  debug(printf("Setting end at enddiscard %d\n",enddiscard));
-
-#ifdef DEFECTIVE_SSE2_COMPILER
-  _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(enddiscard % 32,0,0,0));
-#else
-  _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), enddiscard % 32);
-#endif
-  _enddiscard = _mm_set1_epi32(enddiscard);
-  _mask = _mm_or_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_LOW));
-  _mask = _mm_and_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_HIGH));
-
-  return _mm_or_si128(_mask, _diff);
-}
-
-#if defined(DEBUG) || defined(DEBUG5)
-static void
-print_diff_popcount (__m128i _diff) {
-  printf("diff: ");
-  print_vector_hex(_diff);
-  printf("nmismatches %d\n",popcount_ones(_diff));
-  return;
-}
-
-static void
-print_diff_trailing_zeroes (__m128i _diff, int offset) {
-  printf("diff: ");
-  print_vector_hex(_diff);
-  printf("offset %d + trailing zeroes %d\n",offset,count_trailing_zeroes(_diff));
-  return;
-}
-
-static void
-print_diff_leading_zeroes (__m128i _diff, int offset) {
-  printf("diff: ");
-  print_vector_hex(_diff);
-  printf("offset %d - leading zeroes %d\n",offset,count_leading_zeroes(_diff));
-  return;
-}
 #endif
 
-#endif	/* littleendian and SSE2 */
 
+/*                 76543210 */
+#define HIGH_BIT 0x80000000
 
 #define nonzero_p_32(diff) diff
 
 #define clear_start_32(diff,startdiscard) (diff & (~0U << (startdiscard)))
 #define clear_end_32(diff,enddiscard) (diff & ~(~0U << (enddiscard)))
 
+/* For trimming */
+#define set_start_32(diff,startdiscard) (diff | ~(~0U << startdiscard))
+#define set_end_32(diff,enddiscard) (diff | (~0U << enddiscard))
+
+/* For fragment functions that evaluate only the end 16-mer */
+#define clear_start_mask(startdiscard) (~0U << (startdiscard))
+#define clear_end_mask(enddiscard) (~(~0U << (enddiscard)))
+
+
 /* Same speed: clear_highbit(diff,relpos) (diff - (HIGH_BIT >> relpos)) */
 /* Note: xor assumes that bit at relpos was on */
 #define clear_highbit_32(diff,relpos) (diff ^ (HIGH_BIT >> relpos))
@@ -18877,27 +21586,24 @@ print_diff_leading_zeroes (__m128i _diff, int offset) {
 #define count_trailing_zeroes_32(diff) mod_37_bit_position[(-diff & diff) % 37]
 #endif
 
-/* For trimming */
-#define set_start_32(diff,startdiscard) (diff | ~(~0U << startdiscard))
-#define set_end_32(diff,enddiscard) (diff | (~0U << enddiscard))
-
-
 
 /* Counts matches from pos5 to pos3 up to first mismatch.  Modified from mismatches_left */
 int
 Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
 				      bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
   int mismatch_position, offset, nshift;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *ptr, *end;
-  UINT4 diff_32;
-  Genomediff_T diff;
+  Genomecomp_T *query_shifted, *ptr, *endptr;
   int relpos;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
   debug(
 	printf("\n\n");
@@ -18923,163 +21629,247 @@ Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T lef
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += startcolumni;
+#endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ptr = &(ref_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
   if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = -startdiscard + pos5;
+    /* Single block */
     debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_sarray_32)(query_shifted,&(ref_blocks[startblocki_32]),
-                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-#else
-    diff_32 = (block_diff_sarray_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
-                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-#endif
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
     if (nonzero_p_32(diff_32)) {
       mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
       debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
-#ifdef DEBUG14
-      answer = (mismatch_position - pos5);
-#else
       return (mismatch_position - pos5);
-#endif
     } else {
       debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5));
-#ifdef DEBUG14
-      answer = (pos3 - pos5);
-#else
       return (pos3 - pos5);
-#endif
     }
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_FIRST_MISMATCH) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    assert(startdiscard == ((left+pos5) % 128) - startcolumni*32);
+    assert(enddiscard == ((left+pos3) % 128) - startcolumni*32);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
+    diff_128 = (block_diff_sarray_128_shift_lo)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true,
+						startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = -startdiscard + pos5;
+    if (nonzero_p_128(diff_128)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+      return (mismatch_position - pos5);
+    } else {
+      return (pos3 - pos5);
+    }
 
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+#else
+    /* Start block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-#ifndef DEBUG14
-  }
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+      return (mismatch_position - pos5);
+    }
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    offset += 32;
+
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+
+      if (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+	return (mismatch_position - pos5);
+      }
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+
+    /* End block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+      return (mismatch_position - pos5);
+    } else {
+      return (pos3 - pos5);
+    }
 #endif
 
+#if defined(USE_WRAP_FIRST_MISMATCH) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    assert(startdiscard == ((left+pos5) % 128) - startcolumni*32);
+    assert(enddiscard == ((left+pos3) % 128) + (4 - startcolumni)*32);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]),
-			       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    if (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5)));
+    diff_128 = (block_diff_sarray_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true,
+					       startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    if (nonzero_p_128(diff_128)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
       return (mismatch_position - pos5);
     } else {
-      debug(printf("returning %d - %d consecutive matches\n",pos3,pos5));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == (pos3 - pos5)));
       return (pos3 - pos5);
     }
 
-  } else {
 #endif
 
-    /* Startblock */
-    diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]),
-			       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-    diff = clear_start(diff,startdiscard);
-
-    if (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5)));
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
       return (mismatch_position - pos5);
     }
-
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[startblocki]);
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-    ptr = &(ref_blocks[startblocki+12]);
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    offset += 32;
+
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+      if (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+	return (mismatch_position - pos5);
+      }
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
 #endif
-    end = &(ref_blocks[endblocki]);
-    offset += STEP_SIZE; /* 128 or 32 */
-    while (ptr < end) {
-      diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    ptr += GENOME_NEXTROW;
+
 
-      if (nonzero_p(diff) /* != 0*/) {
-	mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr + 24 <= endptr) {
+      diff_256 = (block_diff_sarray_256)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+
+      if (nonzero_p_256(diff_256)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_256(diff_256));
 	debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5));
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5)));
 	return (mismatch_position - pos5);
       }
+      query_shifted += 24; ptr += 24;
+      offset += 256;
+    }
+#endif
 
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#ifdef HAVE_SSE2
+    while (ptr + 12 <= endptr) {
+      diff_128 = (block_diff_sarray_128)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+
+      if (nonzero_p_128(diff_128)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+	debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5));
+	return (mismatch_position - pos5);
+      }
+      query_shifted += 12; ptr += 12;
+      offset += 128;
+    }
 #else
-      ptr += 12;
+    while (ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+					 plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+	if (nonzero_p_32(diff_32)) {
+	  mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	  debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+	  return (mismatch_position - pos5);
+	}
+	query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+	offset += 32;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW;
+    }
 #endif
-      offset += STEP_SIZE; /* 128 or 32 */
+
+    /* End row */
+    while (ptr < endptr) {
+      diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+
+      if (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
+	return (mismatch_position - pos5);
+      }
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
     }
 
-    /* Endblock */
-    diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-    diff = clear_end(diff,enddiscard);
+    /* End block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    if (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5)));
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5));
       return (mismatch_position - pos5);
     } else {
-      debug(printf("returning %d - %d consecutive matches\n",pos3,pos5));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == (pos3 - pos5)));
       return (pos3 - pos5);
     }
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
   }
-#endif
 }
 
 
 /* Counts matches from pos3 to pos5 up to first mismatch.  Modified from mismatches_right */
 int
 Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-				     bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
+				      bool plusp, int genestrand) {
   int mismatch_position, offset, relpos, nshift;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *start, *ptr;
-  UINT4 diff_32;
-  Genomediff_T diff;
+  Genomecomp_T *query_shifted, *ptr, *startptr;
 #ifndef HAVE_BUILTIN_CLZ
   Genomecomp_T top;
 #endif
   int startcolumni, endcolumni;
+  static int ncalls = 0;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
+
+  /* printf("Number of calls to leftward: %d\n",++ncalls); */
 
   debug(
 	printf("\n\n");
@@ -19105,142 +21895,226 @@ Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += endcolumni;
+#endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = (pos3 - 1) - enddiscard + 32;
+  ptr = &(ref_blocks[endblocki_32]);
+  startptr = &(ref_blocks[startblocki_32]);
 
   if (startblocki_32 == endblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = (pos3 - 1) - enddiscard + 32;
+    /* Single block */
     debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_sarray_32)(query_shifted,&(ref_blocks[endblocki_32]),
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
 				     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-#else
-    diff_32 = (block_diff_sarray_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]),
-				     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-#endif
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
     if (nonzero_p_32(diff_32)) {
       mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
       debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
-#ifdef DEBUG14
-      answer = (pos3 - mismatch_position - 1);
-#else
       return (pos3 - mismatch_position - 1);
-#endif
     } else {
-      debug(printf("returning %d - %d consecutive matches\n",pos3,pos5));
-#ifdef DEBUG14
-      answer = (pos3 - pos5);
-#else
+      debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5));
       return (pos3 - pos5);
-#endif
     }
-  }
 
-#ifndef DEBUG14
-  else {
-#endif
+  } else if (startblocki == endblocki) {
+#if defined(USE_SHIFT_FIRST_MISMATCH) && defined(HAVE_SSE2)
+    /* Shift */
+    startdiscard += 96 - (endcolumni - startcolumni)*32;
+    enddiscard += 96;
+    assert(startdiscard == ((left+pos5) % 128) + (3 - endcolumni)*32);
+    assert(enddiscard == ((left+pos3) % 128) + (3 - endcolumni)*32);
+
+    diff_128 = (block_diff_sarray_128_shift_hi)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true,
+						endcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    if (nonzero_p_128(diff_128)) {
+      mismatch_position = offset - (relpos = count_leading_zeroes_128(diff_128));
+      debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+      return (pos3 - mismatch_position - 1);
+    } else {
+      debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5));
+      return (pos3 - pos5);
+    }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
+#else
+    /* End block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = (pos3 - 1) - enddiscard + STEP_SIZE;
-  
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n",
-		 nshift,startdiscard,enddiscard,offset));
-#ifndef DEBUG14
-  }
-#endif
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
+      debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+      return (pos3 - mismatch_position - 1);
+    }
+    query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+    offset -= 32;
 
+    /* Single row */
+    while (--endcolumni > startcolumni) {
+      diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (startblocki == endblocki) {
-    diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]),
-			       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    if (nonzero_p(diff)) {
-      mismatch_position = offset - (relpos = count_leading_zeroes(diff));
+      if (nonzero_p_32(diff_32)) {
+	mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
+	debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+	return (pos3 - mismatch_position - 1);
+      }
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      offset -= 32;
+    }
+
+    /* Start block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
       debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1)));
       return (pos3 - mismatch_position - 1);
     } else {
-      debug(printf("returning %d - %d consecutive matches\n",pos3,pos5));
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - pos5)));
       return (pos3 - pos5);
     }
+#endif
 
-  } else {
+#if defined(USE_WRAP_FIRST_MISMATCH) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    startdiscard += (startcolumni - endcolumni - 1)*32;
+    enddiscard += 96;
+    assert(startdiscard == ((left+pos5) % 128) - (endcolumni + 1)*32);
+    assert(enddiscard == ((left+pos3) % 128) + (3 - endcolumni)*32);
+
+    diff_128 = (block_diff_sarray_128_wrap_hi)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true,
+					       endcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    if (nonzero_p_128(diff_128)) {
+      mismatch_position = offset - (relpos = count_leading_zeroes_128(diff_128));
+      debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+      return (pos3 - mismatch_position - 1);
+    } else {
+      debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5));
+      return (pos3 - pos5);
+    }
 #endif
 
-    /* Endblock */
-    diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]),
-			       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-    diff = clear_end(diff,enddiscard);
+  } else {
+    /* End block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    if (nonzero_p(diff)) {
-      mismatch_position = offset - (relpos = count_leading_zeroes(diff));
-      debug(printf("returning %d - %d - 1 consecutive matches",pos3,mismatch_position));
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1)));
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
+      debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
       return (pos3 - mismatch_position - 1);
     }
+    query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+    offset -= 32;
 
-    query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[endblocki]);
-    ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
-#else
-    ptr = &(ref_blocks[endblocki-12]);
+    /* End row */
+    while (--endcolumni >= 0) {
+      diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+
+      if (nonzero_p_32(diff_32)) {
+	mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
+	debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+	return (pos3 - mismatch_position - 1);
+      }
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      offset -= 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted -= QUERY_NEXTROW;
 #endif
-    start = &(ref_blocks[startblocki]);
-    offset -= STEP_SIZE; /* 128 or 32 */
-    while (ptr > start) {
-      diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    ptr -= GENOME_NEXTROW;
+
+
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr >= startptr + 24) {
+      diff_256 = (block_diff_sarray_256)(&(query_shifted[-15]),&(ptr[-15]),plusp,genestrand,/*query_unk_mismatch_local_p*/true);
 
-      if (nonzero_p(diff)) {
-	mismatch_position = offset - (relpos = count_leading_zeroes(diff));
+      if (nonzero_p_256(diff_256)) {
+	mismatch_position = offset - (relpos = count_leading_zeroes_256(diff_256));
 	debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
-	debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1)));
 	return (pos3 - mismatch_position - 1);
       }
+      query_shifted -= 24; ptr -= 24;
+      offset -= 256;
+    }
+#endif
 
-      query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#ifdef HAVE_SSE2
+    while (ptr >= startptr + 12) {
+      diff_128 = (block_diff_sarray_128)(&(query_shifted[-3]),&(ptr[-3]),plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+
+      if (nonzero_p_128(diff_128)) {
+	mismatch_position = offset - (relpos = count_leading_zeroes_128(diff_128));
+	debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+	return (pos3 - mismatch_position - 1);
+      }
+      query_shifted -= 12; ptr -= 12;
+      offset -= 128;
+    }
 #else
-      ptr -= 12;
+    while (ptr >= startptr + 12) {
+     for (endcolumni = 3; endcolumni >= 0; --endcolumni) {
+       diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+					plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+       
+       if (nonzero_p_32(diff_32)) {
+	 mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
+	 debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+	 return (pos3 - mismatch_position - 1);
+       }
+       query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+       offset -= 32;
+     }
+     /* query_shifted -= QUERY_NEXTROW; */ ptr -= GENOME_NEXTROW;
+    }
 #endif
-      offset -= STEP_SIZE; /* 128 or 32 */
+
+    /* Start row */
+    while (ptr > startptr) {
+      diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+				       plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+      if (nonzero_p_32(diff_32)) {
+	mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
+	debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
+	return (pos3 - mismatch_position - 1);
+      }
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      offset -= 32;
     }
 
-    /* Startblock */
-    diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true);
-    diff = clear_start(diff,startdiscard);
+    /* Start block */
+    diff_32 = (block_diff_sarray_32)(query_shifted,ptr,
+                                     plusp,genestrand,/*query_unk_mismatch_local_p*/true);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    if (nonzero_p(diff)) {
-      mismatch_position = offset - (relpos = count_leading_zeroes(diff));
+    if (nonzero_p_32(diff_32)) {
+      mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32));
       debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position));
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1)));
       return (pos3 - mismatch_position - 1);
     } else {
-      debug(printf("returning %d - %d consecutive matches\n",pos3,pos5));
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - pos5)));
       return (pos3 - pos5);
     }
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
   }
-#endif
 }
 
 
@@ -19430,170 +22304,22 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
 	return offset + enddiscard;
       }
     }
-
-  } else if (ptr2 == end) {
-    /* Single block */
-    enddiscard = genomelength % 32; /* Not STEP_SIZE */
-
-    ptr1 = &(ref_blocks[startblocki_1]);
-    ptr2 = &(ref_blocks[startblocki_2]);
-#ifdef WORDS_BIGENDIAN
-    shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift;
-    shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift;
-    shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift;
-#else
-    shifted1[0] = ptr1[0] << nshift;
-    shifted1[1] = ptr1[4] << nshift;
-    shifted1[2] = ptr1[8] << nshift;
-#endif
-    debug2(Compress_print_one_block(ptr1));
-    debug2(Compress_print_one_block(ptr2));
-    debug2(Compress_print_one_block(shifted1));
-
-#ifdef WORDS_BIGENDIAN
-    diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
-#else
-    diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
-#endif
-    diff = clear_start_32(diff,startdiscard);
-    diff = clear_end_32(diff,enddiscard);
-
-    if (diff /* != 0U */) {
-#ifdef HAVE_BUILTIN_CTZ
-      mismatch_position = offset + (relpos = __builtin_ctz(diff));
-#else
-      mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
-#endif
-      debug2(printf("case 6: returning %d matches\n",mismatch_position));
-      return mismatch_position;
-    } else {
-      debug2(printf("case 7: returning %d - %d matches\n",enddiscard,startdiscard));
-      return (enddiscard - startdiscard);
-    }
-
-  } else {
-
-    /* Startblock */
-    ptr1 = &(ref_blocks[startblocki_1]);
-    ptr2 = &(ref_blocks[startblocki_2]);
-#ifdef WORDS_BIGENDIAN
-    shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift);
-    shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift);
-    shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift);
-#else
-    shifted1[0] = (ptr1[0] << nshift);
-    shifted1[1] = (ptr1[4] << nshift);
-    shifted1[2] = (ptr1[8] << nshift);
-#endif
-    debug2(Compress_print_one_block(ptr1));
-    debug2(Compress_print_one_block(ptr2));
-    debug2(Compress_print_one_block(shifted1));
-
-#ifdef WORDS_BIGENDIAN
-    diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
-#else
-    diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
-#endif
-    diff = clear_start_32(diff,startdiscard);
-
-    if (diff /* != 0U */) {
-#ifdef HAVE_BUILTIN_CTZ
-      mismatch_position = offset + (relpos = __builtin_ctz(diff));
-#else
-      mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
-#endif
-      debug2(printf("case 8: returning %d matches\n",mismatch_position));
-      return mismatch_position;
-    } else {
-      ptr1_prev = ptr1;
-      ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;}
-      ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;}
-      offset += 32;		/* Not STEP_SIZE */
-    }
-
-    while (ptr1 < end && ptr2 < end) {
-      if (nshift == 0) {
-	/* rightshift of 32 is a no-op */
-#ifdef WORDS_BIGENDIAN
-	shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
-#else
-	shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
-#endif
-      } else {
-#ifdef WORDS_BIGENDIAN
-	shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
-	shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
-	shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
-#else
-	shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
-	shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
-	shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
-#endif
-      }
-      debug2(Compress_print_one_block(ptr1));
-      debug2(Compress_print_one_block(ptr2));
-      debug2(Compress_print_one_block(shifted1));
-
-#ifdef WORDS_BIGENDIAN
-      diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
-#else
-      diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
-#endif
-      if (diff /* != 0U */) {
-#ifdef HAVE_BUILTIN_CTZ
-	mismatch_position = offset + (relpos = __builtin_ctz(diff));
-#else
-	mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
-#endif
-	debug2(printf("case 9: returning %d matches\n",mismatch_position));
-	return mismatch_position;
-      } else {
-	ptr1_prev = ptr1;
-	ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;}
-	ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;}
-	offset += 32;		/* Not STEP_SIZE */
-      }
-    }
-
-    /* Last block of entire genome */
-    enddiscard = genomelength % 32; /* Not STEP_SIZE */
-    if (ptr2 == end) {
-      debug2(printf("ptr2 == end\n"));
-      /* Keep enddiscard */
-      nblocks = 1;
-    } else if (nshift + enddiscard < 32) {
-      debug2(printf("ptr1 == end and nshift %d + enddiscard %d < 32\n",nshift,enddiscard));
-      enddiscard = nshift + enddiscard;
-      nblocks = 1;
-    } else if (nshift > 0) {
-      debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard));
-      enddiscard -= (32 - nshift);
-      nblocks = 2;
-    } else {
-      debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard));
-      /* Keep enddiscard */
-      nblocks = 2;
-    }
-
-    /* Block 1 */
-    if (nshift == 0) {
-      /* rightshift of 32 is a no-op */
-#ifdef WORDS_BIGENDIAN
-      shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
-#else
-      shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
-#endif
-    } else {
+
+  } else if (ptr2 == end) {
+    /* Single block */
+    enddiscard = genomelength % 32; /* Not STEP_SIZE */
+
+    ptr1 = &(ref_blocks[startblocki_1]);
+    ptr2 = &(ref_blocks[startblocki_2]);
 #ifdef WORDS_BIGENDIAN
-      shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
-      shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
-      shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+    shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift;
+    shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift;
+    shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift;
 #else
-      shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
-      shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
-      shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
+    shifted1[0] = ptr1[0] << nshift;
+    shifted1[1] = ptr1[4] << nshift;
+    shifted1[2] = ptr1[8] << nshift;
 #endif
-    }
     debug2(Compress_print_one_block(ptr1));
     debug2(Compress_print_one_block(ptr2));
     debug2(Compress_print_one_block(shifted1));
@@ -19603,9 +22329,8 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
 #else
     diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
 #endif
-    if (nblocks == 1) {
-      diff = clear_end_32(diff,enddiscard);
-    }
+    diff = clear_start_32(diff,startdiscard);
+    diff = clear_end_32(diff,enddiscard);
 
     if (diff /* != 0U */) {
 #ifdef HAVE_BUILTIN_CTZ
@@ -19613,27 +22338,26 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
 #else
       mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
 #endif
-      debug2(printf("case 10: returning %d matches\n",mismatch_position));
+      debug2(printf("case 6: returning %d matches\n",mismatch_position));
       return mismatch_position;
-    } else if (nblocks == 1) {
-      debug2(printf("case 11: returning offset %d + enddiscard %d matches\n",offset,enddiscard));
-      return offset + enddiscard;
     } else {
-      ptr1_prev = ptr1;
-      ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;}
-      ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;}
-      offset += 32;		/* Not STEP_SIZE */
+      debug2(printf("case 7: returning %d - %d matches\n",enddiscard,startdiscard));
+      return (enddiscard - startdiscard);
     }
 
-    /* Block 2 */
+  } else {
+
+    /* Startblock */
+    ptr1 = &(ref_blocks[startblocki_1]);
+    ptr2 = &(ref_blocks[startblocki_2]);
 #ifdef WORDS_BIGENDIAN
-    shifted1[0] = (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
-    shifted1[1] = (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
-    shifted1[2] = (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+    shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift);
+    shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift);
+    shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift);
 #else
-    shifted1[0] = (ptr1_prev[0] >> rightshift);
-    shifted1[1] = (ptr1_prev[4] >> rightshift);
-    shifted1[2] = (ptr1_prev[8] >> rightshift);
+    shifted1[0] = (ptr1[0] << nshift);
+    shifted1[1] = (ptr1[4] << nshift);
+    shifted1[2] = (ptr1[8] << nshift);
 #endif
     debug2(Compress_print_one_block(ptr1));
     debug2(Compress_print_one_block(ptr2));
@@ -19644,7 +22368,7 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
 #else
     diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
 #endif
-    diff = clear_end_32(diff,enddiscard);
+    diff = clear_start_32(diff,startdiscard);
 
     if (diff /* != 0U */) {
 #ifdef HAVE_BUILTIN_CTZ
@@ -19652,548 +22376,189 @@ Genome_consecutive_matches_pair (UINT4 lefta, UINT4 leftb, UINT4 genomelength) {
 #else
       mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
 #endif
-      debug2(printf("case 12: returning %d matches\n",mismatch_position));
+      debug2(printf("case 8: returning %d matches\n",mismatch_position));
       return mismatch_position;
     } else {
-      debug2(printf("case 13: returning offset %d + enddiscard %d matches\n",offset,enddiscard));
-      return offset + enddiscard;
-    }
-  }
-}
-
-
-
-
-static int
-count_mismatches_limit (Compress_T query_compress, Univcoord_T left, 
-			int pos5, int pos3, int max_mismatches, bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches;
-  int startdiscard, enddiscard;
-  Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *endblock, *ptr;
-  Genomecomp_T *query_shifted, *query_shifted_save_start;
-  Genomediff_T diff;
-  UINT4 diff_32;
-  int nshift;
-  int startcolumni, endcolumni;
-
-
-  debug(
-	printf("\n\n");
-	printf("Genome (in count_mismatches_limit) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
-	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
-	printf("\n");
-	);
-
-
-  startblocki = (left+pos5)/128U*12;
-  startcolumni = ((left+pos5) % 128) / 32;
-  startblocki_32 = startblocki + startcolumni;
-
-  endblocki = (left+pos3)/128U*12;
-  endcolumni = ((left+pos3) % 128) / 32;
-  endblocki_32 = endblocki + endcolumni;
-
-  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
-	       left,pos5,pos3,startblocki,endblocki));
-
-  nshift = left % STEP_SIZE;
-  query_shifted = Compress_shift(query_compress,nshift);
-  debug(printf("Query shifted %d:\n",nshift));
-  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (endblocki_32 == startblocki_32) {
-    debug(printf("** Single block **\n"));
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
-#else
-    diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
-#endif
-    diff_32 = clear_start_32(diff_32,startdiscard);
-    diff_32 = clear_end_32(diff_32,enddiscard);
-
-#ifdef DEBUG14
-    answer = popcount_ones_32(diff_32);
-#else
-    return popcount_ones_32(diff_32);
-#endif
-
-  }
-#ifndef DEBUG14
-  else {
-#endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
-
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
-#ifndef DEBUG14
-  }
-#endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    debug(printf("** Single block **\n"));
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    debug(print_diff_popcount(diff));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff)));
-    return popcount_ones(diff);
-
-  } else if (endblocki == startblocki + 12) {
-    /* Only two blocks to check */
-
-    if (STEP_SIZE - startdiscard >= enddiscard) {
-      /* Two blocks to check and more bits counted in startblock */
-      debug(printf("* Two blocks, start block first **\n"));
-
-      /* 1/2: Startblock */
-      diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-      
-      debug(print_diff_popcount(diff));
-      if ((nmismatches = popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-      
-      /* 2/2: Endblock */
-      diff = (block_diff)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
-			  &(ref_blocks[endblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
-
-    } else {
-      /* Two blocks to check and more bits counted in endblock */
-      debug(printf("** Two blocks, end block first **\n"));
-
-      /* 1/2: Endblock */
-      diff = (block_diff)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
-			  &(ref_blocks[endblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      if ((nmismatches = popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-
-      /* 2/2: Startblock */
-      diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
-    }
-
-  } else {
-#endif
-
-    /* More than 2 blocks to check */
-    debug(printf("** More than two blocks **\n"));
-
-    query_shifted_save_start = query_shifted;
-
-    /* 2..(n-1) / n: Check all middle blocks first */
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[startblocki]);
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-    ptr = &(ref_blocks[startblocki+12]);
-#endif
-    endblock = &(ref_blocks[endblocki]);
-    nmismatches = 0;
-
-    while (ptr < endblock) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-      
-      debug(print_diff_popcount(diff));
-      if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-      ptr += 12;
-#endif
-    }
-
-    if (enddiscard >= STEP_SIZE - startdiscard) {
-      /* More bits in end block */
-      debug(printf("** Final block, end block first **\n"));
-
-      /* n/n: Go first to end block */
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-
-      /* 1/n: Go second to start block */
-      diff = (block_diff)(query_shifted_save_start,&(ref_blocks[startblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-      
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
-
-    } else {
-      debug(printf("** Final block, start block first **\n"));
-
-      /* 1/n: Go first to start block */
-      diff = (block_diff)(query_shifted_save_start,&(ref_blocks[startblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-      
-      debug(print_diff_popcount(diff));
-      if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-
-      /* n/n: Go second to end block */
-      diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
-			  plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
+      ptr1_prev = ptr1;
+      ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;}
+      ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;}
+      offset += 32;		/* Not STEP_SIZE */
     }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  }
-#endif
-}
-
-
-static int
-count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, int max_mismatches,
-			     bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches;
-  int startdiscard, enddiscard;
-  Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *endblock;
-  Genomecomp_T *query_shifted, *query_shifted_save_start;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int nshift;
-  Genomecomp_T *ref_ptr, *alt_ptr;
-  int startcolumni, endcolumni;
-
-
-  debug(
-	printf("\n\n");
-	printf("Genome (in count_mismatches_limit_snps) from %u+%d to %u+%d\n",left,pos5,left,pos3);
-	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
-	printf("\n");
-	);
-
-
-  startblocki = (left+pos5)/128U*12;
-  startcolumni = ((left+pos5) % 128) / 32;
-  startblocki_32 = startblocki + startcolumni;
-
-  endblocki = (left+pos3)/128U*12;
-  endcolumni = ((left+pos3) % 128) / 32;
-  endblocki_32 = endblocki + endcolumni;
-
-  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
-	       left,pos5,pos3,startblocki,endblocki));
-
-  nshift = left % STEP_SIZE;
-  query_shifted = Compress_shift(query_compress,nshift);
-  debug(printf("Query shifted %d:\n",nshift));
-  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (endblocki_32 == startblocki_32) {
-    debug(printf("** Single block **\n"));
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
-#else
-    diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
-#endif
-    diff_32 = clear_start_32(diff_32,startdiscard);
-    diff_32 = clear_end_32(diff_32,enddiscard);
-
-#ifdef DEBUG14
-    answer = popcount_ones_32(diff_32);
+    while (ptr1 < end && ptr2 < end) {
+      if (nshift == 0) {
+	/* rightshift of 32 is a no-op */
+#ifdef WORDS_BIGENDIAN
+	shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
 #else
-    return popcount_ones_32(diff_32);
-#endif
-
-  }
-#ifndef DEBUG14
-  else {
+	shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
 #endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
+      } else {
+#ifdef WORDS_BIGENDIAN
+	shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+	shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+	shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
+#else
+	shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
+	shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
+	shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
 #endif
+      }
+      debug2(Compress_print_one_block(ptr1));
+      debug2(Compress_print_one_block(ptr2));
+      debug2(Compress_print_one_block(shifted1));
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
-#ifndef DEBUG14
-  }
+#ifdef WORDS_BIGENDIAN
+      diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
+      diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
 #endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+      if (diff /* != 0U */) {
+#ifdef HAVE_BUILTIN_CTZ
+	mismatch_position = offset + (relpos = __builtin_ctz(diff));
 #else
-  if (endblocki == startblocki) {
-    debug(printf("** Single block **\n"));
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    debug(print_diff_popcount(diff));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff)));
-    return popcount_ones(diff);
-
-  } else if (endblocki == startblocki + 12) {
-    /* Only two blocks to check */
-
-    if (STEP_SIZE - startdiscard >= enddiscard) {
-      /* Two blocks to check and more bits counted in startblock */
-      debug(printf("* Two blocks, start block first **\n"));
-
-      /* 1/2: Startblock */
-      diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-
-      debug(print_diff_popcount(diff));
-      nmismatches /* init */ = popcount_ones(diff);
-      if (nmismatches > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
+	mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
+#endif
+	debug2(printf("case 9: returning %d matches\n",mismatch_position));
+	return mismatch_position;
+      } else {
+	ptr1_prev = ptr1;
+	ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;}
+	ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;}
+	offset += 32;		/* Not STEP_SIZE */
       }
+    }
 
-      /* 2/2: Endblock */
-      diff = (block_diff_snp)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
-			      &(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
-
+    /* Last block of entire genome */
+    enddiscard = genomelength % 32; /* Not STEP_SIZE */
+    if (ptr2 == end) {
+      debug2(printf("ptr2 == end\n"));
+      /* Keep enddiscard */
+      nblocks = 1;
+    } else if (nshift + enddiscard < 32) {
+      debug2(printf("ptr1 == end and nshift %d + enddiscard %d < 32\n",nshift,enddiscard));
+      enddiscard = nshift + enddiscard;
+      nblocks = 1;
+    } else if (nshift > 0) {
+      debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard));
+      enddiscard -= (32 - nshift);
+      nblocks = 2;
     } else {
-      /* Two blocks to check and more bits counted in endblock */
-      debug(printf("** Two blocks, end block first **\n"));
-
-      /* 1/2: Endblock */
-      diff = (block_diff_snp)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE,
-			      &(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      nmismatches /* init */ = popcount_ones(diff);
-      if (nmismatches > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-
-      /* 2/2: Startblock */
-      diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
+      debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard));
+      /* Keep enddiscard */
+      nblocks = 2;
     }
 
-  } else {
+    /* Block 1 */
+    if (nshift == 0) {
+      /* rightshift of 32 is a no-op */
+#ifdef WORDS_BIGENDIAN
+      shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]);
+#else
+      shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8];
 #endif
-
-    /* More than 2 blocks to check */
-    debug(printf("** More than two blocks **\n"));
-
-    query_shifted_save_start = query_shifted;
-
-    /* 2..(n-1) / n: Check all middle blocks first */
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[startblocki]);
-    alt_ptr = &(snp_blocks[startblocki]);
-    ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+    } else {
+#ifdef WORDS_BIGENDIAN
+      shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+      shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+      shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
 #else
-    ref_ptr = &(ref_blocks[startblocki+12]);
-    alt_ptr = &(snp_blocks[startblocki+12]);
+      shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift);
+      shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift);
+      shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift);
 #endif
-    endblock = &(ref_blocks[endblocki]);
-    nmismatches = 0;
-
-    while (ref_ptr < endblock) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-
-      debug(print_diff_popcount(diff));
-      if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
+    }
+    debug2(Compress_print_one_block(ptr1));
+    debug2(Compress_print_one_block(ptr2));
+    debug2(Compress_print_one_block(shifted1));
 
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#ifdef WORDS_BIGENDIAN
+    diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
 #else
-      ref_ptr += 12; alt_ptr += 12;
+    diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
 #endif
+    if (nblocks == 1) {
+      diff = clear_end_32(diff,enddiscard);
     }
 
-    if (enddiscard >= STEP_SIZE - startdiscard) {
-      /* More bits in end block */
-      debug(printf("** Final block, end block first **\n"));
-
-      /* n/n: Go first to end block */
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-
-      /* 1/n: Go second to start block */
-      diff = (block_diff_snp)(query_shifted_save_start,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-      
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
-
+    if (diff /* != 0U */) {
+#ifdef HAVE_BUILTIN_CTZ
+      mismatch_position = offset + (relpos = __builtin_ctz(diff));
+#else
+      mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
+#endif
+      debug2(printf("case 10: returning %d matches\n",mismatch_position));
+      return mismatch_position;
+    } else if (nblocks == 1) {
+      debug2(printf("case 11: returning offset %d + enddiscard %d matches\n",offset,enddiscard));
+      return offset + enddiscard;
     } else {
-      debug(printf("** Final block, start block first **\n"));
-
-      /* 1/n: Go first to start block */
-      diff = (block_diff_snp)(query_shifted_save_start,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_start(diff,startdiscard);
-      
-      debug(print_diff_popcount(diff));
-      if ((nmismatches += popcount_ones(diff)) > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
-      
-      /* n/n: Go second to end block */
-      diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			      plusp,genestrand,query_unk_mismatch_p);
-      diff = clear_end(diff,enddiscard);
-
-      debug(print_diff_popcount(diff));
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-      return nmismatches + popcount_ones(diff);
+      ptr1_prev = ptr1;
+      ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;}
+      ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;}
+      offset += 32;		/* Not STEP_SIZE */
     }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+    /* Block 2 */
+#ifdef WORDS_BIGENDIAN
+    shifted1[0] = (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift);
+    shifted1[1] = (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift);
+    shifted1[2] = (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift);
 #else
-  }
+    shifted1[0] = (ptr1_prev[0] >> rightshift);
+    shifted1[1] = (ptr1_prev[4] >> rightshift);
+    shifted1[2] = (ptr1_prev[8] >> rightshift);
 #endif
-}
-
-
-int
-Genome_count_mismatches_limit (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-			       int max_mismatches, bool plusp, int genestrand) {
-
-#if 0
-  if (dibasep) {
-    debug(printf("Dibase_count_mismatches_limit from %u+%d to %u+%d with max_mismatches %d:\n",
-		 left,pos5,left,pos3,max_mismatches));
+    debug2(Compress_print_one_block(ptr1));
+    debug2(Compress_print_one_block(ptr2));
+    debug2(Compress_print_one_block(shifted1));
 
-    return Dibase_count_mismatches_limit(&(*ncolordiffs),query,pos5,pos3,
-					 /*startpos*/left+pos5,/*endpos*/left+pos3,max_mismatches);
-  }
+#ifdef WORDS_BIGENDIAN
+    diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8]));
+#else
+    diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]);
 #endif
+    diff = clear_end_32(diff,enddiscard);
 
-  if (snp_blocks == NULL) {
-    return count_mismatches_limit(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand);
-  } else {
-    return count_mismatches_limit_snps(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand);
+    if (diff /* != 0U */) {
+#ifdef HAVE_BUILTIN_CTZ
+      mismatch_position = offset + (relpos = __builtin_ctz(diff));
+#else
+      mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37];
+#endif
+      debug2(printf("case 12: returning %d matches\n",mismatch_position));
+      return mismatch_position;
+    } else {
+      debug2(printf("case 13: returning offset %d + enddiscard %d matches\n",offset,enddiscard));
+      return offset + enddiscard;
+    }
   }
 }
 
 
 
-int
-Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-				       bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches;
+static int
+count_mismatches_limit (Compress_T query_compress, Univcoord_T left, 
+			int pos5, int pos3, int max_mismatches, bool plusp, int genestrand) {
+  int nmismatches = 0;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *ptr, *end;
+  Genomecomp_T *ptr, *endptr;
   Genomecomp_T *query_shifted;
-  UINT4 diff_32;
-  Genomediff_T diff;
   int nshift;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
 
   debug(
 	printf("\n\n");
-	printf("Genome (in count_mismatches_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	printf("Genome (in count_mismatches_limit) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
 	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
@@ -20215,128 +22580,189 @@ Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T le
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += startcolumni;
+#endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  ptr = &(ref_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
   if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
+    /* Single block */
+    debug(printf("** Single block **\n"));
     debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
-#else
-    diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
-#endif
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-#ifdef DEBUG14
-    answer = popcount_ones_32(diff_32);
-#else
     return popcount_ones_32(diff_32);
-#endif
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2)
+    /* Shift */
+#ifdef USE_SHIFT_HILO
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					 startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    return popcount_ones_128(diff_128);
+#else
+    /* Faster */
+    startdiscard += startcolumni*32;
+    enddiscard += endcolumni*32;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
+    diff_128 = (block_diff_128)(query_shifted - startcolumni,ptr - startcolumni,plusp,genestrand,query_unk_mismatch_p);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    return popcount_ones_128(diff_128);
 #endif
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
+#else
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
 
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    }
 
-#ifndef DEBUG14
-  }
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
 #endif
 
+#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    debug(print_diff_popcount(diff));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff)));
-    return popcount_ones(diff);
+    return popcount_ones_128(diff_128);
+#endif
 
   } else {
-#endif
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
 
-    /* Startblock */
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ptr += GENOME_NEXTROW;
 
-    debug(print_diff_popcount(diff));
-    nmismatches = popcount_ones(diff);
 
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[startblocki]);
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-    ptr = &(ref_blocks[startblocki+12]);
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr + 24 <= endptr) {
+      diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_256(diff_256);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += 24; ptr += 24;
+    }
 #endif
-    end = &(ref_blocks[endblocki]);
-    while (ptr < end) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-
-      debug(print_diff_popcount(diff));
-      nmismatches += popcount_ones(diff);
 
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#ifdef HAVE_SSE2
+    while (ptr + 12 <= endptr) {
+      diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_128(diff_128);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += 12; ptr += 12;
+    }
 #else
-      ptr += 12;
+    while (ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+	nmismatches += popcount_ones_32(diff_32);
+	if (nmismatches > max_mismatches) {
+	  return nmismatches;
+	}
+	query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW;
+    }
 #endif
+
+    /* End row */
+    while (ptr < endptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
     }
 
-    /* Endblock */
-    diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard);
-
-    debug(print_diff_popcount(diff));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-    return nmismatches + popcount_ones(diff);
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
   }
-#endif
 }
 
+
 static int
-count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-				 bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches;
+count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, 
+			     int pos5, int pos3, int max_mismatches, bool plusp, int genestrand) {
+  int nmismatches = 0;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *ref_ptr, *alt_ptr, *end;
+  Genomecomp_T *ref_ptr, *alt_ptr, *endptr;
   Genomecomp_T *query_shifted;
-  UINT4 diff_32;
-  Genomediff_T diff;
   int nshift;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
 
   debug(
 	printf("\n\n");
-	printf("Genome (in count_mismatches_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
-	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
+	printf("Genome (in count_mismatches_limit) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
 
@@ -20357,281 +22783,214 @@ count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, in
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += startcolumni;
+#endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  ref_ptr = &(ref_blocks[startblocki_32]);
+  alt_ptr = &(snp_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
   if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
+    /* Single block */
+    debug(printf("** Single block **\n"));
     debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
-#else
-    diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
-#endif
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-#ifdef DEBUG14
-    answer = popcount_ones_32(diff_32);
-#else
     return popcount_ones_32(diff_32);
-#endif
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2)
+    /* Shift */
+#ifdef USE_SHIFT_HILO
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					     startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    return popcount_ones_128(diff_128);
+#else
+    /* Faster */
+    startdiscard += startcolumni*32;
+    enddiscard += endcolumni*32;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
+    diff_128 = (block_diff_snp_128)(query_shifted - startcolumni,alt_ptr - startcolumni,ref_ptr - startcolumni,
+				    plusp,genestrand,query_unk_mismatch_p);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    return popcount_ones_128(diff_128);
 #endif
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
+#else
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
 
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    }
 
-#ifndef DEBUG14
-  }
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
 #endif
 
+#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					    startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    debug(print_diff_popcount(diff));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff)));
-    return popcount_ones(diff);
+    return popcount_ones_128(diff_128);
+#endif
 
   } else {
-#endif
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
 
-    /* Startblock */
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
 
-    debug(print_diff_popcount(diff));
-    nmismatches = popcount_ones(diff);
 
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[startblocki]);
-    alt_ptr = &(snp_blocks[startblocki]);
-    ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
-#else
-    ref_ptr = &(ref_blocks[startblocki+12]);
-    alt_ptr = &(snp_blocks[startblocki+12]);
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr + 24 <= endptr) {
+      diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_256(diff_256);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += 24; ref_ptr += 24; alt_ptr += 24;
+    }
 #endif
-    end = &(ref_blocks[endblocki]);
-    while (ref_ptr < end) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-
-      debug(print_diff_popcount(diff));
-      nmismatches += popcount_ones(diff);
 
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#ifdef HAVE_SSE2
+    while (ref_ptr + 12 <= endptr) {
+      diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_128(diff_128);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += 12; ref_ptr += 12; alt_ptr += 12;
+    }
 #else
-      ref_ptr += 12; alt_ptr += 12;
-#endif
+    while (ref_ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+	nmismatches += popcount_ones_32(diff_32);
+	if (nmismatches > max_mismatches) {
+	  return nmismatches;
+	}
+	query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
     }
+#endif
 
-    /* Endblock */
-    diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard);
-
-    debug(print_diff_popcount(diff));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff)));
-    return nmismatches + popcount_ones(diff);
+    /* End row */
+    while (ref_ptr < endptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
   }
-#endif
 }
 
 
-/* left is where the start of the query matches.  pos5 is where we
-   want to start comparing in the query.  pos3 is just after where we
-   want to stop comparing in the query, i.e., stop at (pos3-1)
-   inclusive */
+
 int
-Genome_count_mismatches_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-				   bool plusp, int genestrand) {
+Genome_count_mismatches_limit (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+			       int max_mismatches, bool plusp, int genestrand) {
 
 #if 0
   if (dibasep) {
-    Dibase_count_mismatches_substring(&ncolordiffs,query,pos5,pos3,
-				      /*startpos*/left+pos5,/*endpos*/left+pos3);
+    debug(printf("Dibase_count_mismatches_limit from %u+%d to %u+%d with max_mismatches %d:\n",
+		 left,pos5,left,pos3,max_mismatches));
+
+    return Dibase_count_mismatches_limit(&(*ncolordiffs),query,pos5,pos3,
+					 /*startpos*/left+pos5,/*endpos*/left+pos3,max_mismatches);
   }
 #endif
 
   if (snp_blocks == NULL) {
-    return Genome_count_mismatches_substring_ref(query_compress,left,pos5,pos3,plusp,genestrand);
+    return count_mismatches_limit(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand);
   } else {
-    return count_mismatches_substring_snps(query_compress,left,pos5,pos3,plusp,genestrand);
+    return count_mismatches_limit_snps(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand);
   }
 }
 
 
-/* pos5 is where we want to start comparing in the query.  pos3 is
-   just after where we want to stop comparing in the query, i.e., stop
-   at (pos3-1) inclusive */
-int
-Genome_count_mismatches_fragment_left (Compress_T query_compress, int pos5, int pos3,
-				       Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) {
-  Genomecomp_T diff, alt_diff, mask;
-  int startdiscard;
-  Genomecomp_T query_high, query_low, query_flags;
-  Genomecomp_T ref_high, ref_low, alt_high, alt_low;
-
-  Compress_get_16mer_left(&query_high,&query_low,&query_flags,query_compress,pos3);
-  startdiscard = 16 - (pos3 - pos5);
-
-  mask = clear_start_mask(startdiscard);
-  mask &= 0x0000FFFF;		/* Therefore, result of Compress does not need masking */
-  debug1(printf("Mask for startdiscard %d: %08X\n",startdiscard,mask));
-
-
-  /* Unpack genomic fragments */
-  ref_high = ref_fragment >> 16;
-  ref_low = ref_fragment /* & 0x0000FFFF */;
-
-  alt_high = alt_fragment >> 16;
-  alt_low = alt_fragment /* & 0x0000FFFF */;
-
-
-  debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF));
-
-  /* Taken from block_diff */
-  diff = (query_high ^ ref_high) | (query_low ^ ref_low);
-  debug1(printf(" => ref_diff %04X",(unsigned short) diff));
-
-  alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low);
-  debug1(printf(" and alt_diff %04X\n",(unsigned short) alt_diff));
-
-  diff &= alt_diff;
-
-  diff |= query_flags;
-
-  diff &= mask;
-
-  assert(diff <= 0x0000FFFF);
-
-#if !defined(HAVE_SSE4_2)
-  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
-  return count_bits[diff];
-#elif defined(HAVE_POPCNT)
-  debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff)));
-  return _popcnt32(diff);
-#elif defined(HAVE_MM_POPCNT)
-  debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff)));
-  return _mm_popcnt_u32(diff);
-#elif defined(HAVE_BUILTIN_POPCOUNT)
-  debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff)));
-  return __builtin_popcount(diff);
-#else
-  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
-  return count_bits[diff];
-#endif
-}
-
 
-/* pos5 is where we want to start comparing in the query.  pos3 is
-   just after where we want to stop comparing in the query, i.e., stop
-   at (pos3-1) inclusive */
 int
-Genome_count_mismatches_fragment_right (Compress_T query_compress, int pos5, int pos3,
-					Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) {
-  Genomecomp_T diff, alt_diff, mask;
-  int enddiscard;
-  Genomecomp_T query_high, query_low, query_flags;
-  Genomecomp_T ref_high, ref_low, alt_high, alt_low;
-
-  Compress_get_16mer_right(&query_high,&query_low,&query_flags,query_compress,pos5);
-  enddiscard = pos3 - pos5;
-
-  mask = clear_end_mask(enddiscard);
-  mask &= 0x0000FFFF;		/* Therefore, result of Compress does not need masking */
-  debug1(printf("Mask for enddiscard %d: %08X\n",enddiscard,mask));
-
-
-  /* Unpack genomic fragments */
-  ref_high = ref_fragment >> 16;
-  ref_low = ref_fragment /* & 0x0000FFFF */;
-
-  alt_high = alt_fragment >> 16;
-  alt_low = alt_fragment /* & 0x0000FFFF */;
-
-
-  debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF));
-
-  /* Taken from block_diff */
-  diff = (query_high ^ ref_high) | (query_low ^ ref_low);
-  debug1(printf(" => ref_diff %08X",diff));
-
-  alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low);
-  debug1(printf(" and alt_diff %08X\n",alt_diff));
-
-  diff &= alt_diff;
-
-  diff |= query_flags;
-
-  diff &= mask;
-
-  assert(diff <= 0x0000FFFF);
-
-#if !defined(HAVE_SSE4_2)
-  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
-  return count_bits[diff];
-#elif defined(HAVE_POPCNT)
-  debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff)));
-  return _popcnt32(diff);
-#elif defined(HAVE_MM_POPCNT)
-  debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff)));
-  return _mm_popcnt_u32(diff);
-#elif defined(HAVE_BUILTIN_POPCOUNT)
-  debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff)));
-  return __builtin_popcount(diff);
-#else
-  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
-  return count_bits[diff];
-#endif
-}
-
-
-
-static int
-mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-		 Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
-		 bool query_unk_mismatch_local_p) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches = 0, offset, nshift;
+Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+				       bool plusp, int genestrand) {
+  int nmismatches = 0;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *ptr, *end;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int relpos;
+  Genomecomp_T *ptr, *endptr;
+  Genomecomp_T *query_shifted;
+  int nshift;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
 
   debug(
 	printf("\n\n");
-	printf("Entered mismatches_left with %d max_mismatches\n",max_mismatches);
-	printf("Genome (in mismatches_left):\n");
+	printf("Genome (in count_mismatches_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
 	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
@@ -20653,161 +23012,165 @@ mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_c
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += startcolumni;
+#endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  ptr = &(ref_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
   if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = -startdiscard + pos5;
+    /* Single block */
     debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_local_p);
-#else
-    diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_local_p);
-#endif
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
-      diff_32 = clear_lowbit_32(diff_32,relpos);
-    }
-#ifdef DEBUG14
-    debug(printf("Would return nmismatches %d\n",nmismatches));
-    answer = nmismatches;
-    nmismatches = 0;
+    return popcount_ones_32(diff_32);
+
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2)
+    /* Shift */
+#ifdef USE_SHIFT_HILO
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					 startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    return popcount_ones_128(diff_128);
 #else
-    return nmismatches;
-#endif
+    /* Faster */
+    startdiscard += startcolumni*32;
+    enddiscard += endcolumni*32;
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+    diff_128 = (block_diff_128)(query_shifted - startcolumni,ptr - startcolumni,plusp,genestrand,query_unk_mismatch_p);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
+    return popcount_ones_128(diff_128);
 #endif
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = -startdiscard + pos5;
-
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+#else
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    }
 
-#ifndef DEBUG14
-  }
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
 #endif
 
+#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff));
-      debug(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-    }
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-    return nmismatches;
+    return popcount_ones_128(diff_128);
+#endif
 
   } else {
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
 #endif
+    ptr += GENOME_NEXTROW;
 
-    /* Startblock */
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
 
-    while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff));
-      debug(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-    }
-    if (nmismatches > max_mismatches) {
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-      return nmismatches;
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr + 24 <= endptr) {
+      diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_256(diff_256);
+      query_shifted += 24; ptr += 24;
     }
-
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[startblocki]);
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-    ptr = &(ref_blocks[startblocki+12]);
 #endif
-    end = &(ref_blocks[endblocki]);
-    offset += STEP_SIZE; /* 128 or 32 */
-    while (ptr < end) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
-
-      while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff));
-	debug(print_diff_trailing_zeroes(diff,offset));
-	diff = clear_lowbit(diff,relpos);
-      }
-      if (nmismatches > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-	return nmismatches;
-      }
 
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
+#ifdef HAVE_SSE2
+    while (ptr + 12 <= endptr) {
+      diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_128(diff_128);
+      query_shifted += 12; ptr += 12;
+    }
 #else
-      ptr += 12;
-#endif
-      offset += STEP_SIZE; /* 128 or 32 */
+    while (ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+	nmismatches += popcount_ones_32(diff_32);
+	query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW;
     }
+#endif
 
-    /* Endblock */
-    diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff));
-      debug(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
+    /* End row */
+    while (ptr < endptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
     }
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches));
-    return nmismatches;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
   }
-#endif
-
 }
 
-/* Returns mismatch_positions[0..max_mismatches] */
 static int
-mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-		      Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
-		      bool query_unk_mismatch_local_p) {
+count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+				 bool plusp, int genestrand) {
 #ifdef DEBUG14
   int answer;
 #endif
-  int nmismatches_both = 0, offset, nshift;
+  int nmismatches;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *end;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int relpos;
+  Genomecomp_T *ref_ptr, *alt_ptr, *endptr;
+  Genomecomp_T *query_shifted;
+  int nshift;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
 
   debug(
 	printf("\n\n");
-	printf("Genome (in mismatches_left_snps):\n");
+	printf("Genome (in count_mismatches_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
 	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
@@ -20829,254 +23192,322 @@ mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T qu
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += startcolumni;
+#endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  ref_ptr = &(ref_blocks[startblocki_32]);
+  alt_ptr = &(snp_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
   if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = -startdiscard + pos5;
+    /* Single block */
+    debug(printf("** Single block **\n"));
     debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_local_p);
-#else
-    diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_local_p);
-#endif
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-    while (nonzero_p_32(diff_32) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
-      diff_32 = clear_lowbit_32(diff_32,relpos);
-    }
-#ifdef DEBUG14
-    answer = nmismatches_both;
-    nmismatches_both = 0;
+    return popcount_ones_32(diff_32);
+
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2)
+    /* Shift */
+#ifdef USE_SHIFT_HILO
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					     startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    return popcount_ones_128(diff_128);
 #else
-    return nmismatches_both;
-#endif
+    /* Faster */
+    startdiscard += startcolumni*32;
+    enddiscard += endcolumni*32;
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+    diff_128 = (block_diff_snp_128)(query_shifted - startcolumni,alt_ptr - startcolumni,ref_ptr - startcolumni,
+                                    plusp,genestrand,query_unk_mismatch_p);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
+    return popcount_ones_128(diff_128);
 #endif
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = -startdiscard + pos5;
-
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+#else
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    }
 
-#ifndef DEBUG14
-  }
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
 #endif
 
+#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					    startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff));
-      debug(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-    }
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
-    return nmismatches_both;
+    return popcount_ones_128(diff_128);
+#endif
 
   } else {
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    nmismatches = popcount_ones_32(diff_32);
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
 #endif
+    ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
 
-    /* Startblock */
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
 
-    while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff));
-      debug(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-    }
-    if (nmismatches_both > max_mismatches) {
-      debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
-      return nmismatches_both;
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr + 24 <= endptr) {
+      diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_256(diff_256);
+      query_shifted += 24; ref_ptr += 24; alt_ptr += 24;
     }
+#endif
 
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[startblocki]);
-    alt_ptr = &(snp_blocks[startblocki]);
-    ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#ifdef HAVE_SSE2
+    while (ref_ptr + 12 <= endptr) {
+      diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_128(diff_128);
+      query_shifted += 12; ref_ptr += 12; alt_ptr += 12;
+    }
 #else
-    ref_ptr = &(ref_blocks[startblocki+12]);
-    alt_ptr = &(snp_blocks[startblocki+12]);
-#endif
-    end = &(ref_blocks[endblocki]);
-    offset += STEP_SIZE; /* 128 or 32 */
-    while (ref_ptr < end) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
-
-      while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-	mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff));
-	debug(print_diff_trailing_zeroes(diff,offset));
-	diff = clear_lowbit(diff,relpos);
-      }
-      if (nmismatches_both > max_mismatches) {
-	debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
-	return nmismatches_both;
+    while (ref_ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+	nmismatches += popcount_ones_32(diff_32);
+	query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
       }
-
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
-#else
-      ref_ptr += 12; alt_ptr += 12;
-#endif
-      offset += STEP_SIZE; /* 128 or 32 */
+      /* query_shifted += QUERY_NEXTROW; */  ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
     }
+#endif
 
-    /* Endblock */
-    diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff));
-      debug(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
+    /* End row */
+    while (ref_ptr < endptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      nmismatches += popcount_ones_32(diff_32);
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
     }
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
-    return nmismatches_both;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    return (nmismatches + popcount_ones_32(diff_32));
   }
-#endif
 }
 
 
-
-/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */
-/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */
+/* Counts mismatches for query positions pos5..(pos3-1) against the genome,
+   where left is where the start of the query matches.  Dispatches to
+   Genome_count_mismatches_substring_ref when snp_blocks is NULL, and to
+   count_mismatches_substring_snps otherwise; returns that callee's result. */
 int
-Genome_mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-			Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
-  int nmismatches;
-#ifdef DEBUG
-  int i;
-#endif
+Genome_count_mismatches_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+				   bool plusp, int genestrand) {
 
 #if 0
   if (dibasep) {
-    debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
-
-    nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
-					 pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
-    mismatch_positions[nmismatches] = pos3 + 1;	/* Need +1 because of starting assumed nt */
-
+    Dibase_count_mismatches_substring(&ncolordiffs,query,pos5,pos3,
+				      /*startpos*/left+pos5,/*endpos*/left+pos3);
   }
 #endif
 
   if (snp_blocks == NULL) {
-    nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress,
-				  left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
-    mismatch_positions[nmismatches] = pos3;
+    return Genome_count_mismatches_substring_ref(query_compress,left,pos5,pos3,plusp,genestrand);
   } else {
-    nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress,
-				       left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
-    mismatch_positions[nmismatches] = pos3;
+    return count_mismatches_substring_snps(query_compress,left,pos5,pos3,plusp,genestrand);
   }
-  debug(
-	printf("%d mismatches on left: ",nmismatches);
-	for (i = 0; i <= nmismatches; i++) {
-	  printf("%d ",mismatch_positions[i]);
-	}
-	printf("\n");
-	);
-  
-  return nmismatches;
 }
 
 
-/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */
-/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */
-/* See note above about why we set query_unk_mismatch_p to false */
-int
-Genome_mismatches_left_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-			     Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
-  int nmismatches;
-#ifdef DEBUG
-  int i;
-#endif
+/* Counts mismatches in query positions pos5..(pos3-1), at most 16, against a
+   genomic fragment packed as (high << 16) | low.  A position counts if it
+   differs from both ref and alt, or is set in the query flags. */
+int
+Genome_count_mismatches_fragment_left (Compress_T query_compress, int pos5, int pos3,
+				       Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) {
+  Genomecomp_T diff, alt_diff, mask;
+  int startdiscard;
+  Genomecomp_T query_high, query_low, query_flags;
+  Genomecomp_T ref_high, ref_low, alt_high, alt_low;
+
+  Compress_get_16mer_left(&query_high,&query_low,&query_flags,query_compress,pos3);
+  startdiscard = 16 - (pos3 - pos5); /* positions of the 16-mer that precede pos5 */
+
+  mask = clear_start_mask(startdiscard);
+  mask &= 0x0000FFFF;		/* Therefore, result of Compress does not need masking */
+  debug1(printf("Mask for startdiscard %d: %08X\n",startdiscard,mask));
+
+
+  /* Unpack genomic fragments; the halves stay unmasked because diff is masked to 16 bits below */
+  ref_high = ref_fragment >> 16;
+  ref_low = ref_fragment /* & 0x0000FFFF */;
+
+  alt_high = alt_fragment >> 16;
+  alt_low = alt_fragment /* & 0x0000FFFF */;
+
+
+  debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF));
+
+  /* Taken from block_diff */
+  diff = (query_high ^ ref_high) | (query_low ^ ref_low);
+  debug1(printf(" => ref_diff %04X",(unsigned short) diff));
+
+  alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low);
+  debug1(printf(" and alt_diff %04X\n",(unsigned short) alt_diff));
+
+  diff &= alt_diff; /* a mismatch must differ from both the ref and the alt allele */
+
+  diff |= query_flags; /* flagged query positions are counted as well */
+
+  diff &= mask; /* keep only the requested positions, all within the low 16 bits */
+
+  assert(diff <= 0x0000FFFF);
+
+#if !defined(HAVE_SSE4_2)
+  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
+  return count_bits[diff];
+#elif defined(HAVE_POPCNT)
+  debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff)));
+  return _popcnt32(diff);
+#elif defined(HAVE_MM_POPCNT)
+  debug1(printf("nmismatches %08X => %d\n",diff,_mm_popcnt_u32(diff))); /* print with the same intrinsic that is returned */
+  return _mm_popcnt_u32(diff);
+#elif defined(HAVE_BUILTIN_POPCOUNT)
+  debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff)));
+  return __builtin_popcount(diff);
+#else
+  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
+  return count_bits[diff];
+#endif
+}
+
+
+/* Counts mismatches in query positions pos5..(pos3-1), at most 16, against a
+   genomic fragment packed as (high << 16) | low.  A position counts if it
+   differs from both ref and alt, or is set in the query flags. */
+int
+Genome_count_mismatches_fragment_right (Compress_T query_compress, int pos5, int pos3,
+					Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) {
+  Genomecomp_T diff, alt_diff, mask;
+  int enddiscard;
+  Genomecomp_T query_high, query_low, query_flags;
+  Genomecomp_T ref_high, ref_low, alt_high, alt_low;
+
+  Compress_get_16mer_right(&query_high,&query_low,&query_flags,query_compress,pos5);
+  enddiscard = pos3 - pos5; /* number of positions to compare, starting at pos5 */
+
+  mask = clear_end_mask(enddiscard);
+  mask &= 0x0000FFFF;		/* Therefore, result of Compress does not need masking */
+  debug1(printf("Mask for enddiscard %d: %08X\n",enddiscard,mask));
+
+
+  /* Unpack genomic fragments; the halves stay unmasked because diff is masked to 16 bits below */
+  ref_high = ref_fragment >> 16;
+  ref_low = ref_fragment /* & 0x0000FFFF */;
+
+  alt_high = alt_fragment >> 16;
+  alt_low = alt_fragment /* & 0x0000FFFF */;
+
+
+  debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF));
+
+  /* Taken from block_diff */
+  diff = (query_high ^ ref_high) | (query_low ^ ref_low);
+  debug1(printf(" => ref_diff %08X",diff));
+
+  alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low);
+  debug1(printf(" and alt_diff %08X\n",alt_diff));
+
+  diff &= alt_diff; /* a mismatch must differ from both the ref and the alt allele */
+
+  diff |= query_flags; /* flagged query positions are counted as well */
 
-#if 0
-  if (dibasep) {
-    debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+  diff &= mask; /* keep only the requested positions, all within the low 16 bits */
 
-    nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
-					 pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
-    mismatch_positions[nmismatches] = pos3 + 1;	/* Need +1 because of starting assumed nt */
+  assert(diff <= 0x0000FFFF);
 
-  }
+#if !defined(HAVE_SSE4_2)
+  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
+  return count_bits[diff];
+#elif defined(HAVE_POPCNT)
+  debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff)));
+  return _popcnt32(diff);
+#elif defined(HAVE_MM_POPCNT)
+  debug1(printf("nmismatches %08X => %d\n",diff,_mm_popcnt_u32(diff))); /* print with the same intrinsic that is returned */
+  return _mm_popcnt_u32(diff);
+#elif defined(HAVE_BUILTIN_POPCOUNT)
+  debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff)));
+  return __builtin_popcount(diff);
+#else
+  debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff]));
+  return count_bits[diff];
 #endif
-
-  if (snp_blocks == NULL) {
-    nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress,
-				  left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
-    mismatch_positions[nmismatches] = pos3;
-  } else {
-    nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress,
-				       left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
-    mismatch_positions[nmismatches] = pos3;
-  }
-  debug(
-	printf("%d mismatches on left: ",nmismatches);
-	for (i = 0; i <= nmismatches; i++) {
-	  printf("%d ",mismatch_positions[i]);
-	}
-	printf("\n");
-	);
-  
-  return nmismatches;
 }
 
 
+
 static int
-mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-		  Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
-		  bool query_unk_mismatch_local_p) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches = 0, offset, relpos, nshift;
+mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+		 Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
+		 bool query_unk_mismatch_local_p) {
+  int nmismatches = 0, offset, nshift;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *start, *ptr;
+  Genomecomp_T *query_shifted, *ptr, *endptr;
+  int relpos;
+  int startcolumni, endcolumni;
   UINT4 diff_32;
-  Genomediff_T diff;
-#ifndef HAVE_BUILTIN_CLZ
-  Genomecomp_T top;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
 #endif
-  int startcolumni, endcolumni;
 
 
   debug(
 	printf("\n\n");
-	printf("Entered mismatches_right with %d max_mismatches\n",max_mismatches);
-	printf("Genome (in mismatches_right):\n");
+	printf("Entered mismatches_left with %d max_mismatches\n",max_mismatches);
+	printf("Genome (in mismatches_left):\n");
 	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
 
+
   startblocki = (left+pos5)/128U*12;
   startcolumni = ((left+pos5) % 128) / 32;
   startblocki_32 = startblocki + startcolumni;
@@ -21092,165 +23523,247 @@ mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_
   query_shifted = Compress_shift(query_compress,nshift);
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (startblocki_32 == endblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = (pos3 - 1) - enddiscard + 32;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
+  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[endblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_local_p);
 #else
-    diff_32 = (block_diff_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_local_p);
+  query_shifted += startcolumni;
 #endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ptr = &(ref_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
+
+  if (endblocki_32 == startblocki_32) {
+    /* Single block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
     while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
-      diff_32 = clear_highbit_32(diff_32,relpos);
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
-#ifdef DEBUG14
-    answer = nmismatches;
-    nmismatches = 0;
-#else
     return nmismatches;
-#endif
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					 startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+    }
+    return nmismatches;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
+#else
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = (pos3 - 1) - enddiscard + STEP_SIZE;
-  
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n",
-		 nshift,startdiscard,enddiscard,offset));
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+    }
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
 
-#ifndef DEBUG14
-  }
-#endif
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    offset += 32;
 
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (startblocki == endblocki) {
-    diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
-			plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff));
-      debug(print_diff_leading_zeroes(diff,offset));
-      diff = clear_highbit(diff,relpos);
-    }
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches));
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+    }
     return nmismatches;
+#endif
 
-  } else {
+#if defined(USE_WRAP) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+    }
+    return nmismatches;
 #endif
 
-    /* Endblock */
-    diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
-			plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_end(diff,enddiscard);
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff));
-      debug(print_diff_leading_zeroes(diff,offset));
-      diff = clear_highbit(diff,relpos);
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
     if (nmismatches > max_mismatches) {
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches));
       return nmismatches;
     }
+    
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-    query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[endblocki]);
-    ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
-#else
-    ptr = &(ref_blocks[endblocki-12]);
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
 #endif
-    start = &(ref_blocks[startblocki]);
-    offset -= STEP_SIZE; /* 128 or 32 */
-    while (ptr > start) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    ptr += GENOME_NEXTROW;
+
+
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr + 24 <= endptr) {
+      diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-      while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff));
-	debug(print_diff_leading_zeroes(diff,offset));
-	diff = clear_highbit(diff,relpos);
+      while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_256(diff_256));
+	diff_256 = clear_lowbit_256(diff_256,relpos);
       }
       if (nmismatches > max_mismatches) {
-	debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches));
 	return nmismatches;
       }
+      
+      query_shifted += 24; ptr += 24;
+      offset += 256;
+    }
+#endif
 
-      query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#ifdef HAVE_SSE2
+    while (ptr + 12 <= endptr) {
+      diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128));
+	diff_128 = clear_lowbit_128(diff_128,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += 12; ptr += 12;
+      offset += 128;
+    }
 #else
-      ptr -= 12;
-#endif
-      offset -= STEP_SIZE; /* 128 or 32 */
+    while (ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+	while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	  mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	  diff_32 = clear_lowbit_32(diff_32,relpos);
+	}
+	if (nmismatches > max_mismatches) {
+	  return nmismatches;
+	}
+      
+	query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+	offset += 32;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW;
     }
+#endif
 
-    /* Startblock */
-    diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
+    /* End row */
+    while (ptr < endptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-    while (nonzero_p(diff) && nmismatches <= max_mismatches) {
-      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff));
-      debug(print_diff_leading_zeroes(diff,offset));
-      diff = clear_highbit(diff,relpos);
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
     }
 
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches));
-    return nmismatches;
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+    }
+    return nmismatches;
   }
-#endif
 }
 
-
+/* Returns nmismatches_both */
 static int
-mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-		       Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
-		       bool query_unk_mismatch_local_p) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int nmismatches_both = 0, offset, relpos, nshift;
+mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+		      Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
+		      bool query_unk_mismatch_local_p) {
+  int nmismatches = 0, offset, nshift;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *start;
+  Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *endptr;
+  int relpos;
+  int startcolumni, endcolumni;
   UINT4 diff_32;
-  Genomediff_T diff;
-#ifndef HAVE_BUILTIN_CLZ
-  Genomecomp_T top;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
 #endif
-  int startcolumni, endcolumni;
 
 
   debug(
 	printf("\n\n");
-	printf("Genome (in mismatches_right_snps):\n");
-	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
+	printf("Entered mismatches_left_snps with %d max_mismatches\n",max_mismatches);
+	printf("Genome (in mismatches_left):\n");
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
 
@@ -21270,147 +23783,229 @@ mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T q
   query_shifted = Compress_shift(query_compress,nshift);
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (startblocki_32 == endblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = (pos3 - 1) - enddiscard + 32;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
+  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_local_p);
 #else
-    diff_32 = (block_diff_snp_32)(query_shifted + endcolumni,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_local_p);
+  query_shifted += startcolumni;
 #endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ref_ptr = &(ref_blocks[startblocki_32]);
+  alt_ptr = &(snp_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
+
+  if (endblocki_32 == startblocki_32) {
+    /* Single block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-    while (nonzero_p_32(diff_32) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes_32(diff_32));
-      diff_32 = clear_highbit_32(diff_32,relpos);
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
-#ifdef DEBUG14
-    answer = nmismatches_both;
-    nmismatches_both = 0;
+    return nmismatches;
+
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					     startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+    }
+    return nmismatches;
+
 #else
-    return nmismatches_both;
-#endif
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+    }
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = (pos3 - 1) - enddiscard + STEP_SIZE;
-  
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n",
-		 nshift,startdiscard,enddiscard,offset));
-#ifndef DEBUG14
-  }
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+    }
+    return nmismatches;
 #endif
 
+#if defined(USE_WRAP) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					    startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (startblocki == endblocki) {
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			    plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff));
-      debug(print_diff_leading_zeroes(diff,offset));
-      diff = clear_highbit(diff,relpos);
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
     }
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both));
-    return nmismatches_both;
-
-  } else {
+    return nmismatches;
 #endif
 
-    /* Endblock */
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			    plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_end(diff,enddiscard);
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff));
-      debug(print_diff_leading_zeroes(diff,offset));
-      diff = clear_highbit(diff,relpos);
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
-    if (nmismatches_both > max_mismatches) {
-      debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both));
-      return nmismatches_both;
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
     }
+    
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-    query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[endblocki]);
-    alt_ptr = &(snp_blocks[endblocki]);
-    ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
-#else
-    ref_ptr = &(ref_blocks[endblocki-12]);
-    alt_ptr = &(snp_blocks[endblocki-12]);
-#endif
-    start = &(ref_blocks[startblocki]);
-    offset -= STEP_SIZE; /* 128 or 32 */
-    while (ref_ptr > start) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
-
-      while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-	mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff));
-	debug(print_diff_leading_zeroes(diff,offset));
-	diff = clear_highbit(diff,relpos);
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
       }
-      if (nmismatches_both > max_mismatches) {
-        debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both));
-	return nmismatches_both;
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
       }
+      
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
 
-      query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
-#else
-      ref_ptr -= 12; alt_ptr -= 12;
+
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr + 24 <= endptr) {
+      diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_256(diff_256));
+	diff_256 = clear_lowbit_256(diff_256,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += 24; ref_ptr += 24; alt_ptr += 24;
+      offset += 256;
+    }
 #endif
-      offset -= STEP_SIZE; /* 128 or 32 */
+
+#ifdef HAVE_SSE2
+    while (ref_ptr + 12 <= endptr) {
+      diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128));
+	diff_128 = clear_lowbit_128(diff_128,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += 12; ref_ptr += 12; alt_ptr += 12;
+      offset += 128;
+    }
+#else
+    while (ref_ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+	
+	while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	  mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	  diff_32 = clear_lowbit_32(diff_32,relpos);
+	}
+	if (nmismatches > max_mismatches) {
+	  return nmismatches;
+	}
+	
+	query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+	offset += 32;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
     }
+#endif
 
-    /* Startblock */
-    diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
-    diff = clear_start(diff,startdiscard);
+    /* End row */
+    while (ref_ptr < endptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-    while (nonzero_p(diff) && nmismatches_both <= max_mismatches) {
-      mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff));
-      debug(print_diff_leading_zeroes(diff,offset));
-      diff = clear_highbit(diff,relpos);
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      offset += 32;
     }
 
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both));
-    return nmismatches_both;
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+    }
+    return nmismatches;
   }
-#endif
 }
 
 
-/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */
+/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */
+/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */
 int
-Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-			 Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
+Genome_mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+			Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
   int nmismatches;
 #ifdef DEBUG
   int i;
@@ -21418,37 +24013,42 @@ Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T
 
 #if 0
   if (dibasep) {
-    debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+    debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+
+    nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
+					 pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
+    mismatch_positions[nmismatches] = pos3 + 1;	/* Need +1 because of starting assumed nt */
 
-    nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
-					  pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
   }
 #endif
 
   if (snp_blocks == NULL) {
-    nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress,
-				   left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
+    nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress,
+				  left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
+    mismatch_positions[nmismatches] = pos3;
   } else {
-    nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress,
-					left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
+    nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress,
+				       left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
+    mismatch_positions[nmismatches] = pos3;
   }
-  mismatch_positions[nmismatches] = -1;
   debug(
-	printf("%d mismatches on right: ",nmismatches);
+	printf("%d mismatches on left: ",nmismatches);
 	for (i = 0; i <= nmismatches; i++) {
 	  printf("%d ",mismatch_positions[i]);
 	}
 	printf("\n");
 	);
+  
   return nmismatches;
 }
 
 
-/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */
+/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */
+/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */
 /* See note above about why we set query_unk_mismatch_p to false */
 int
-Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
-			      Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
+Genome_mismatches_left_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+			     Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
   int nmismatches;
 #ifdef DEBUG
   int i;
@@ -21456,64 +24056,65 @@ Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compr
 
 #if 0
   if (dibasep) {
-    debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+    debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+
+    nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
+					 pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
+    mismatch_positions[nmismatches] = pos3 + 1;	/* Need +1 because of starting assumed nt */
 
-    nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
-					  pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
   }
 #endif
 
   if (snp_blocks == NULL) {
-    nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress,
-				   left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
+    nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress,
+				  left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
+    mismatch_positions[nmismatches] = pos3;
   } else {
-    nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress,
-					left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
+    nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress,
+				       left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
+    mismatch_positions[nmismatches] = pos3;
   }
-  mismatch_positions[nmismatches] = -1;
   debug(
-	printf("%d mismatches on right: ",nmismatches);
+	printf("%d mismatches on left: ",nmismatches);
 	for (i = 0; i <= nmismatches; i++) {
 	  printf("%d ",mismatch_positions[i]);
 	}
 	printf("\n");
 	);
+  
   return nmismatches;
 }
 
 
-/************************************************************************
- *  Marking
- ************************************************************************/
-
-/* Derived from mismatches_left() */
-int
-Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_compress,
-			    Univcoord_T left, int pos5, int pos3,
-			    bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int mismatch_position;
-  int nmismatches = 0, offset, nshift;
+static int
+mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+		  Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
+		  bool query_unk_mismatch_local_p) {
+  int nmismatches = 0, offset, relpos, nshift;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *ptr, *end;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int relpos;
+  Genomecomp_T *query_shifted, *ptr, *startptr;
+#ifndef HAVE_BUILTIN_CLZ
+  Genomecomp_T top;
+#endif
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
 
-  debug5(
+  debug(
 	printf("\n\n");
-	printf("genomic = %s\n",genomic);
-	printf("Genome (in mark_mismatches_ref):\n");
+	printf("Entered mismatches_right with %d max_mismatches\n",max_mismatches);
+	printf("Genome (in mismatches_right):\n");
 	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
 
-
   startblocki = (left+pos5)/128U*12;
   startcolumni = ((left+pos5) % 128) / 32;
   startblocki_32 = startblocki + startcolumni;
@@ -21522,211 +24123,257 @@ Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_com
   endcolumni = ((left+pos3) % 128) / 32;
   endblocki_32 = endblocki + endcolumni;
 
-  debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n",
-		left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE));
+  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
+	       left,pos5,pos3,startblocki,endblocki));
 
   nshift = left % STEP_SIZE;
   query_shifted = Compress_shift(query_compress,nshift);
-  debug5(printf("Query shifted %d:\n",nshift));
-  debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-#if 0
-    if (plusp == true) {
-      offset = -startdiscard + pos5 /*+ mismatch_offset*/;
-    } else {
-      offset = -startdiscard + pos5 /*- mismatch_offset*/;
-    }
+  debug(printf("Query shifted %d:\n",nshift));
+  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
 #else
-    offset = -startdiscard + pos5;
+  query_shifted += endcolumni;
 #endif
-    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = (pos3 - 1) - enddiscard + 32;
+  ptr = &(ref_blocks[endblocki_32]);
+  startptr = &(ref_blocks[startblocki_32]);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
-#else
-    diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
-#endif
+  if (startblocki_32 == endblocki_32) {
+    /* Single block */
+    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-    while (nonzero_p_32(diff_32)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
-      diff_32 = clear_lowbit_32(diff_32,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
-      }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches++;
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
     }
-    debug5(printf("genomic = %s\n",genomic));
-#ifdef DEBUG14
-    answer = nmismatches;
-    nmismatches = 0;
+    return nmismatches;
+
+  } else if (startblocki == endblocki) {
+#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2)
+    /* Shift */
+    startdiscard += 96 - (endcolumni - startcolumni)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_128_shift_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					 endcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128));
+      diff_128 = clear_highbit_128(diff_128,relpos);
+    }
+    return nmismatches;
+
 #else
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+
+    query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+    offset -= 32;
+
+    /* Single row */
+    while (--endcolumni > startcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	diff_32 = clear_highbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      offset -= 32;
+    }
+
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
     return nmismatches;
 #endif
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    startdiscard += (startcolumni - endcolumni - 1)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_128_wrap_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					endcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128));
+      diff_128 = clear_highbit_128(diff_128,relpos);
+    }
+    return nmismatches;
 #endif
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-#if 0
-    if (plusp == true) {
-      offset = -startdiscard + pos5 /*+ mismatch_offset*/;
-    } else {
-      offset = -startdiscard + pos5 /*- mismatch_offset*/;
+  } else {
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
     }
-#else
-    offset = -startdiscard + pos5;
-#endif
-  
-    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+    query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+    offset -= 32;
 
-#ifndef DEBUG14
-  }
+    /* End row */
+    while (--endcolumni >= 0) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	diff_32 = clear_highbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      offset -= 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted -= QUERY_NEXTROW;
 #endif
+    ptr -= GENOME_NEXTROW;
 
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug5(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr >= startptr + 24) {
+      diff_256 = (block_diff_256)(&(query_shifted[-15]),&(ptr[-15]),plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_256(diff_256));
+	diff_256 = clear_highbit_256(diff_256,relpos);
       }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches++;
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= 24; ptr -= 24;
+      offset -= 256;
     }
-    debug5(printf("genomic = %s\n",genomic));
-    debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches));
-    return nmismatches;
-
-  } else {
 #endif
 
-    /* Startblock */
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-
-    while (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug5(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
+#ifdef HAVE_SSE2
+    while (ptr >= startptr + 12) {
+      diff_128 = (block_diff_128)(&(query_shifted[-3]),&(ptr[-3]),plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128));
+	diff_128 = clear_highbit_128(diff_128,relpos);
       }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches++;
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= 12; ptr -= 12;
+      offset -= 128;
     }
-
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[startblocki]);
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
 #else
-    ptr = &(ref_blocks[startblocki+12]);
-#endif
-    end = &(ref_blocks[endblocki]);
-    offset += STEP_SIZE; /* 128 or 32 */
-    while (ptr < end) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    while (ptr >= startptr + 12) {
+      for (endcolumni = 3; endcolumni >= 0; --endcolumni) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-      while (nonzero_p(diff)) {
-	mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-	debug5(print_diff_trailing_zeroes(diff,offset));
-	diff = clear_lowbit(diff,relpos);
-	if (plusp == false) {
-	  mismatch_position = (querylength - 1) - mismatch_position;
+	while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	  mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	  diff_32 = clear_highbit_32(diff_32,relpos);
 	}
-	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-	nmismatches++;
+	if (nmismatches > max_mismatches) {
+	  return nmismatches;
+	}
+	query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+	offset -= 32;
       }
-
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-      ptr += 12;
-#endif
-      offset += STEP_SIZE; /* 128 or 32 */
+      /* query_shifted -= QUERY_NEXTROW; */ ptr -= GENOME_NEXTROW;
     }
+#endif
 
-    /* Endblock */
-    diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard);
+    /* Start row */
+    while (ptr > startptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-    while (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug5(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	diff_32 = clear_highbit_32(diff_32,relpos);
       }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches++;
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      offset -= 32;
     }
-    debug5(printf("genomic = %s\n",genomic));
-    debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches));
-    return nmismatches;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
+    return nmismatches;
   }
-#endif
 }
 
-/* Derived from mismatches_left_snps() */
+/* Returns nmismatches_both */
 static int
-mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
-		      Univcoord_T left, int pos5, int pos3,
-		      bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int mismatch_position;
-  int nmismatches_both = 0, offset, nshift;
+mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+		       Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand,
+		       bool query_unk_mismatch_local_p) {
+  int nmismatches = 0, offset, relpos, nshift;
   int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *end;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int relpos;
+  Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *startptr;
+#ifndef HAVE_BUILTIN_CLZ
+  Genomecomp_T top;
+#endif
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
 
-  debug5(
+  debug(
 	printf("\n\n");
-	printf("genomic = %s\n",genomic);
-	printf("Genome (in mark_mismatches_snps):\n");
-	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
+	printf("Entered mismatches_right_snps with %d max_mismatches\n",max_mismatches);
+	printf("Genome (in mismatches_right):\n");
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
 
-
   startblocki = (left+pos5)/128U*12;
   startcolumni = ((left+pos5) % 128) / 32;
   startblocki_32 = startblocki + startcolumni;
@@ -21735,241 +24382,334 @@ mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
   endcolumni = ((left+pos3) % 128) / 32;
   endblocki_32 = endblocki + endcolumni;
 
-  debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
+  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
 	       left,pos5,pos3,startblocki,endblocki));
 
   nshift = left % STEP_SIZE;
   query_shifted = Compress_shift(query_compress,nshift);
-  debug5(printf("Query shifted %d:\n",nshift));
-  debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-#if 0
-    if (plusp == true) {
-      offset = -startdiscard + pos5 /*+ mismatch_offset*/;
-    } else {
-      offset = -startdiscard + pos5 /*- mismatch_offset*/;
-    }
+  debug(printf("Query shifted %d:\n",nshift));
+  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
 #else
-    offset = -startdiscard + pos5;
+  query_shifted += endcolumni;
 #endif
-    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = (pos3 - 1) - enddiscard + 32;
+  ref_ptr = &(ref_blocks[endblocki_32]);
+  alt_ptr = &(snp_blocks[endblocki_32]);
+  startptr = &(ref_blocks[startblocki_32]);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
-#else
-    diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
-#endif
+  if (startblocki_32 == endblocki_32) {
+    /* Single block */
+    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
     diff_32 = clear_start_32(diff_32,startdiscard);
     diff_32 = clear_end_32(diff_32,enddiscard);
 
-    while (nonzero_p_32(diff_32)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
-      diff_32 = clear_lowbit_32(diff_32,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
-      }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches_both++;
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
     }
-    debug5(printf("genomic = %s\n",genomic));
-#ifdef DEBUG14
-    answer = nmismatches_both;
-    nmismatches_both = 0;
+    return nmismatches;
+
+  } else if (startblocki == endblocki) {
+#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2)
+    /* Shift */
+    startdiscard += 96 - (endcolumni - startcolumni)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_snp_128_shift_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					     endcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128));
+      diff_128 = clear_highbit_128(diff_128,relpos);
+    }
+    return nmismatches;
+
 #else
-    return nmismatches_both;
-#endif
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-  }
-#ifndef DEBUG14
-  else {
-#endif
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
+    query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+    offset -= 32;
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-#if 0
-    if (plusp == true) {
-      offset = -startdiscard + pos5 /*+ mismatch_offset*/;
-    } else {
-      offset = -startdiscard + pos5 /*- mismatch_offset*/;
+    /* Single row */
+    while (--endcolumni > startcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	diff_32 = clear_highbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+
+      query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+      offset -= 32;
     }
-#else
-    offset = -startdiscard + pos5;
-#endif
-  
-    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#ifndef DEBUG14
-  }
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
+    return nmismatches;
 #endif
 
+#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    startdiscard += (startcolumni - endcolumni - 1)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_snp_128_wrap_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p,
+					    endcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-#if defined(WORDS_BIGENDIAN)|| !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-    diff = clear_end(diff,enddiscard);
-
-    while (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug5(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
-      }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches_both++;
+    while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128));
+      diff_128 = clear_highbit_128(diff_128,relpos);
     }
-    debug5(printf("genomic = %s\n",genomic));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
-    return nmismatches_both;
+    return nmismatches;
+#endif
 
   } else {
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
+    if (nmismatches > max_mismatches) {
+      return nmismatches;
+    }
+    query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+    offset -= 32;
+
+    /* End row */
+    while (--endcolumni >= 0) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	diff_32 = clear_highbit_32(diff_32,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+      offset -= 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted -= QUERY_NEXTROW;
 #endif
+    ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW;
 
-    /* Startblock */
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard);
-
-    while (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug5(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
+
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr >= startptr + 24) {
+      diff_256 = (block_diff_snp_256)(&(query_shifted[-15]),&(alt_ptr[-15]),&(ref_ptr[-15]),plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_256(diff_256));
+	diff_256 = clear_highbit_256(diff_256,relpos);
       }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches_both++;
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= 24; ref_ptr -= 24; alt_ptr -= 24;
+      offset -= 256;
     }
+#endif
 
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[startblocki]);
-    alt_ptr = &(snp_blocks[startblocki]);
-    ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
+#ifdef HAVE_SSE2
+    while (ref_ptr >= startptr + 12) {
+      diff_128 = (block_diff_snp_128)(&(query_shifted[-3]),&(alt_ptr[-3]),&(ref_ptr[-3]),plusp,genestrand,query_unk_mismatch_local_p);
+
+      while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128));
+	diff_128 = clear_highbit_128(diff_128,relpos);
+      }
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= 12; ref_ptr -= 12; alt_ptr -= 12;
+      offset -= 128;
+    }
 #else
-    ref_ptr = &(ref_blocks[startblocki+12]);
-    alt_ptr = &(snp_blocks[startblocki+12]);
-#endif
-    end = &(ref_blocks[endblocki]);
-    offset += STEP_SIZE; /* 128 or 32 */
-    while (ref_ptr < end) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-
-      while (nonzero_p(diff)) {
-	mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-	debug5(print_diff_trailing_zeroes(diff,offset));
-	diff = clear_lowbit(diff,relpos);
-	if (plusp == false) {
-	  mismatch_position = (querylength - 1) - mismatch_position;
+    while (ref_ptr >= startptr + 12) {
+      for (endcolumni = 3; endcolumni >= 0; --endcolumni) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+
+	while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	  mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	  diff_32 = clear_highbit_32(diff_32,relpos);
 	}
-	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-	nmismatches_both++;
+	if (nmismatches > max_mismatches) {
+	  return nmismatches;
+	}
+	query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+	offset -= 32;
       }
-
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
-#else
-      ref_ptr += 12; alt_ptr += 12;
-#endif
-      offset += STEP_SIZE; /* 128 or 32 */
+      /* query_shifted -= QUERY_NEXTROW; */ ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW;
     }
+#endif
 
-    /* Endblock */
-    diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard);
+    /* Start row */
+    while (ref_ptr > startptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
 
-    while (nonzero_p(diff)) {
-      mismatch_position = offset + (relpos = count_trailing_zeroes(diff));
-      debug5(print_diff_trailing_zeroes(diff,offset));
-      diff = clear_lowbit(diff,relpos);
-      if (plusp == false) {
-	mismatch_position = (querylength - 1) - mismatch_position;
+      while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+	mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+	diff_32 = clear_highbit_32(diff_32,relpos);
       }
-      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
-      nmismatches_both++;
+      if (nmismatches > max_mismatches) {
+	return nmismatches;
+      }
+      query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+      offset -= 32;
     }
-    debug5(printf("genomic = %s\n",genomic));
-    debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both));
-    return nmismatches_both;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) {
+      mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32));
+      diff_32 = clear_highbit_32(diff_32,relpos);
+    }
+    return nmismatches;
+  }
+}
+
+
+
+/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */
+int
+Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+			 Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
+  int nmismatches;
+#ifdef DEBUG
+  int i;
+#endif
+
+#if 0
+  if (dibasep) {
+    debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+
+    nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
+					  pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
   }
 #endif
+
+  if (snp_blocks == NULL) {
+    nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress,
+				   left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
+  } else {
+    nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress,
+					left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p);
+  }
+  mismatch_positions[nmismatches] = -1;
+  debug(
+	printf("%d mismatches on right: ",nmismatches);
+	for (i = 0; i <= nmismatches; i++) {
+	  printf("%d ",mismatch_positions[i]);
+	}
+	printf("\n");
+	);
+  return nmismatches;
 }
 
 
+/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */
+/* See note above about why we set query_unk_mismatch_p to false */
 int
-Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compress,
-			Univcoord_T left, int pos5, int pos3,
-			bool plusp, int genestrand) {
+Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress,
+			      Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) {
+  int nmismatches;
+#ifdef DEBUG
+  int i;
+#endif
 
 #if 0
   if (dibasep) {
-    fprintf(stderr,"Not implemented\n");
-#if 0
-    debug5(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+    debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3));
 
-    nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
-					 pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
-    mismatch_positions[nmismatches] = pos3 + 1;	/* Need +1 because of starting assumed nt */
-#endif
-    return 0;
+    nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
+					  pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
   }
 #endif
 
   if (snp_blocks == NULL) {
-    return Genome_mark_mismatches_ref(&(*genomic),querylength,query_compress,
-				      left,pos5,pos3,plusp,genestrand);
+    nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress,
+				   left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
   } else {
-    return mark_mismatches_snps(&(*genomic),querylength,query_compress,
-				left,pos5,pos3,plusp,genestrand);
+    nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress,
+					left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false);
   }
+  mismatch_positions[nmismatches] = -1;
+  debug(
+	printf("%d mismatches on right: ",nmismatches);
+	for (i = 0; i <= nmismatches; i++) {
+	  printf("%d ",mismatch_positions[i]);
+	}
+	printf("\n");
+	);
+  return nmismatches;
 }
 
 
 /************************************************************************
- *  Trimming
+ *  Marking
  ************************************************************************/
 
-static int
-trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-		     bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
-  int startdiscard, enddiscard, offset;
+/* Derived from mismatches_left() */
+int
+Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_compress,
+			    Univcoord_T left, int pos5, int pos3,
+			    bool plusp, int genestrand) {
+  int mismatch_position;
+  int nmismatches = 0, offset, nshift;
+  int startdiscard, enddiscard;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *ptr, *start;
-  Genomecomp_T *query_shifted;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int nshift;
+  Genomecomp_T *query_shifted, *ptr, *endptr;
+  int relpos;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
-  int totalscore, bestscore, score;
-  int trimpos;
-  Genomecomp_T p;
 
-  debug(
+  debug5(
 	printf("\n\n");
-	printf("Genome (in trim_left_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	printf("genomic = %s\n",genomic);
+	printf("Genome (in mark_mismatches_ref):\n");
 	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
@@ -21983,1082 +24723,1042 @@ trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
   endcolumni = ((left+pos3) % 128) / 32;
   endblocki_32 = endblocki + endcolumni;
 
-  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
-	       left,pos5,pos3,startblocki,endblocki));
+  debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n",
+		left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE));
 
   nshift = left % STEP_SIZE;
   query_shifted = Compress_shift(query_compress,nshift);
-  debug(printf("Query shifted %d:\n",nshift));
-  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (startblocki_32 == endblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = (pos3 - 1) - enddiscard + 32;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
+  debug5(printf("Query shifted %d:\n",nshift));
+  debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[endblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
 #else
-    diff_32 = (block_diff_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
+  query_shifted += startcolumni;
 #endif
-    diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */
-    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
-
 
-    p = 3*(diff_32 >> 16);
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ptr = &(ref_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
-    p = 3*(diff_32 & 0x0000FFFF);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    /* totalscore += score_high[p+2]; */
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    /* offset -= 16 */
+  if (endblocki_32 == startblocki_32) {
+    /* Single block */
+    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#ifdef DEBUG14
-    answer = (trimpos - 1);
-#else
-    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
-#endif
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-#ifndef DEBUG14
-    else {
-#endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
-
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = (pos3 - 1) - enddiscard + STEP_SIZE;
-
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n",
-		 nshift,startdiscard,enddiscard,offset));
-
-#ifndef DEBUG14
-  }
-#endif
+    debug5(printf("genomic = %s\n",genomic));
+    return nmismatches;
 
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					 startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
+    }
+    return nmismatches;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
 #else
-  if (startblocki == endblocki) {
-    diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
-    diff = set_start(diff,startdiscard);  /* puts 1 (mismatches) at start */
-
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,_mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,_mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,_mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,_mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,_mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+      while (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,_mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,_mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,_mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1));
-    return trimpos - 1;		/* trimpos-1 is on side of mismatch */
-
-  } else {
+    
+    return nmismatches;
 #endif
 
-    /* Endblock */
-    diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
-
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff >> 16);
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-
-    p = 3*(diff & 0x0000FFFF);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_128(diff_128)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    return nmismatches;
+#endif
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+      while (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ptr += GENOME_NEXTROW;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr + 24 <= endptr) {
+      diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      while (nonzero_p_256(diff_256)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_256(diff_256));
+	diff_256 = clear_lowbit_256(diff_256,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += 24; ptr += 24;
+      offset += 256;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 #endif
 
-    query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[endblocki]);
-    ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
+#ifdef HAVE_SSE2
+    while (ptr + 12 <= endptr) {
+      diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      while (nonzero_p_128(diff_128)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+	diff_128 = clear_lowbit_128(diff_128,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += 12; ptr += 12;
+      offset += 128;
+    }
 #else
-    ptr = &(ref_blocks[endblocki-12]);
+    while (ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+	while (nonzero_p_32(diff_32)) {
+	  mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	  diff_32 = clear_lowbit_32(diff_32,relpos);
+	  if (plusp == false) {
+	    mismatch_position = (querylength - 1) - mismatch_position;
+	  }
+	  genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	  nmismatches++;
+	}
+      
+	query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+	offset += 32;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW;
+    }
 #endif
-    start = &(ref_blocks[startblocki]);
-    while (ptr > start) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
+    /* End row */
+    while (ptr < endptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      p = 3*(diff >> 16);
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
+      while (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
       }
-      totalscore += score_high[p+2];
-      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+      
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
 
-      p = 3*(diff & 0x0000FFFF);
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-#else
-      p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
       }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
+    }
+    return nmismatches;
+  }
+}
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+/* Derived from mismatches_left_snps() */
+/* Returns nmismatches_both */
+static int
+mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress,
+		      Univcoord_T left, int pos5, int pos3,
+		      bool plusp, int genestrand) {
+  int mismatch_position;
+  int nmismatches = 0, offset, nshift;
+  int startdiscard, enddiscard;
+  Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
+  Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *endptr;
+  int relpos;
+  int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+  debug5(
+	printf("\n\n");
+	printf("genomic = %s\n",genomic);
+	printf("Genome (in mark_mismatches_ref):\n");
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
+	printf("\n");
+	);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+  startblocki = (left+pos5)/128U*12;
+  startcolumni = ((left+pos5) % 128) / 32;
+  startblocki_32 = startblocki + startcolumni;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
-#endif
+  endblocki = (left+pos3)/128U*12;
+  endcolumni = ((left+pos3) % 128) / 32;
+  endblocki_32 = endblocki + endcolumni;
+
+  debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n",
+		left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE));
 
-      query_shifted -= COMPRESS_BLOCKSIZE;
+  nshift = left % STEP_SIZE;
+  query_shifted = Compress_shift(query_compress,nshift);
+  debug5(printf("Query shifted %d:\n",nshift));
+  debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;}
 #else
-      ptr -= 12;
+  query_shifted += startcolumni;
 #endif
-    }
-
-    /* Startblock */
-    diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-    diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
-
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff >> 16);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ref_ptr = &(ref_blocks[startblocki_32]);
+  alt_ptr = &(snp_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
-    p = 3*(diff & 0x0000FFFF);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    /* totalscore += score_high[p+2]; */
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    /* offset -= 16; */
+  if (endblocki_32 == startblocki_32) {
+    /* Single block */
+    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    debug5(printf("genomic = %s\n",genomic));
+    return nmismatches;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					     startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+
+    while (nonzero_p_128(diff_128)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    debug5(printf("genomic = %s\n",genomic));
+    return nmismatches;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+#else
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      while (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      offset += 32;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-#endif
     
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1));
-    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  }
+    return nmismatches;
 #endif
-}
 
+#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					    startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard);
+    diff_128 = clear_end_128(diff_128,enddiscard);
 
-static int
-trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-			  bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
+    while (nonzero_p_128(diff_128)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+      diff_128 = clear_lowbit_128(diff_128,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
+    }
+    debug5(printf("genomic = %s\n",genomic));
+    return nmismatches;
 #endif
-  int startdiscard, enddiscard, offset;
-  Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *ref_ptr, *alt_ptr, *start;
-  Genomecomp_T *query_shifted;
-  UINT4 diff_32;
-  Genomediff_T diff;
-  int nshift;
-  int startcolumni, endcolumni;
 
-  int totalscore, bestscore, score;
-  int trimpos;
-  Genomecomp_T p;
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-  debug(
-	printf("\n\n");
-	printf("Genome (in trim_left_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
-	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
-	printf("\n");
-	);
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
+    }
 
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    offset += 32;
 
-  startblocki = (left+pos5)/128U*12;
-  startcolumni = ((left+pos5) % 128) / 32;
-  startblocki_32 = startblocki + startcolumni;
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
-  endblocki = (left+pos3)/128U*12;
-  endcolumni = ((left+pos3) % 128) / 32;
-  endblocki_32 = endblocki + endcolumni;
+      while (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
 
-  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
-	       left,pos5,pos3,startblocki,endblocki));
 
-  nshift = left % STEP_SIZE;
-  query_shifted = Compress_shift(query_compress,nshift);
-  debug(printf("Query shifted %d:\n",nshift));
-  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr + 24 <= endptr) {
+      diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
-  if (startblocki_32 == endblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = (pos3 - 1) - enddiscard + 32;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+      while (nonzero_p_256(diff_256)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_256(diff_256));
+	diff_256 = clear_lowbit_256(diff_256,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += 24; ref_ptr += 24; alt_ptr += 24;
+      offset += 256;
+    }
+#endif
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
+#ifdef HAVE_SSE2
+    while (ref_ptr + 12 <= endptr) {
+      diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      while (nonzero_p_128(diff_128)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128));
+	diff_128 = clear_lowbit_128(diff_128,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += 12; ref_ptr += 12; alt_ptr += 12;
+      offset += 128;
+    }
 #else
-    diff_32 = (block_diff_snp_32)(query_shifted + endcolumni,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
+    while (ref_ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+	while (nonzero_p_32(diff_32)) {
+	  mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	  diff_32 = clear_lowbit_32(diff_32,relpos);
+	  if (plusp == false) {
+	    mismatch_position = (querylength - 1) - mismatch_position;
+	  }
+	  genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	  nmismatches++;
+	}
+      
+	query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+	offset += 32;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
+    }
 #endif
 
-    diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */
-    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
+    /* End row */
+    while (ref_ptr < endptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
+      while (nonzero_p_32(diff_32)) {
+	mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+	diff_32 = clear_lowbit_32(diff_32,relpos);
+	if (plusp == false) {
+	  mismatch_position = (querylength - 1) - mismatch_position;
+	}
+	genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+	nmismatches++;
+      }
+      
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+      offset += 32;
+    }
 
-    p = 3*(diff_32 >> 16);
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    p = 3*(diff_32 & 0x0000FFFF);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+    while (nonzero_p_32(diff_32)) {
+      mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32));
+      diff_32 = clear_lowbit_32(diff_32,relpos);
+      if (plusp == false) {
+	mismatch_position = (querylength - 1) - mismatch_position;
+      }
+      genomic[mismatch_position] = tolower(genomic[mismatch_position]);
+      nmismatches++;
     }
-    /* totalscore += score_high[p+2]; */
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    /* offset -= 16; */
-    
-#ifdef DEBUG14
-    answer = (trimpos - 1);
-#else
-    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
-#endif
-
+    return nmismatches;
   }
-#ifndef DEBUG14
-  else {
-#endif
+}
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
 
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = (pos3 - 1) - enddiscard + STEP_SIZE;
-    
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n",
-		 nshift,startdiscard,enddiscard,offset));
-#ifndef DEBUG14
+int
+Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compress,
+			Univcoord_T left, int pos5, int pos3,
+			bool plusp, int genestrand) {
+  /* Dispatcher for marking mismatches in the genomic string.  When
+   * snp_blocks is NULL the work is delegated to
+   * Genome_mark_mismatches_ref; otherwise to mark_mismatches_snps.
+   * Every argument is forwarded unchanged (&(*genomic) is the same
+   * pointer as genomic) and the callee's return value is returned
+   * as is.
+   *
+   * mark_mismatches_snps lowercases genomic[] at each mismatching
+   * position (mirrored to (querylength - 1) - position when plusp is
+   * false) and returns the number of mismatches it marked.  The _ref
+   * variant presumably follows the same contract -- TODO confirm.
+   * The compared range appears to be left+pos5 .. left+pos3, judging
+   * by the disabled debug message below -- TODO confirm. */
+#if 0	/* dibase handling is compiled out; kept only for reference */
+  if (dibasep) {
+    fprintf(stderr,"Not implemented\n");
+#if 0
+    debug5(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3));
+
+    nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query,
+					 pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3);
+    mismatch_positions[nmismatches] = pos3 + 1;	/* Need +1 because of starting assumed nt */
+#endif
+    return 0;
   }
 #endif
 
+  if (snp_blocks == NULL) {	/* presumably: no SNP blocks loaded, compare against ref_blocks only -- verify */
+    return Genome_mark_mismatches_ref(&(*genomic),querylength,query_compress,
+				      left,pos5,pos3,plusp,genestrand);
+  } else {
+    return mark_mismatches_snps(&(*genomic),querylength,query_compress,
+				left,pos5,pos3,plusp,genestrand);
+  }
+}
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (startblocki == endblocki) {
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
 
-    diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
-    diff = set_start(diff,startdiscard);  /* puts 1 (mismatches) at start */
+/************************************************************************
+ *  Trimming
+ ************************************************************************/
 
+static int
+trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+		     bool plusp, int genestrand) {
+  int startdiscard, enddiscard, offset;
+  Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
+  Genomecomp_T *ptr, *startptr;
+  Genomecomp_T *query_shifted;
+  int nshift;
+  int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+  int i;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
+#ifdef HAVE_AVX2
+  unsigned short array[16];
+#elif defined(HAVE_SSE2)
+  unsigned short array[8];
+#endif
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-    
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  int totalscore, bestscore, score;
+  int trimpos;
+  Genomecomp_T p;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  debug(
+	printf("\n\n");
+	printf("Genome (in trim_left_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
+	printf("\n");
+	);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  startblocki = (left+pos5)/128U*12;
+  startcolumni = ((left+pos5) % 128) / 32;
+  startblocki_32 = startblocki + startcolumni;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+  endblocki = (left+pos3)/128U*12;
+  endcolumni = ((left+pos3) % 128) / 32;
+  endblocki_32 = endblocki + endcolumni;
 
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1));
-    return trimpos - 1;		/* trimpos-1 is on side of mismatch */
+  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
+	       left,pos5,pos3,startblocki,endblocki));
 
-  } else {
+  nshift = left % STEP_SIZE;
+  query_shifted = Compress_shift(query_compress,nshift);
+  debug(printf("Query shifted %d:\n",nshift));
+  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += endcolumni;
 #endif
 
-    /* Endblock */
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = (pos3 - 1) - enddiscard + 32;
+  ptr = &(ref_blocks[endblocki_32]);
+  startptr = &(ref_blocks[startblocki_32]);
+
+  if (startblocki_32 == endblocki_32) {
+    /* Single block */
+    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff >> 16);
+    p = 3*(diff_32 >> 16);
     bestscore = score_high[p];
     trimpos = offset - score_high[p+1];
     totalscore = score_high[p+2];
     debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset -= 16;
 
-    p = 3*(diff & 0x0000FFFF);
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset - score_high[p+1];
     }
-    totalscore += score_high[p+2];
+    /* totalscore += score_high[p+2]; */
     debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    bestscore = score_high[p];
-    trimpos = offset - score_high[p+1];
-    totalscore = score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-    
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset -= 16; */
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+  } else if (startblocki == endblocki) {
+#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2)
+    /* Shift */
+    startdiscard += 96 - (endcolumni - startcolumni)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_128_shift_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					 endcolumni);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+    diff_128 = set_start_128(diff_128,startdiscard);  /* puts 1 (mismatches) at start */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 7; i >= 0; --i) {
+      p = 3*array[i];
+      if ((score = score_high[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset - score_high[p+1];
+      }
+      totalscore += score_high[p+2];
+      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+#else
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    p = 3*(diff_32 >> 16);
+    bestscore = score_high[p];
+    trimpos = offset - score_high[p+1];
+    totalscore = score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset - score_high[p+1];
     }
     totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset -= 16;
-#endif
-
-    query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[endblocki]);
-    alt_ptr = &(snp_blocks[endblocki]);
-    ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
-#else
-    ref_ptr = &(ref_blocks[endblocki-12]);
-    alt_ptr = &(snp_blocks[endblocki-12]);
-#endif
-    start = &(ref_blocks[startblocki]);
-    while (ref_ptr > start) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
 
+    /* Single row */
+    while (--endcolumni > startcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      p = 3*(diff >> 16);
+      p = 3*(diff_32 >> 16);
       if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset - score_high[p+1];
       }
       totalscore += score_high[p+2];
       debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset -= 16;
-
-      p = 3*(diff & 0x0000FFFF);
+      
+      p = 3*(diff_32 & 0x0000FFFF);
       if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset - score_high[p+1];
       }
       totalscore += score_high[p+2];
       debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset -= 16;
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+    }
 
-#else
-      p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+    p = 3*(diff_32 >> 16);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    totalscore += score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+    
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    /* totalscore += score_high[p+2]; */
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset -= 16; */
+    
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
+#endif
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,5));
+#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    startdiscard += (startcolumni - endcolumni - 1)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_128_wrap_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					endcolumni);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+    diff_128 = set_start_128(diff_128,startdiscard);  /* puts 1 (mismatches) at start */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 7; i >= 0; --i) {
+      p = 3*array[i];
       if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset - score_high[p+1];
       }
       totalscore += score_high[p+2];
       debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset -= 16;
+    }
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
+#endif
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+  } else {
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-      if ((score = score_high[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset - score_high[p+1];
-      }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset -= 16;
+    p = 3*(diff_32 >> 16);
+    bestscore = score_high[p];
+    trimpos = offset - score_high[p+1];
+    totalscore = score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    /* totalscore += score_high[p+2]; */
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+
+    /* End row */
+    while (--endcolumni >= 0) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,1));
+      p = 3*(diff_32 >> 16);
       if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset - score_high[p+1];
       }
-      totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      totalscore = score_high[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset -= 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,0));
+      p = 3*(diff_32 & 0x0000FFFF);
       if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset - score_high[p+1];
       }
       totalscore += score_high[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset -= 16;
-#endif
-
-      query_shifted -= COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;}
-#else
-      ref_ptr -= 12; alt_ptr -= 12;
-#endif
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
     }
-
-    /* Startblock */
-    diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-
-    diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */
+#ifdef HAVE_SSE2
+    query_shifted -= QUERY_NEXTROW;
+#endif
+    ptr -= GENOME_NEXTROW;
 
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff >> 16);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr >= startptr + 24) {
+      diff_256 = (block_diff_256)(&(query_shifted[-15]),&(ptr[-15]),plusp,genestrand,query_unk_mismatch_p);
+      _mm256_store_si256((__m256i *) array,diff_256);
 
-    p = 3*(diff & 0x0000FFFF);
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+      for (i = 15; i >= 0; --i) {
+	p = 3*array[i];
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore += score_high[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+      }
+      query_shifted -= 24; ptr -= 24;
     }
-    /* totalscore += score_high[p+2]; */
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    /* offset -= 16; */
+#endif
 
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+#ifdef HAVE_SSE2
+    while (ptr >= startptr + 12) {
+      diff_128 = (block_diff_128)(&(query_shifted[-3]),&(ptr[-3]),plusp,genestrand,query_unk_mismatch_p);
+      _mm_store_si128((__m128i *) array,diff_128);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+      for (i = 7; i >= 0; --i) {
+	p = 3*array[i];
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore += score_high[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+      }
+      query_shifted -= 12; ptr -= 12;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+#else
+    while (ptr >= startptr + 12) {
+      for (endcolumni = 3; endcolumni >= 0; --endcolumni) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+	p = 3*(diff_32 >> 16);
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore = score_high[p+2];
+	debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+	
+	p = 3*(diff_32 & 0x0000FFFF);
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore += score_high[p+2];
+	debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+	query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
+      }
+      /* query_shifted -= QUERY_NEXTROW; */ ptr -= GENOME_NEXTROW;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+#endif
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+    /* Start row */
+    while (ptr > startptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
-    }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
+      p = 3*(diff_32 >> 16);
+      if ((score = score_high[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset - score_high[p+1];
+      }
+      totalscore = score_high[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_high[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset - score_high[p+1];
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_high[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset - score_high[p+1];
+      }
+      totalscore += score_high[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
+      query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL;
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
+
+    p = 3*(diff_32 >> 16);
     if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset - score_high[p+1];
     }
     totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset -= 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
+    
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset - score_high[p+1];
     }
-    totalscore += score_high[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset -= 16;
-#endif
-
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1));
+    /* totalscore += score_high[p+2]; */
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset -= 16; */
+    
     return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
   }
-#endif
 }
 
 
-
 static int
-trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-		      bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
+trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+			  bool plusp, int genestrand) {
   int startdiscard, enddiscard, offset;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *ptr, *end;
+  Genomecomp_T *ref_ptr, *alt_ptr, *startptr;
   Genomecomp_T *query_shifted;
-  UINT4 diff_32;
-  Genomediff_T diff;
   int nshift;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+  int i;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
+#ifdef HAVE_AVX2
+  unsigned short array[16];
+#elif defined(HAVE_SSE2)
+  unsigned short array[8];
+#endif
 
   int totalscore, bestscore, score;
   int trimpos;
@@ -23066,7 +25766,7 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
 
   debug(
 	printf("\n\n");
-	printf("Genome (in trim_right_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	printf("Genome (in trim_left_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
 	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
@@ -23087,525 +25787,378 @@ trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int
   query_shifted = Compress_shift(query_compress,nshift);
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
-  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = -startdiscard + pos5;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
+  query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE;
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
 #else
-    diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]),
-			      plusp,genestrand,query_unk_mismatch_p);
+  query_shifted += endcolumni;
 #endif
-    diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
-    diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
 
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = (pos3 - 1) - enddiscard + 32;
+  ref_ptr = &(ref_blocks[endblocki_32]);
+  alt_ptr = &(snp_blocks[endblocki_32]);
+  startptr = &(ref_blocks[startblocki_32]);
 
-    p = 3*(diff_32 & 0x0000FFFF);
-    bestscore = score_low[p];
-    trimpos = offset + score_low[p+1];
-    totalscore = score_low[p+2];
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+  if (startblocki_32 == endblocki_32) {
+    /* Single block */
+    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
+    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
 
     p = 3*(diff_32 >> 16);
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    /* totalscore += score_low[p+2]; */
+    bestscore = score_high[p];
+    trimpos = offset - score_high[p+1];
+    totalscore = score_high[p+2];
     debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
 		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    /* offset += 16; */
-    
-#ifdef DEBUG14
-    answer = (trimpos + 1);
-#else
-    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
-#endif
-
-  }
-#ifndef DEBUG14
-  else {
-#endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
-
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = -startdiscard + pos5;
-  
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
-#ifndef DEBUG14
-  }
-#endif
-
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
-  if (endblocki == startblocki) {
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
-    diff = set_end(diff,enddiscard);  /* puts 1 (mismatches) at end */
-
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    bestscore = score_low[p];
-    trimpos = offset + score_low[p+1];
-    totalscore = score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+    offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    if ((score = score_low[p] + totalscore) > bestscore) {
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
-      trimpos = offset + score_low[p+1];
+      trimpos = offset - score_high[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1));
-    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
-
-  } else {
-#endif
-
-    /* Startblock */
-    diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]),
-			plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
-    debug(printf("clearing start %08X\n",clear_start_mask(startdiscard)));
-
-      
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff & 0x0000FFFF);
-    bestscore = score_low[p];
-    trimpos = offset + score_low[p+1];
-    totalscore = score_low[p+2];
+    /* totalscore += score_high[p+2]; */
     debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*(diff >> 16);
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset -= 16; */
 
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    bestscore = score_low[p];
-    trimpos = offset + score_low[p+1];
-    totalscore = score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-      
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-      
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-      
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-      
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-      
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-      
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+  } else if (startblocki == endblocki) {
+#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2)
+    /* Shift */
+    startdiscard += 96 - (endcolumni - startcolumni)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_snp_128_shift_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					     endcolumni);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+    diff_128 = set_start_128(diff_128,startdiscard);  /* puts 1 (mismatches) at start */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 7; i >= 0; --i) {
+      p = 3*array[i];
+      if ((score = score_high[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset - score_high[p+1];
+      }
+      totalscore += score_high[p+2];
+      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-	      7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-#endif
+    
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
 
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ptr = &(ref_blocks[startblocki]);
-    ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
 #else
-    ptr = &(ref_blocks[startblocki+12]);
-#endif
-    end = &(ref_blocks[endblocki]);
-    while (ptr < end) {
-      diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
+    p = 3*(diff_32 >> 16);
+    bestscore = score_high[p];
+    trimpos = offset - score_high[p+1];
+    totalscore = score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      p = 3*(diff & 0x0000FFFF);
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    totalscore += score_high[p+2];
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+    query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
 
-      p = 3*(diff >> 16);
-      if ((score = score_low[p] + totalscore) > bestscore) {
+    /* Single row */
+    while (--endcolumni > startcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      p = 3*(diff_32 >> 16);
+      if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
-	trimpos = offset + score_low[p+1];
+	trimpos = offset - score_high[p+1];
       }
-      totalscore += score_low[p+2];
+      totalscore += score_high[p+2];
       debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
-
-#else
-      p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-      if ((score = score_low[p] + totalscore) > bestscore) {
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
+      
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
-	trimpos = offset + score_low[p+1];
+	trimpos = offset - score_high[p+1];
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+      totalscore += score_high[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
+      query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+    }
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+    p = 3*(diff_32 >> 16);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    totalscore += score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+    
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    /* totalscore += score_high[p+2]; */
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset -= 16; */
+    
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
+#endif
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-      if ((score = score_low[p] + totalscore) > bestscore) {
+#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    startdiscard += (startcolumni - endcolumni - 1)*32;
+    enddiscard += 96;
+    diff_128 = (block_diff_snp_128_wrap_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					    endcolumni);
+    diff_128 = clear_end_128(diff_128,enddiscard);
+    diff_128 = set_start_128(diff_128,startdiscard);  /* puts 1 (mismatches) at start */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 7; i >= 0; --i) {
+      p = 3*array[i];
+      if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
-	trimpos = offset + score_low[p+1];
+	trimpos = offset - score_high[p+1];
       }
-      totalscore += score_low[p+2];
+      totalscore += score_high[p+2];
       debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
+    }
+    
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
+#endif
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+  } else {
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_end_32(diff_32,enddiscard);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+    p = 3*(diff_32 >> 16);
+    bestscore = score_high[p];
+    trimpos = offset - score_high[p+1];
+    totalscore = score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-      if ((score = score_low[p] + totalscore) > bestscore) {
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
+      bestscore = score;
+      trimpos = offset - score_high[p+1];
+    }
+    /* totalscore += score_high[p+2]; */
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+    query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+
+    /* End row */
+    while (--endcolumni >= 0) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      p = 3*(diff_32 >> 16);
+      if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
-	trimpos = offset + score_low[p+1];
+	trimpos = offset - score_high[p+1];
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+      totalscore = score_high[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-      if ((score = score_low[p] + totalscore) > bestscore) {
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_high[p] + totalscore) > bestscore) {
 	bestscore = score;
-	trimpos = offset + score_low[p+1];
+	trimpos = offset - score_high[p+1];
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
-#endif
-
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;}
-#else
-      ptr += 12;
-#endif
+      totalscore += score_high[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
+      query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
     }
+#ifdef HAVE_SSE2
+    query_shifted -= QUERY_NEXTROW;
+#endif
+    ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW;
 
-    /* Endblock */
-    diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
-    diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
 
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr >= startptr + 24) {
+      diff_256 = (block_diff_snp_256)(&(query_shifted[-15]),&(ref_ptr[-15]),alt_ptr,plusp,genestrand,query_unk_mismatch_p);
+      _mm256_store_si256((__m256i *) array,diff_256);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff & 0x0000FFFF);
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+      for (i = 15; i >= 0; --i) {
+	p = 3*array[i];
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore += score_high[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+      }
+      query_shifted -= 24; ref_ptr -= 24; alt_ptr -= 24;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+#endif
 
-    p = 3*(diff >> 16);
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    /* totalscore += score_low[p+2]; */
-    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    /* offset += 16; */
+#ifdef HAVE_SSE2
+    while (ref_ptr >= startptr + 12) {
+      diff_128 = (block_diff_snp_128)(&(query_shifted[-3]),&(ref_ptr[-3]),alt_ptr,plusp,genestrand,query_unk_mismatch_p);
+      _mm_store_si128((__m128i *) array,diff_128);
 
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+      for (i = 7; i >= 0; --i) {
+	p = 3*array[i];
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore += score_high[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+      }
+      query_shifted -= 12; ref_ptr -= 12; alt_ptr -= 12;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+#else
+    while (ref_ptr >= startptr + 12) {
+      for (endcolumni = 3; endcolumni >= 0; --endcolumni) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+	p = 3*(diff_32 >> 16);
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore = score_high[p+2];
+	debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+	
+	p = 3*(diff_32 & 0x0000FFFF);
+	if ((score = score_high[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset - score_high[p+1];
+	}
+	totalscore += score_high[p+2];
+	debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset -= 16;
+	query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
+      }
+      /* query_shifted -= QUERY_NEXTROW; */ ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+#endif
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+    /* Start row */
+    while (ref_ptr > startptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+      p = 3*(diff_32 >> 16);
+      if ((score = score_high[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset - score_high[p+1];
+      }
+      totalscore = score_high[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_high[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset - score_high[p+1];
+      }
+      totalscore += score_high[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset -= 16;
+      query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_start_32(diff_32,startdiscard);  /* puts 1 (mismatches) at start */
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_low[p] + totalscore) > bestscore) {
+    p = 3*(diff_32 >> 16);
+    if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
-      trimpos = offset + score_low[p+1];
+      trimpos = offset - score_high[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    if ((score = score_low[p] + totalscore) > bestscore) {
+    totalscore += score_high[p+2];
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    offset -= 16;
+    
+    p = 3*(diff_32 & 0x0000FFFF);
+    if ((score = score_high[p] + totalscore) > bestscore) {
       bestscore = score;
-      trimpos = offset + score_low[p+1];
+      trimpos = offset - score_high[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-#endif
+    /* totalscore += score_high[p+2]; */
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset -= 16; */
     
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1));
-    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+    return (trimpos - 1);	/* trimpos-1 is on side of mismatch */
   }
-#endif
 }
 
 
-
 static int
-trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
-			   bool plusp, int genestrand) {
-#ifdef DEBUG14
-  int answer;
-#endif
+trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+		      bool plusp, int genestrand) {
   int startdiscard, enddiscard, offset;
   Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
-  Genomecomp_T *ref_ptr, *alt_ptr, *end;
+  Genomecomp_T *ptr, *endptr;
   Genomecomp_T *query_shifted;
-  UINT4 diff_32;
-  Genomediff_T diff;
   int nshift;
   int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+  int i;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
+#ifdef HAVE_AVX2
+  unsigned short array[16];
+#elif defined(HAVE_SSE2)
+  unsigned short array[8];
+#endif
 
   int totalscore, bestscore, score;
   int trimpos;
@@ -23613,8 +26166,8 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
 
   debug(
 	printf("\n\n");
-	printf("Genome (in trim_right_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
-	Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3);
+	printf("Genome (in trim_right_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
 	printf("\n");
 	);
 
@@ -23635,24 +26188,25 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
   debug(printf("Query shifted %d:\n",nshift));
   debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
   query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
-
-  if (endblocki_32 == startblocki_32) {
-    startdiscard = (left+pos5) % 32;
-    enddiscard = (left+pos3) % 32;
-    offset = -startdiscard + pos5;
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
 #else
-    diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]),
-				  plusp,genestrand,query_unk_mismatch_p);
+  query_shifted += startcolumni;
 #endif
+
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ptr = &(ref_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
+
+  if (endblocki_32 == startblocki_32) {
+    /* Single block */
+    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
+
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
     diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
     diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
 
-
     p = 3*(diff_32 & 0x0000FFFF);
     bestscore = score_low[p];
     trimpos = offset + score_low[p+1];
@@ -23671,472 +26225,709 @@ trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5
 		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     /* offset += 16; */
     
-#ifdef DEBUG14
-    answer = (trimpos + 1);
-#else
     return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
-#endif
-
-  }
-#ifndef DEBUG14
-  else {
-#endif
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    startblocki = startblocki_32;
-    endblocki = endblocki_32;
-#endif
-
-    startdiscard = (left+pos5) % STEP_SIZE;
-    enddiscard = (left+pos3) % STEP_SIZE;
-    offset = -startdiscard + pos5;
-  
-    debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
-#ifndef DEBUG14
-  }
-#endif  
 
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					 startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */
+    diff_128 = set_end_128(diff_128,enddiscard);  /* puts 1 (mismatches) at end */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 0; i < 8; i++) {
+      p = 3*array[i];
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+    }
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
 #else
-  if (endblocki == startblocki) {
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
-    diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
-    diff = set_end(diff,enddiscard);  /* puts 1 (mismatches) at end */
-
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
+    p = 3*(diff_32 & 0x0000FFFF);
     bestscore = score_low[p];
     trimpos = offset + score_low[p+1];
     totalscore = score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
     totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore = score_low[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      
+      p = 3*(diff_32 >> 16);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
+
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    totalscore = score_low[p+2];
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* totalscore += score_low[p+2]; */
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset += 16; */
+    
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
+#endif
+
+#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p,
+					startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */
+    diff_128 = set_end_128(diff_128,enddiscard);  /* puts 1 (mismatches) at end */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 0; i < 8; i++) {
+      p = 3*array[i];
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+    }
+
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
+#endif
+
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    p = 3*(diff_32 & 0x0000FFFF);
+    bestscore = score_low[p];
+    trimpos = offset + score_low[p+1];
+    totalscore = score_low[p+2];
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
     totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
+    query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore = score_low[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      
+      p = 3*(diff_32 >> 16);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ptr += GENOME_NEXTROW;
+
+
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ptr + 24 <= endptr) {
+      diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      _mm256_store_si256((__m256i *) array,diff_256);
+
+      for (i = 0; i < 16; i++) {
+	p = 3*array[i];
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore += score_low[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
+      }
+      query_shifted += 24; ptr += 24;
+    }
+#endif
+
+#ifdef HAVE_SSE2
+    while (ptr + 12 <= endptr) {
+      diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+      _mm_store_si128((__m128i *) array,diff_128);
+
+      for (i = 0; i < 8; i++) {
+	p = 3*array[i];
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore += score_low[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
+      }
+      query_shifted += 12; ptr += 12;
+    }
+#else
+    while (ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+	p = 3*(diff_32 & 0x0000FFFF);
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore = score_low[p+2];
+	debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
+	
+	p = 3*(diff_32 >> 16);
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore += score_low[p+2];
+	debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
+	query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+      }
+      /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW;
+    }
+#endif
+
+    /* End row */
+    while (ptr < endptr) {
+      diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore = score_low[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      
+      p = 3*(diff_32 >> 16);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL;
+    }
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
+    /* End block */
+    diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
+
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    totalscore = score_low[p+2];
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1));
+    /* totalscore += score_low[p+2]; */
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset += 16; */
+    
     return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
+  }
+}
 
-  } else {
+
+static int
+trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
+			   bool plusp, int genestrand) {
+  int startdiscard, enddiscard, offset;
+  Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32;
+  Genomecomp_T *ref_ptr, *alt_ptr, *endptr;
+  Genomecomp_T *query_shifted;
+  int nshift;
+  int startcolumni, endcolumni;
+  UINT4 diff_32;
+#ifdef HAVE_SSE2
+  __m128i diff_128;
+  int i;
+#endif
+#ifdef HAVE_AVX2
+  __m256i diff_256;
+#endif
+#ifdef HAVE_AVX2
+  unsigned short array[16];
+#elif defined(HAVE_SSE2)
+  unsigned short array[8];
+#endif
+
+  int totalscore, bestscore, score;
+  int trimpos;
+  Genomecomp_T p;
+
+  debug(
+	printf("\n\n");
+	printf("Genome (in trim_right_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3);
+	Genome_print_blocks(ref_blocks,left+pos5,left+pos3);
+	printf("\n");
+	);
+
+
+  startblocki = (left+pos5)/128U*12;
+  startcolumni = ((left+pos5) % 128) / 32;
+  startblocki_32 = startblocki + startcolumni;
+
+  endblocki = (left+pos3)/128U*12;
+  endcolumni = ((left+pos3) % 128) / 32;
+  endblocki_32 = endblocki + endcolumni;
+
+  debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n",
+	       left,pos5,pos3,startblocki,endblocki));
+
+  nshift = left % STEP_SIZE;
+  query_shifted = Compress_shift(query_compress,nshift);
+  debug(printf("Query shifted %d:\n",nshift));
+  debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3));
+  query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE;
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
+#else
+  query_shifted += startcolumni;
 #endif
 
-    /* Startblock */
-    diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]),
-			    plusp,genestrand,query_unk_mismatch_p);
+  startdiscard = (left+pos5) % 32;
+  enddiscard = (left+pos3) % 32;
+  offset = -startdiscard + pos5;
+  ref_ptr = &(ref_blocks[startblocki_32]);
+  alt_ptr = &(snp_blocks[startblocki_32]);
+  endptr = &(ref_blocks[endblocki_32]);
 
-    diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */
+  if (endblocki_32 == startblocki_32) {
+    /* Single block */
+    debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard));
 
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */
+    diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff & 0x0000FFFF);
+    p = 3*(diff_32 & 0x0000FFFF);
     bestscore = score_low[p];
     trimpos = offset + score_low[p+1];
     totalscore = score_low[p+2];
     debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*(diff >> 16);
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
+    /* totalscore += score_low[p+2]; */
     debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset += 16; */
+    
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
+
+  } else if (endblocki == startblocki) {
+#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2)
+    /* Shift */
+    enddiscard += (endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					     startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */
+    diff_128 = set_end_128(diff_128,enddiscard);  /* puts 1 (mismatches) at end */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 0; i < 8; i++) {
+      p = 3*array[i];
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+    }
+
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
 
 #else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    p = 3*(diff_32 & 0x0000FFFF);
     bestscore = score_low[p];
     trimpos = offset + score_low[p+1];
     totalscore = score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
     totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+    /* Single row */
+    while (++startcolumni < endcolumni) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+      p = 3*(diff_32 & 0x0000FFFF);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore = score_low[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      
+      p = 3*(diff_32 >> 16);
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
+
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    totalscore = score_low[p+2];
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
+    /* totalscore += score_low[p+2]; */
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    /* offset += 16; */
+    
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
+#endif
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
+#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3)
+  } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) {
+    /* Wrap */
+    enddiscard += (4 + endcolumni - startcolumni)*32;
+    diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p,
+					    startcolumni);
+    diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */
+    diff_128 = set_end_128(diff_128,enddiscard);  /* puts 1 (mismatches) at end */
+    _mm_store_si128((__m128i *) array,diff_128);
+
+    bestscore = -100;
+    for (i = 0; i < 8; i++) {
+      p = 3*array[i];
+      if ((score = score_low[p] + totalscore) > bestscore) {
+	bestscore = score;
+	trimpos = offset + score_low[p+1];
+      }
+      totalscore += score_low[p+2];
+      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      offset += 16;
     }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+#endif
+
+    return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
+
+  } else {
+    /* Start block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = clear_start_32(diff_32,startdiscard);
+
+    p = 3*(diff_32 & 0x0000FFFF);
+    bestscore = score_low[p];
+    trimpos = offset + score_low[p+1];
+    totalscore = score_low[p+2];
+    debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
     totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+    debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
-#endif
-
-    query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    ref_ptr = &(ref_blocks[startblocki]);
-    alt_ptr = &(snp_blocks[startblocki]);
-    ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
-#else
-    ref_ptr = &(ref_blocks[startblocki+12]);
-    alt_ptr = &(snp_blocks[startblocki+12]);
-#endif
-    end = &(ref_blocks[endblocki]);
-    while (ref_ptr < end) {
-      diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
 
+    /* Start row */
+    while (++startcolumni < 4) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      p = 3*(diff & 0x0000FFFF);
+      p = 3*(diff_32 & 0x0000FFFF);
       if ((score = score_low[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset + score_low[p+1];
       }
-      totalscore += score_low[p+2];
+      totalscore = score_low[p+2];
       debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset += 16;
-
-      p = 3*(diff >> 16);
+      
+      p = 3*(diff_32 >> 16);
       if ((score = score_low[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset + score_low[p+1];
       }
       totalscore += score_low[p+2];
       debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset += 16;
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
+    }
+#ifdef HAVE_SSE2
+    query_shifted += QUERY_NEXTROW;
+#endif
+    ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
 
-#else
-      p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+    /* Middle rows */
+#ifdef HAVE_AVX2
+    while (ref_ptr + 24 <= endptr) {
+      diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      _mm256_store_si256((__m256i *) array,diff_256);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
+      for (i = 0; i < 16; i++) {
+	p = 3*array[i];
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore += score_low[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+      query_shifted += 24; ref_ptr += 24; alt_ptr += 24;
+    }
+#endif
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
-      }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+#ifdef HAVE_SSE2
+    while (ref_ptr + 12 <= endptr) {
+      diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+      _mm_store_si128((__m128i *) array,diff_128);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
+      for (i = 0; i < 8; i++) {
+	p = 3*array[i];
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore += score_low[p+2];
+	debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
-
-      p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-      if ((score = score_low[p] + totalscore) > bestscore) {
-	bestscore = score;
-	trimpos = offset + score_low[p+1];
+      query_shifted += 12; ref_ptr += 12; alt_ptr += 12;
+    }
+#else
+    while (ref_ptr + 12 <= endptr) {
+      for (startcolumni = 0; startcolumni < 4; startcolumni++) {
+	diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+
+	p = 3*(diff_32 & 0x0000FFFF);
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore = score_low[p+2];
+	debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
+	
+	p = 3*(diff_32 >> 16);
+	if ((score = score_low[p] + totalscore) > bestscore) {
+	  bestscore = score;
+	  trimpos = offset + score_low[p+1];
+	}
+	totalscore += score_low[p+2];
+	debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		     diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+	offset += 16;
+	query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-      offset += 16;
+      /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW;
+    }
+#endif
+
+    /* End row */
+    while (ref_ptr < endptr) {
+      diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
 
-      p = 3*((unsigned short) _mm_extract_epi16(diff,6));
+      p = 3*(diff_32 & 0x0000FFFF);
       if ((score = score_low[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset + score_low[p+1];
       }
-      totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      totalscore = score_low[p+2];
+      debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset += 16;
-
-      p = 3*((unsigned short) _mm_extract_epi16(diff,7));
+      
+      p = 3*(diff_32 >> 16);
       if ((score = score_low[p] + totalscore) > bestscore) {
 	bestscore = score;
 	trimpos = offset + score_low[p+1];
       }
       totalscore += score_low[p+2];
-      debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		   7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
+      debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
+		   diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
       offset += 16;
-#endif
-
-      query_shifted += COMPRESS_BLOCKSIZE;
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-      ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;}
-#else
-      ref_ptr += 12; alt_ptr += 12;
-#endif
+      query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL;
     }
 
-    /* Endblock */
-    diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
-
-    diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */
-
+    /* End block */
+    diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p);
+    diff_32 = set_end_32(diff_32,enddiscard);  /* puts 1 (mismatches) at end */
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-    p = 3*(diff & 0x0000FFFF);
+    p = 3*(diff_32 & 0x0000FFFF);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
-    totalscore += score_low[p+2];
+    totalscore = score_low[p+2];
     debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		 diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     offset += 16;
 
-    p = 3*(diff >> 16);
+    p = 3*(diff_32 >> 16);
     if ((score = score_low[p] + totalscore) > bestscore) {
       bestscore = score;
       trimpos = offset + score_low[p+1];
     }
     /* totalscore += score_low[p+2]; */
     debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
+		 diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore));
     /* offset += 16; */
-
-#else
-    p = 3*((unsigned short) _mm_extract_epi16(diff,0));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,1));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,2));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,3));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,4));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,5));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,6));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-
-    p = 3*((unsigned short) _mm_extract_epi16(diff,7));
-    if ((score = score_low[p] + totalscore) > bestscore) {
-      bestscore = score;
-      trimpos = offset + score_low[p+1];
-    }
-    totalscore += score_low[p+2];
-    debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n",
-		 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore));
-    offset += 16;
-#endif
-
-    debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1));
+    
     return (trimpos + 1);	/* trimpos+1 is on side of mismatch */
-
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
   }
-#endif
 }
 
 
diff --git a/src/genome128_hr.h b/src/genome128_hr.h
index dd99cba..b704014 100644
--- a/src/genome128_hr.h
+++ b/src/genome128_hr.h
@@ -1,4 +1,4 @@
-/* $Id: genome128_hr.h 184179 2016-02-12 20:14:39Z twu $ */
+/* $Id: genome128_hr.h 201740 2016-12-16 16:38:22Z twu $ */
 #ifndef GENOME128_HR_INCLUDED
 #define GENOME128_HR_INCLUDED
 #include "types.h"
@@ -11,11 +11,6 @@ Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in,
 		 bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in,
 		 Mode_T mode_in);
 
-extern void
-Genome_hr_user_setup (UINT4 *ref_blocks_in,
-		      bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in,
-		      Mode_T mode_in);
-
 extern int
 Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T left, int pos5, int pos3,
 				      bool plusp, int genestrand);
diff --git a/src/get-genome.c b/src/get-genome.c
index 84c4ad8..560fda6 100644
--- a/src/get-genome.c
+++ b/src/get-genome.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: get-genome.c 184460 2016-02-18 00:07:13Z twu $";
+static char rcsid[] = "$Id: get-genome.c 207146 2017-06-10 00:20:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -62,6 +62,7 @@ static char *map_iitfile = NULL;
 static int nflanking = 0;
 static bool exonsp = false;
 static bool sequencep = false;
+static bool uniquep = false;
 static bool force_label_p = false;
 
 static bool exactp = false;
@@ -70,6 +71,8 @@ static bool signedp = false;
 
 static bool vareffect_p = false;
 
+static bool codingp = false;
+
 /* Dump options */
 static bool dumpallp = false;
 static bool stream_chars_p = false;
@@ -101,9 +104,11 @@ static struct option long_options[] = {
   {"flanking", required_argument, 0, 'u'}, /* nflanking */
   {"exons", no_argument, 0, 'E'},	   /* exonsp */
   {"sequence", no_argument, 0, 'S'},	   /* sequencep */
+  {"nunique", no_argument, 0, 0},	   /* uniquep */
   {"exact", no_argument, 0, 0},		/* exactp */
   {"signed", no_argument, 0, 's'},	   /* signedp */
   {"aslabel", no_argument, 0, 0},	   /* force_label_p */
+  {"coding", no_argument, 0, 0},	   /* codingp */
 
   /* Dump options */
   {"dump", no_argument, 0, 'A'},	/* dumpallp */
@@ -168,6 +173,7 @@ External map file options\n\
                             this lists available map files.\n\
   -S, --sequence          For a gene map file, prints the sequence\n\
   -E, --exons             For a gene map file, prints the sequence, one exon per line\n\
+  --nunique               For a gene map file, also prints the number of unique positions\n\
   -k, --ranks             Prints levels for non-overlapping printing of map hits\n\
   -r, --raw               Prints sequence as ASCII numeric codes\n\
   -u, --flanking=INT      Show flanking hits (default 0)\n\
@@ -176,6 +182,7 @@ External map file options\n\
                             are also requested, show only flanking hits downstream in direction of\n\
                             query.\n\
   --aslabel               Consider all queries to be labels, even if numeric\n\
+  --coding                Print entry only if position overlaps a coding exon\n\
 \n\
 Dump options\n\
   -A, --dump              Dump entire genome in FASTA format\n\
@@ -964,6 +971,52 @@ genemap_print_sequence (char *annot, Univcoord_T chroffset, Genome_T genome) {
 
 
 static void
+genemap_print_unique (char *annot, Intlist_T unique_positions, Intlist_T unique_splicep, bool print_geneline_p) {
+  char *p;			/* Cursor that walks annot one line at a time */
+  Chrpos_T exonstart, exonend;	/* Coordinates parsed from the current exon line */
+  int nunique, uniquep;		/* Values popped from the two lists.  NOTE: this local uniquep shadows the file-scope flag of the same name */
+  /* Prints one "exonstart exonend nunique NA|F|T" line to stdout for each exon line of annot that follows the header line */
+  /* Header line (first line of annot): echo it when print_geneline_p is true, otherwise skip it */
+  p = annot;
+  if (print_geneline_p == true) {
+    while (*p != '\0' && *p != '\n') {
+      putchar(*p);
+      p++;
+    }
+    if (*p == '\n') p++;	/* Step past the newline that ends the header */
+    printf("\n");		/* Always terminate the echoed header, even if annot had no newline */
+  } else {
+    while (*p != '\0' && *p != '\n') p++;
+    if (*p == '\n') p++;
+  }
+  /* Each remaining line must begin with two unsigned exon coordinates */
+  while (*p != '\0') {
+    if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {	/* %u assumes Chrpos_T is unsigned int -- TODO confirm in types.h */
+      fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+      abort();			/* A malformed exon line is treated as fatal */
+    } else {
+      unique_positions = Intlist_pop(unique_positions,&nunique); /* presumably returns the list minus its head -- confirm in intlist.c */
+      if (unique_splicep == NULL) {
+	printf("%u %u %d NA\n",exonstart,exonend,nunique); /* No splice entry to report for this exon */
+      } else {
+	unique_splicep = Intlist_pop(unique_splicep,&uniquep);
+	if (uniquep == 0) {	/* 0 is reported as F, any other value as T */
+	  printf("%u %u %d F\n",exonstart,exonend,nunique);
+	} else {
+	  printf("%u %u %d T\n",exonstart,exonend,nunique);
+	}
+      }
+    }
+    /* Advance p to the start of the next line (or to the terminating NUL) */
+    while (*p != '\0' && *p != '\n') p++;
+    if (*p == '\n') p++;
+  }
+
+  return;
+}
+
+
+static void
 print_interval (char *divstring, int index, IIT_T iit, int ndivs, Univ_IIT_T chromosome_iit,
 		Genome_T genome, int fieldint) {
   Interval_T interval;
@@ -973,8 +1026,9 @@ print_interval (char *divstring, int index, IIT_T iit, int ndivs, Univ_IIT_T chr
   bool annotationonlyp = false, signed_output_p = true;
   int divno;
   Univcoord_T chroffset;
+  Intlist_T unique_positions, unique_splicep;
 
-  if (exonsp == true || sequencep == true) {
+  if (exonsp == true || sequencep == true || uniquep == true) {
     label = IIT_label(iit,index,&allocp);
     printf(">%s ",label);
     if (allocp == true) {
@@ -1004,6 +1058,12 @@ print_interval (char *divstring, int index, IIT_T iit, int ndivs, Univ_IIT_T chr
 	genemap_print_exons(annotation,chroffset,genome);
       } else if (sequencep == true) {
 	genemap_print_sequence(annotation,chroffset,genome);
+      } else if (uniquep == true) {
+	unique_positions = IIT_unique_positions(iit,index,divno);
+	unique_splicep = IIT_unique_splicep(iit,index,divno);
+	genemap_print_unique(annotation,unique_positions,unique_splicep,/*print_geneline_p*/true);
+	/* Intlist_free(&unique_splicep); -- popped by above procedure */
+	/* Intlist_free(&unique_positions); -- popped by above procedure */
       }
     }
     if (allocp == true) {
@@ -1080,9 +1140,14 @@ main (int argc, char *argv[]) {
   char *divstring, *divstring2;
   Univ_IIT_T chromosome_iit, contig_iit;
   IIT_T map_iit = NULL;
+  Divread_T divread;
   char Buffer[BUFFERLEN], *segment;
   char coords[BUFFERLEN], typestring[BUFFERLEN];
 
+  int index;
+  Intlist_T unique_positions, unique_splicep;
+  char *label, *annotation, *restofheader;
+
   int fieldint = -1;
   int *matches, nmatches, ndivs, i, *leftflanks, *rightflanks, nleftflanks = 0, nrightflanks = 0;
   int sign;
@@ -1113,6 +1178,9 @@ main (int argc, char *argv[]) {
 	print_program_usage();
 	exit(0);
 
+      } else if (!strcmp(long_name,"nunique")) {
+	uniquep = true;
+
       } else if (!strcmp(long_name,"exact")) {
 	exactp = true;
 
@@ -1132,6 +1200,9 @@ main (int argc, char *argv[]) {
       } else if (!strcmp(long_name,"stream-ints")) {
 	stream_ints_p = true;
 
+      } else if (!strcmp(long_name,"coding")) {
+	codingp = true;
+
       } else {
 	/* Shouldn't reach here */
 	fprintf(stderr,"Don't recognize option %s.  For usage, run 'get-genome --help'",long_name);
@@ -1232,48 +1303,135 @@ main (int argc, char *argv[]) {
 
 
   } else if (dumpallp == true) {
-    iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
-			      strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
-    sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
-    chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false);
-    FREE(iitfile);
+    if (map_iitfile != NULL) {
+      mapdir = Datadir_find_mapdir(user_mapdir,genomesubdir,fileroot);
+      iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
+				strlen(map_iitfile)+strlen(".iit")+1,sizeof(char));
+      sprintf(iitfile,"%s/%s.iit",mapdir,map_iitfile);
+      if (Access_file_exists_p(iitfile) == false) {
+	fprintf(stderr,"Map file %s.iit not found in %s.  Available files:\n",map_iitfile,mapdir);
+	Datadir_list_directory(stderr,mapdir);
+	fprintf(stderr,"Either install file %s.iit or specify a full directory path\n",map_iitfile);
+	fprintf(stderr,"using the -M flag to gmap.\n");
+	exit(9);
+      }
+      FREE(mapdir);
+    
+      if (sequencep == true || exonsp == true) {
+	/* User is requesting all sequences or exons in the map file */
+	if ((map_iit = IIT_read(iitfile,/*name*/NULL,true,/*divread*/READ_ALL,/*divstring*/NULL,
+				/*add_iit_p*/true)) == NULL) {
+	  fprintf(stderr,"Cannot open IIT file %s\n",iitfile);
+	  exit(9);
 
-    if (snps_root == NULL || print_snps_mode == 0) {
-      genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
-			  uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
-    } else if (print_snps_mode == 2) {
-      genome = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
-			  uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
-    } else if (print_snps_mode == 1 || print_snps_mode == 3) {
-      genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
-			  uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
-      genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
-			     uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
-    }
+	} else {
+	  FREE(iitfile);
 
-    for (indx = 1; indx <= Univ_IIT_total_nintervals(chromosome_iit); indx++) {
-      chr = Univ_IIT_label(chromosome_iit,indx,&allocp);
-      with_colon = (char *) CALLOC(strlen(chr)+strlen(":")+1,sizeof(char));
-      sprintf(with_colon,"%s:",chr);
-      if (allocp == true) {
-	FREE(chr);
+	  iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
+				    strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
+	  sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
+	  chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false);
+	  FREE(iitfile);
+
+	  genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
+			      uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
+
+	  ndivs = IIT_ndivs(map_iit);
+	  for (indx = 1; indx <= Univ_IIT_total_nintervals(chromosome_iit); indx++) {
+	    chr = Univ_IIT_label(chromosome_iit,indx,&allocp);
+	    chrlength = Univ_IIT_length(chromosome_iit,indx);
+	    matches = IIT_get(&nmatches,map_iit,chr,/*coordstart*/0,/*coordend*/chrlength,/*sortp*/false);
+	    for (i = 0; i < nmatches; i++) {
+	      print_interval(chr,matches[i],map_iit,ndivs,chromosome_iit,genome,/*fieldint*/-1);
+	    }
+	    FREE(matches);
+	    if (allocp) {
+	      FREE(chr);
+	    }
+	  }
+
+	  Genome_free(&genome);
+	  Univ_IIT_free(&chromosome_iit);
+	  IIT_free(&map_iit);
+	  FREE(dbversion);
+	  FREE(genomesubdir);
+	  FREE(fileroot);
+	  FREE(dbroot);
+
+	  return 0;
+	}
+
+      } else {
+	/* User is requesting a dump of the map file contents */
+	/* Don't need to find competing labels, since they are given, so divread can be READ_NONE */
+	if ((map_iit = IIT_read(iitfile,/*name*/NULL,true,/*divread*/READ_NONE,/*divstring*/NULL,
+				/*add_iit_p*/true)) == NULL) {
+	  fprintf(stderr,"Cannot open IIT file %s\n",iitfile);
+	  exit(9);
+
+	} else {
+	  FREE(iitfile);
+
+	  IIT_dump(map_iit,/*sortp*/false);
+	  IIT_free(&map_iit);
+	  FREE(dbversion);
+	  FREE(genomesubdir);
+	  FREE(fileroot);
+	  FREE(dbroot);
+	  return 0;
+	}
       }
-      if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
-			       &chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
-	print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
-		       /*whole_chromosome_p*/true);
+      
+    } else {
+      /* User is requesting a dump of the genome */
+
+      iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
+				strlen(fileroot)+strlen(".chromosome.iit")+1,sizeof(char));
+      sprintf(iitfile,"%s/%s.chromosome.iit",genomesubdir,fileroot);
+      chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false);
+      FREE(iitfile);
+
+      if (snps_root == NULL || print_snps_mode == 0) {
+	genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
+			    uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
+      } else if (print_snps_mode == 2) {
+	genome = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
+			    uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
+      } else if (print_snps_mode == 1 || print_snps_mode == 3) {
+	genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
+			    uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
+	genomealt = Genome_new(snpsdir,fileroot,snps_root,/*genometype*/GENOME_OLIGOS,
+			       uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
       }
-      FREE(with_colon);
-    }
 
-    if (genomealt != NULL) {
-      Genome_free(&genomealt);
-    }
-    Genome_free(&genome);
+      for (indx = 1; indx <= Univ_IIT_total_nintervals(chromosome_iit); indx++) {
+	chr = Univ_IIT_label(chromosome_iit,indx,&allocp);
+	with_colon = (char *) CALLOC(strlen(chr)+strlen(":")+1,sizeof(char));
+	sprintf(with_colon,"%s:",chr);
+	if (allocp == true) {
+	  FREE(chr);
+	}
+	if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
+				 &chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
+	  print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
+			 /*whole_chromosome_p*/true);
+	}
+	FREE(with_colon);
+      }
 
-    Univ_IIT_free(&chromosome_iit);
+      if (genomealt != NULL) {
+	Genome_free(&genomealt);
+      }
+      Genome_free(&genome);
 
-    return 0;
+      Univ_IIT_free(&chromosome_iit);
+      FREE(dbversion);
+      FREE(genomesubdir);
+      FREE(fileroot);
+      FREE(dbroot);
+
+      return 0;
+    }
 
   } else if (dumpchrp == true) {
     iitfile = (char *) CALLOC(strlen(genomesubdir)+strlen("/")+
@@ -1306,6 +1464,10 @@ main (int argc, char *argv[]) {
 
     Univ_IIT_dump_contigs(contig_iit,chromosome_iit,/*directionalp*/true);
     Univ_IIT_free(&contig_iit);
+    FREE(dbversion);
+    FREE(genomesubdir);
+    FREE(fileroot);
+    FREE(dbroot);
     return 0;
 
   }
@@ -1323,7 +1485,75 @@ main (int argc, char *argv[]) {
   FREE(iitfile);
 
 
-  if (argc >= 1) {
+  if (argc > 1 && map_iitfile != NULL) {
+    /* User is requesting multiple labels */
+
+    mapdir = Datadir_find_mapdir(user_mapdir,genomesubdir,fileroot);
+    iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
+			      strlen(map_iitfile)+strlen(".iit")+1,sizeof(char));
+    sprintf(iitfile,"%s/%s.iit",mapdir,map_iitfile);
+    if (Access_file_exists_p(iitfile) == false) {
+      fprintf(stderr,"Map file %s.iit not found in %s.  Available files:\n",map_iitfile,mapdir);
+      Datadir_list_directory(stderr,mapdir);
+      fprintf(stderr,"Either install file %s.iit or specify a full directory path\n",map_iitfile);
+      fprintf(stderr,"using the -M flag to gmap.\n");
+      exit(9);
+    }
+    
+    /* Don't need to find competing labels, since they are given, so divread can be READ_NONE */
+    if ((map_iit = IIT_read(iitfile,/*name*/NULL,true,/*divread*/READ_NONE,/*divstring*/NULL,
+			    /*add_iit_p*/true)) == NULL) {
+      fprintf(stderr,"Cannot open IIT file %s\n",iitfile);
+      exit(9);
+    } else {
+      ndivs = IIT_ndivs(map_iit);
+    }
+
+    if (uniquep == false) {
+      for (i = 0; i < argc; i++) {
+	if ((index = IIT_find_one(map_iit,argv[i])) < 0) {
+	  fprintf(stderr,"Cannot find %s in map file %s\n",argv[i],iitfile);
+	} else {
+	  print_interval(/*divstring*/NULL,index,map_iit,ndivs,chromosome_iit,/*genome*/NULL,/*fieldint*/-1);
+	}
+      }
+
+    } else {
+      /* User is requesting a comparison of labels for identifying uniqueness */
+      nmatches = argc;
+      matches = (int *) MALLOC(nmatches * sizeof(int));
+      for (i = 0; i < nmatches; i++) {
+	if ((matches[i] = IIT_find_one(map_iit,argv[i])) < 0) {
+	  fprintf(stderr,"Cannot find %s in map file %s\n",argv[i],iitfile);
+	  return 9;
+	}
+      }
+      
+      for (i = 0; i < nmatches; i++) {
+	unique_positions = IIT_unique_positions_given_others(map_iit,matches[i],matches,nmatches);
+	unique_splicep = IIT_unique_splicep_given_others(map_iit,matches[i],matches,nmatches);
+
+	label = IIT_label(map_iit,matches[i],&allocp);
+	printf(">%s",label);		/* Separator needed for multiple labels */
+	/* TODO: Print coordinates */
+	printf("\n");
+	if (allocp == true) {
+	  FREE(label);
+	}
+
+	annotation = IIT_annotation(&restofheader,map_iit,matches[i],&allocp);
+	genemap_print_unique(annotation,unique_positions,unique_splicep,/*print_geneline_p*/false);
+	if (allocp == true) {
+	  FREE(annotation);
+	}
+      }
+    }
+
+    FREE(iitfile);
+    FREE(mapdir);
+
+  } else if (argc >= 1) {
+    /* User is providing a single query */
     if (coordp == true) {
       debug(printf("coordp is true\n"));
       if (Parserange_universal(&segment,&revcomp,&genomicstart,&genomiclength,&chrstart,&chrend,
@@ -1422,7 +1652,9 @@ main (int argc, char *argv[]) {
 	if (nflanking > 0) {
 	  if (sign != +1) {
 	    for (i = nleftflanks-1; i >= 0; i--) {
-	      print_interval(divstring,leftflanks[i],map_iit,ndivs,chromosome_iit,genome,fieldint);
+	      if (codingp == false || IIT_gene_overlapp(map_iit,leftflanks[i],chrstart,chrend) == true) {
+		print_interval(divstring,leftflanks[i],map_iit,ndivs,chromosome_iit,genome,fieldint);
+	      }
 	    }
 	  }
 	  printf("====================\n");
@@ -1430,14 +1662,18 @@ main (int argc, char *argv[]) {
 	}
 
 	for (i = 0; i < nmatches; i++) {
-	  print_interval(divstring,matches[i],map_iit,ndivs,chromosome_iit,genome,fieldint);
+	  if (codingp == false || IIT_gene_overlapp(map_iit,matches[i],chrstart,chrend) == true) {
+	    print_interval(divstring,matches[i],map_iit,ndivs,chromosome_iit,genome,fieldint);
+	  }
 	}
 
 	if (nflanking > 0) {
 	  printf("====================\n");
 	  if (sign != -1) {
 	    for (i = 0; i < nrightflanks; i++) {
-	      print_interval(divstring,rightflanks[i],map_iit,ndivs,chromosome_iit,genome,fieldint);
+	      if (codingp == false || IIT_gene_overlapp(map_iit,rightflanks[i],chrstart,chrend) == true) {
+		print_interval(divstring,rightflanks[i],map_iit,ndivs,chromosome_iit,genome,fieldint);
+	      }
 	    }
 	  }
 	  FREE(rightflanks);
@@ -1452,7 +1688,13 @@ main (int argc, char *argv[]) {
 	if ((*iit = IIT_read(filename,/*name*/NULL,true,/*divread*/READ_NONE,/*divstring*/NULL,/*add_iit_p*/true)) == NULL) {
 	}
 #endif
-	if ((map_iit = IIT_read(iitfile,/*name*/NULL,true,/*divread*/READ_NONE,/*divstring*/NULL,
+	
+	if (uniquep == true) {
+	  divread = READ_ALL;	/* For subsequent search of overlapping genes */
+	} else {
+	  divread = READ_NONE;
+	}
+	if ((map_iit = IIT_read(iitfile,/*name*/NULL,true,divread,/*divstring*/NULL,
 				/*add_iit_p*/true)) == NULL) {
 	  fprintf(stderr,"Cannot open IIT file %s\n",iitfile);
 	  exit(9);
@@ -1494,7 +1736,7 @@ main (int argc, char *argv[]) {
     }
 
   } else {
-    /* Read from stdin */
+    /* argc == 0.  Read from stdin */
 
     if (map_iitfile == NULL) {
       /* Skip */
diff --git a/src/gmap.c b/src/gmap.c
index 26a6c10..b8e3386 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 200232 2016-11-08 00:55:43Z twu $";
+static char rcsid[] = "$Id: gmap.c 209123 2017-08-15 19:30:25Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -16,6 +16,7 @@ static char rcsid[] = "$Id: gmap.c 200232 2016-11-08 00:55:43Z twu $";
 #include <string.h>		/* For strcpy */
 #include <strings.h>		/* For rindex */
 #include <ctype.h>
+#include <math.h>		/* For rint */
 #ifdef HAVE_SSE2
 #include <emmintrin.h>
 #endif
@@ -125,6 +126,7 @@ static char rcsid[] = "$Id: gmap.c 200232 2016-11-08 00:55:43Z twu $";
 #define CHIMERA_PVALUE 0.01
 #define CHIMERA_FVALUE 6.634897	/* qnorm(CHIMERA_PVALUE/2)^2 */
 #define CHIMERA_SLOP 90	/* in nucleotides */
+#define CHIMERA_EXTEND 20	/* Was previously 8, but this missed exon-exon boundaries */
 
 #define MIN_MATCHES 20
 
@@ -316,8 +318,13 @@ static Access_mode_T genome_access = USE_ALLOCATE;
 static int min_intronlength = 9;
 static int max_deletionlength = 50;
 static int maxtotallen_bound = 2400000;
-static int maxintronlen = 200000; /* Was used previously in stage 1.  Now used only in stage 2 and Stage3_mergeable. */
+
+static bool split_large_introns_p = false;
+
+/* Need to set higher than 200,000 for many human genes, such as ALK */
+static int maxintronlen = 500000; /* Was used previously in stage 1.  Now used only in stage 2 and Stage3_mergeable. */
 static int maxintronlen_ends = 10000; /* Used in stage 3 */
+
 static int minendexon = 12;
 static int maxextension = 1000000; /* Used in stage 1.  Not adjustable by user */
 static int chimera_margin = 30;	/* Useful for finding readthroughs */
@@ -376,7 +383,7 @@ static bool timingp = false;
 static bool checkp = false;
 static int maxpaths_report = 5;	/* 0 means 1 if nonchimeric, 2 if chimeric */
 static bool quiet_if_excessive_p = false;
-static int suboptimal_score = 1000000;
+static double suboptimal_score_float = 0.50;
 static bool require_splicedir_p = false;
 
 
@@ -488,7 +495,7 @@ static struct option long_options[] = {
 #endif
   {"kmer", required_argument, 0, 'k'}, /* required_index1part, index1part */
   {"sampling", required_argument, 0, 0}, /* required_nterval, index1interval */
-  {"genomefull", no_argument, 0, 'G'}, /* uncompressedp */
+  {"genomefull", no_argument, 0, 'G'}, /* uncompressedp.  No longer supported. */
   {"gseg", required_argument, 0, 'g'}, /* user_genomicseg */
   {"selfalign", no_argument, 0, '1'}, /* user_selfalign_p */
   {"pairalign", no_argument, 0, '2'}, /* user_pairalign_p */
@@ -502,8 +509,11 @@ static struct option long_options[] = {
 #endif
   {"expand-offsets", required_argument, 0, 0}, /* expand_offsets_p */
   {"min-intronlength", required_argument, 0, 0}, /* min_intronlength */
+
   {"max-intronlength-middle", required_argument, 0, 0}, /* maxintronlen */
   {"max-intronlength-ends", required_argument, 0, 0}, /* maxintronlen_ends */
+  {"split-large-introns", no_argument, 0, 0},	      /* split_large_introns_p */
+
   {"trim-end-exons", required_argument, 0, 0}, /* minendexon */
   {"totallength", required_argument, 0, 'L'}, /* maxtotallen_bound */
   {"chimera-margin", required_argument, 0, 'x'}, /* chimera_margin */
@@ -555,7 +565,7 @@ static struct option long_options[] = {
   {"split-output", required_argument, 0, 0}, /* split_output_root */
   {"failed-input", required_argument, 0, 0}, /* failedinput_root */
   {"append-output", no_argument, 0, 0},	     /* appendp */
-  {"suboptimal-score", required_argument, 0, 0}, /* suboptimal_score */
+  {"suboptimal-score", required_argument, 0, 0}, /* suboptimal_score_float */
   {"require-splicedir", no_argument, 0, 0}, /* require_splicedir_p */
 
   {"gff3-add-separators", required_argument, 0, 0}, /* gff3_separators_p */
@@ -696,7 +706,7 @@ print_program_version () {
   fprintf(stdout,"\n");
 
 
-  fprintf(stdout,"SIMD functions:");
+  fprintf(stdout,"SIMD functions compiled:");
 #ifdef HAVE_ALTIVEC
   fprintf(stdout," Altivec");
 #endif
@@ -721,8 +731,11 @@ print_program_version () {
 #ifdef HAVE_SSE4_2
   fprintf(stdout," SSE4.2");
 #endif
-#ifdef HAVE_AVX
-  fprintf(stdout," AVX");
+#ifdef HAVE_AVX2
+  fprintf(stdout," AVX2");
+#endif
+#ifdef HAVE_AVX512
+  fprintf(stdout," AVX512");
 #endif
   fprintf(stdout,"\n");
 
@@ -894,6 +907,7 @@ evaluate_query (bool *poorp, bool *repetitivep, char *queryuc_ptr, int queryleng
 static Stage3_T *
 stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *second_absmq,
 		       List_T stage3list, bool mergedp, bool chimerap, bool remove_overlaps_p) {
+  List_T p;
   Stage3_T *array1, *array0, x, y;
   bool *eliminate;
   int norig_primary, norig_altloc, i_primary, i_altloc, i, j;
@@ -901,7 +915,8 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
 
   Univcoord_T alias_start, alias_end;
 
-  debug2(printf("Entering stage3array_from_list with %d entries\n",List_length(stage3list)));
+  debug(printf("Entering stage3array_from_list with %d entries\n",List_length(stage3list)));
+
   /* Stage3_recompute_goodness(stage3list); -- No longer necessary */
   Stage3_compute_mapq(stage3list);
 
@@ -910,21 +925,23 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
     *second_absmq = 0;
     return (Stage3_T *) NULL;
 
+#if 0
   } else if (mergedp == true) {
-    debug2(printf("mergedp is true\n"));
+    debug(printf("mergedp is true\n"));
     Stage3_count_paths(&norig_primary,&norig_altloc,stage3list);
-    array0 = (Stage3_T *) List_to_array(stage3list,NULL);
+    array0 = (Stage3_T *) List_to_array_out(stage3list,NULL);
     List_free(&stage3list);
     *first_absmq = 0;
     *second_absmq = 0;
     *npaths_primary = norig_primary;
     *npaths_altloc = norig_altloc;
     return array0;
+#endif
 
   } else if (chimerap == true) {
-    debug2(printf("chimerap is true\n"));
+    debug(printf("chimerap is true\n"));
     Stage3_count_paths(&norig_primary,&norig_altloc,stage3list);
-    array0 = (Stage3_T *) List_to_array(stage3list,NULL);
+    array0 = (Stage3_T *) List_to_array_out(stage3list,NULL);
     List_free(&stage3list);
     *first_absmq = Stage3_absmq_score(array0[0]);
     if (norig_primary + norig_altloc <= 2) {
@@ -938,13 +955,22 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
     return array0;
 
   } else if (remove_overlaps_p == false) {
-    debug2(printf("remove_overlaps_p is false\n"));
+    debug(printf("remove_overlaps_p is false\n"));
     Stage3_count_paths(&norig_primary,&norig_altloc,stage3list);
-    array0 = (Stage3_T *) List_to_array(stage3list,NULL);
+    array0 = (Stage3_T *) List_to_array_out(stage3list,NULL);
     List_free(&stage3list);
     qsort(array0,norig_primary + norig_altloc,sizeof(Stage3_T),Stage3_cmp);
 
-    threshold_score = Stage3_goodness(array0[0]) - suboptimal_score;
+    if (suboptimal_score_float < 1.0) {
+      threshold_score = Stage3_goodness(array0[0]) * suboptimal_score_float;
+      debug(printf("threshold score %d = goodness %d * suboptimal score_float %f\n",
+		   threshold_score,Stage3_goodness(array0[0]),suboptimal_score_float));
+    } else {
+      threshold_score = Stage3_goodness(array0[0]) - (int) suboptimal_score_float;
+      debug(printf("threshold score %d = goodness %d - suboptimal score %d\n",
+		   threshold_score,Stage3_goodness(array0[0]),(int) suboptimal_score_float));
+    }
+
     if (Stage3_altloc_chr(&alias_start,&alias_end,array0[0]) == false) {
       i_primary = 1;
       i_altloc = 0;
@@ -961,6 +987,11 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
       }
       i++;
     }
+    while (i < norig_primary + norig_altloc) {
+      Stage3_free(&(array0[i]));
+      i++;
+    }
+
     *npaths_primary = i_primary;
     *npaths_altloc = i_altloc;
     *first_absmq = Stage3_absmq_score(array0[0]);
@@ -973,7 +1004,7 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
     return array0;
 
   } else {
-    debug2(printf("remove_overlaps_p is true\n"));
+    debug(printf("remove_overlaps_p is true\n"));
     Stage3_count_paths(&norig_primary,&norig_altloc,stage3list);
     eliminate = (bool *) CALLOCA(norig_primary + norig_altloc,sizeof(bool));
 
@@ -981,6 +1012,7 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
     array0 = (Stage3_T *) MALLOCA((norig_primary + norig_altloc) * sizeof(Stage3_T));
     List_fill_array_and_free((void **) array0,&stage3list);
     qsort(array0,norig_primary + norig_altloc,sizeof(Stage3_T),Stage3_cmp);
+
     for (i = 0; i < norig_primary + norig_altloc; i++) {
       x = array0[i];
       debug(printf("%d: chr %d:%u..%u, goodness %d, matches %d, npairs %d\n",
@@ -993,6 +1025,7 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
       }
     }
 
+
     *npaths_primary = *npaths_altloc = 0;
     for (i = 0; i < norig_primary + norig_altloc; i++) {
       if (eliminate[i] == false) {
@@ -1017,7 +1050,16 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
     FREEA(array0);
     FREEA(eliminate);
 
-    threshold_score = Stage3_goodness(array1[0]) - suboptimal_score;
+    if (suboptimal_score_float < 1.0) {
+      threshold_score = Stage3_goodness(array0[0]) * suboptimal_score_float;
+      debug(printf("threshold score %d = goodness %d * suboptimal score %f\n",
+		   threshold_score,Stage3_goodness(array0[0]),suboptimal_score_float));
+    } else {
+      threshold_score = Stage3_goodness(array0[0]) - (int) suboptimal_score_float;
+      debug(printf("threshold score %d = goodness %d - suboptimal score %d\n",
+		   threshold_score,Stage3_goodness(array0[0]),(int) suboptimal_score_float));
+    }
+
     if (Stage3_altloc_chr(&alias_start,&alias_end,array0[0]) == false) {
       i_primary = 1;
       i_altloc = 0;
@@ -1034,6 +1076,11 @@ stage3array_from_list (int *npaths_primary, int *npaths_altloc, int *first_absmq
       }
       i++;
     }
+    while (i < (*npaths_primary) + (*npaths_altloc)) {
+      Stage3_free(&(array1[i]));
+      i++;
+    }
+
     *npaths_primary = i_primary;
     *npaths_altloc = i_altloc;
     *first_absmq = Stage3_absmq_score(array1[0]);
@@ -1118,6 +1165,7 @@ update_stage3list (List_T stage3list, Sequence_T queryseq,
   }
 #endif
 
+  debug(printf("Entering update_stage3list with %d results\n",List_length(stage3list)));
   debug2(printf("Beginning Stage2_compute with chrstart %u and chrend %u and query_subseq_offset %d\n",
 		chrstart,chrend,Sequence_subseq_offset(queryseq)));
   all_stage2results = Stage2_compute(Sequence_trimpointer(queryseq),Sequence_trimpointer(queryuc),
@@ -1179,6 +1227,7 @@ update_stage3list (List_T stage3list, Sequence_T queryseq,
 				    /*skiplength*/Sequence_skiplength(queryseq),
 				    /*trimlength*/Sequence_trimlength(queryseq),
 				    straintype,strain,altstrain_iit)) != NULL) {
+      debug(printf("Pushing %p onto stage3list\n",stage3));
       stage3list = List_push(stage3list,(void *) stage3);
     }
 
@@ -1292,6 +1341,7 @@ stage3_from_usersegment (int *npaths_primary, int *npaths_altloc, int *first_abs
 }
 
 
+#if 0
 static List_T
 stage3list_remove_duplicates (List_T stage3list) {
   List_T unique = NULL;
@@ -1355,6 +1405,7 @@ stage3list_remove_duplicates (List_T stage3list) {
     return unique;
   }
 }
+#endif
 
 
 #if 0
@@ -1741,18 +1792,23 @@ middle_piece_chimera_p (int *querystart, int *queryend, Stage3_T from, Stage3_T
 }
 
 
-/* Returns nonjoinable */
 static List_T
 local_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1, 
 		      Stage3_T **stage3array_sub2, int *npaths_sub2,
 		      List_T stage3list) {
-  List_T nonjoinable = NULL, p;
+  List_T p;
   Stage3_T from, to, stage3;
   Stage3_T *by_queryend, *by_querystart;
-  int npaths, i, j, k;
+  Chrnum_T chrnum;
+  int npaths, i, j, k, kstart, kend;
   int queryend;
 
   debug2(printf("local_separate_paths called with list length %d\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    printf("%p\n",List_head(p));
+  }
+#endif
 
   if (stage3list == NULL) {
     *stage3array_sub1 = (Stage3_T *) NULL;
@@ -1767,52 +1823,70 @@ local_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
     }
   }
 
-  by_queryend = (Stage3_T *) List_to_array_n(&npaths,stage3list);
-  qsort(by_queryend,npaths,sizeof(Stage3_T),Stage3_queryend_cmp);
+  by_queryend = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
+  qsort(by_queryend,npaths,sizeof(Stage3_T),Stage3_chrnum_queryend_cmp);
 
-  by_querystart = (Stage3_T *) List_to_array_n(&npaths,stage3list);
-  qsort(by_querystart,npaths,sizeof(Stage3_T),Stage3_querystart_cmp);
+  by_querystart = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
+  qsort(by_querystart,npaths,sizeof(Stage3_T),Stage3_chrnum_querystart_cmp);
 
 #ifdef DEBUG2
   for (i = 0; i < npaths; i++) {
     stage3 = (Stage3_T) by_queryend[i];
-    printf("from: %p query %d..%d, genomic %u..%u\t",
+    printf("from: %p query %d..%d, chrnum %d, genomic %u..%u\t",
 	   stage3,Stage3_querystart(stage3),Stage3_queryend(stage3),
-	   Stage3_genomicstart(stage3),Stage3_genomicend(stage3));
+	   Stage3_chrnum(stage3),Stage3_genomicstart(stage3),Stage3_genomicend(stage3));
 
     stage3 = (Stage3_T) by_querystart[i];
-    printf("to: %p query %d..%d, genomic %u..%u\n",
+    printf("to: %p query %d..%d, chrnum %d, genomic %u..%u\n",
 	   stage3,Stage3_querystart(stage3),Stage3_queryend(stage3),
-	   Stage3_genomicstart(stage3),Stage3_genomicend(stage3));
+	   Stage3_chrnum(stage3),Stage3_genomicstart(stage3),Stage3_genomicend(stage3));
   }
 #endif
 
-  j = 0;
+  kend = 0;
   for (i = 0; i < npaths; i++) {
+    debug2(printf("queryend %d:",i));
     from = by_queryend[i];
-    queryend = Stage3_queryend(from);
 
-    while (j < npaths && Stage3_querystart(by_querystart[j]) < queryend + CHIMERA_SLOP) {
+    /* Find matching chromosomal bounds for querystart */
+    chrnum = Stage3_chrnum(from);
+    while (kend < npaths && Stage3_chrnum(by_querystart[kend]) == chrnum) {
+      kend++;
+    }
+    kstart = kend - 1;
+    while (kstart >= 0 && Stage3_chrnum(by_querystart[kstart]) == chrnum) {
+      kstart--;
+    }
+    kstart++;
+    debug2(printf(" querystart bounded by %d..%d:",kstart,kend));
+
+
+    /* Find matching querystart */
+    queryend = Stage3_queryend(from);
+    j = kstart;
+    while (j < kend && Stage3_querystart(by_querystart[j]) < queryend + CHIMERA_SLOP) {
       j++;
     }
     j--;
 
-    while (j >= 0 && Stage3_querystart(by_querystart[j]) > queryend - CHIMERA_SLOP) {
+    while (j >= kstart && Stage3_querystart(by_querystart[j]) > queryend - CHIMERA_SLOP) {
       j--;
     }
     j++;
 
-    while (j < npaths && Stage3_querystart(by_querystart[j]) < queryend + CHIMERA_SLOP) {
+    while (j < kend && Stage3_querystart(by_querystart[j]) < queryend + CHIMERA_SLOP) {
       to = by_querystart[j];
 
+      debug2(printf(" %d",j));
       if (Chimera_local_join_p(from,to,CHIMERA_SLOP) == true) {
-	debug2(printf("Found local join from %d to %d\n",i,j));
+	debug2(printf("(to %d)",i));
 	Stage3_set_joinable_left(from);
 	Stage3_set_joinable_right(to);
       }
 
       j++;
     }
+    debug2(printf("\n"));
   }
 
   FREE(by_querystart);
@@ -1822,6 +1896,8 @@ local_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
   *npaths_sub1 = *npaths_sub2 = 0;
   for (p = stage3list; p != NULL; p = List_next(p)) {
     stage3 = (Stage3_T) List_head(p);
+    debug2(printf("Stage3 %p.  joinable_left_p %d, joinable_right_p %d\n",
+		  stage3,Stage3_joinable_left_p(stage3),Stage3_joinable_right_p(stage3)));
     if (Stage3_joinable_left_p(stage3) == true) {
       debug2(printf("Putting stage3 %p into local sub1\n",stage3));
       (*npaths_sub1)++;
@@ -1837,42 +1913,55 @@ local_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
     *npaths_sub1 = 0;
     *stage3array_sub2 = (Stage3_T *) NULL;
     *npaths_sub2 = 0;
+    
   } else {
     *stage3array_sub1 = (Stage3_T *) MALLOC((*npaths_sub1) * sizeof(Stage3_T)); /* Return value */
     *stage3array_sub2 = (Stage3_T *) MALLOC((*npaths_sub2) * sizeof(Stage3_T)); /* Return value */
     j = k = 0;
     for (p = stage3list; p != NULL; p = List_next(p)) {
       stage3 = (Stage3_T) List_head(p);
-      if (Stage3_joinable_left_p(stage3) == false && Stage3_joinable_right_p(stage3) == false) {
-	nonjoinable = List_push(nonjoinable,stage3);
-      } else {
-	/* Note: it is possible that the same stage3 object gets put into both lists */
-	if (Stage3_joinable_left_p(stage3) == true) {
-	  (*stage3array_sub1)[j++] = stage3;
-	}
-	if (Stage3_joinable_right_p(stage3) == true) {
-	  (*stage3array_sub2)[k++] = stage3;
-	}
+      /* Note: it is possible that the same stage3 object gets put into both lists */
+      if (Stage3_joinable_left_p(stage3) == true) {
+	debug2(printf("Putting %p into sub1\n",stage3));
+	(*stage3array_sub1)[j++] = stage3;
+      }
+      if (Stage3_joinable_right_p(stage3) == true) {
+	debug2(printf("Putting %p into sub2\n",stage3));
+	(*stage3array_sub2)[k++] = stage3;
       }
     }
   }
 
-  return nonjoinable;
+  debug2(printf("local_separate_paths returning %d paths\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
+
+  return stage3list;
 }
 
 
-/* Returns nonjoinable */
 static List_T
 distant_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1, 
 			Stage3_T **stage3array_sub2, int *npaths_sub2,
 			List_T stage3list) {
-  List_T nonjoinable = NULL, p;
+  List_T p;
   Stage3_T from, to, stage3;
   Stage3_T *by_queryend, *by_querystart;
   int npaths, i, j, k;
   int queryend;
 
   debug2(printf("distant_separate_paths called with list length %d\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
+
 
   if (stage3list == NULL) {
     *stage3array_sub1 = (Stage3_T *) NULL;
@@ -1887,10 +1976,10 @@ distant_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
     }
   }
 
-  by_queryend = (Stage3_T *) List_to_array_n(&npaths,stage3list);
+  by_queryend = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
   qsort(by_queryend,npaths,sizeof(Stage3_T),Stage3_queryend_cmp);
 
-  by_querystart = (Stage3_T *) List_to_array_n(&npaths,stage3list);
+  by_querystart = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
   qsort(by_querystart,npaths,sizeof(Stage3_T),Stage3_querystart_cmp);
 
   j = 0;
@@ -1947,46 +2036,50 @@ distant_separate_paths (Stage3_T **stage3array_sub1, int *npaths_sub1,
     j = k = 0;
     for (p = stage3list; p != NULL; p = List_next(p)) {
       stage3 = (Stage3_T) List_head(p);
-      if (Stage3_joinable_left_p(stage3) == false && Stage3_joinable_right_p(stage3) == false) {
-	nonjoinable = List_push(nonjoinable,stage3);
-      } else {
-	/* Note: it is possible that the same stage3 object gets put into both lists */
-	if (Stage3_joinable_left_p(stage3) == true) {
-	  debug2(printf("Putting stage3 %p into distant sub1\n",stage3));
-	  (*stage3array_sub1)[j++] = stage3;
-	}
-	if (Stage3_joinable_right_p(stage3) == true) {
-	  debug2(printf("Putting stage3 %p into distant sub2\n",stage3));
-	  (*stage3array_sub2)[k++] = stage3;
-	}
+      /* Note: it is possible that the same stage3 object gets put into both lists */
+      if (Stage3_joinable_left_p(stage3) == true) {
+	debug2(printf("Putting stage3 %p into distant sub1\n",stage3));
+	(*stage3array_sub1)[j++] = stage3;
+      }
+      if (Stage3_joinable_right_p(stage3) == true) {
+	debug2(printf("Putting stage3 %p into distant sub2\n",stage3));
+	(*stage3array_sub2)[k++] = stage3;
       }
     }
   }
 
-  return nonjoinable;
+  debug2(printf("distant_separate_paths returning %d paths\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
+
+  return stage3list;
 }
 
 
-/* Returns a list with only one Stage3_T object */
 static List_T
-merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int npaths_sub1, int bestfrom,
+merge_left_and_right_readthrough (bool *mergedp, List_T stage3list,
+				  Stage3_T *stage3array_sub1, int npaths_sub1, int bestfrom,
 				  Stage3_T *stage3array_sub2, int npaths_sub2, int bestto,
-				  List_T nonjoinable, int breakpoint, int queryntlength,
+				  int breakpoint, int queryntlength,
 #ifdef PMAP
 				  char *queryaaseq_ptr,
 #endif
 				  char *queryseq_ptr, char *queryuc_ptr,
 				  Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
 				  Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
-  List_T newstage3list, p;
-  Stage3_T best0, best1, *array, last, freed0 = NULL, freed1 = NULL;
+  List_T newstage3list = NULL, p;
+  Stage3_T best0, best1, stage3, *array;
   int i, k;
 
   best0 = stage3array_sub1[bestfrom];
   best1 = stage3array_sub2[bestto];
 
-  debug2(printf("\nEntering merge_left_and_right_readthrough with bestfrom %d: %p, bestto %d: %p, and nonjoinable %d\n",
-		bestfrom,best0,bestto,best1,List_length(nonjoinable)));
+  debug2(printf("\nEntering merge_left_and_right_readthrough with bestfrom %d: %p, bestto %d: %p\n",
+		bestfrom,best0,bestto,best1));
 
 #if 0
   /* Checked better by Stage3_mergeable */
@@ -1995,13 +2088,13 @@ merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int
     debug2(printf("sensedirs are not compatible: %d and %d\n",
 		  Stage3_sensedir(best0),Stage3_sensedir(best1)));
     if (Stage3_npairs(best0) > Stage3_npairs(best1)) {
-      newstage3list = (List_T) NULL;
-      newstage3list = List_push(newstage3list,(void *) best0);
+      debug(printf("Pushing %p onto stage3list\n",best0));
+      stage3list = List_push(stage3list,(void *) best0);
       freed1 = best1;
       Stage3_free(&best1);
     } else {
-      newstage3list = (List_T) NULL;
-      newstage3list = List_push(newstage3list,(void *) best1);
+      debug(printf("Pushing %p onto stage3list\n",best1));
+      stage3list = List_push(stage3list,(void *) best1);
       freed0 = best0;
       Stage3_free(&best0);
     }
@@ -2021,20 +2114,12 @@ merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int
 			 pairpool,dynprogL,dynprogM,dynprogR,
 			 maxpeelback,oligoindices_minor,diagpool,cellpool) == false) {
 
-    newstage3list = (List_T) NULL;
-    newstage3list = List_push(newstage3list,(void *) best0);
-    newstage3list = List_push(newstage3list,(void *) best1);
-    for (p = nonjoinable; p != NULL; p = List_next(p)) {
-      debug2(printf("1.  Pushing readthrough nonjoinable stage3 %p.  %d..%d\n",
-		    List_head(p),Stage3_querystart(List_head(p)),Stage3_queryend(List_head(p))));
-      if (List_head(p) == NULL) {
-	debug2(printf("Unexpected: Have a NULL stage3 in nonjoinable\n"));
-      } else {
-	newstage3list = List_push(newstage3list,(void *) List_head(p));
-      }
-    }
-    *mergedp = false;
-    return List_reverse(newstage3list);
+    /* debug(printf("Pushing %p onto stage3list\n",best0)); */
+    /* debug(printf("Pushing %p onto stage3list\n",best1)); */
+    /* stage3list = List_push(stage3list,(void *) best0); */
+    /* stage3list = List_push(stage3list,(void *) best1); */
+    /* *mergedp = false; */
+    return stage3list;
 
   } else {
     debug2(printf("done with Stage3_merge_local"));
@@ -2042,61 +2127,37 @@ merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int
     debug2(printf("Rearranging paths\n"));
     debug2(printf("Changing genomicend of merged stage3 from %u to %u\n",Stage3_genomicend(best0),Stage3_genomicend(best1)));
     Stage3_set_genomicend(best0,Stage3_genomicend(best1));
-    newstage3list = (List_T) NULL;
+    debug(printf("Pushing %p onto newstage3list\n",best0));
     newstage3list = List_push(newstage3list,(void *) best0);
     debug2(printf("Freeing best1 %p\n",best1));
-    freed1 = best1;
-    Stage3_free(&best1);
-    debug2(printf("Pushing stage3 %p: ",best0));
+    /* freed1 = best1; */
     debug2(Stage3_print_ends(best0));
     *mergedp = true;
 
-    if (npaths_sub1 + npaths_sub2 > 2) {
+    
+    if (List_length(stage3list) > 2) {
       /* Push rest of results, taking care not to have duplicates */
-
-      array = (Stage3_T *) MALLOCA((npaths_sub1 + npaths_sub2 - 2) * sizeof(Stage3_T));
+      array = (Stage3_T *) MALLOCA((List_length(stage3list) - 2) * sizeof(Stage3_T));
       k = 0;
-      for (i = 0; i < npaths_sub1; i++) {
-	if (i != bestfrom) {
-	  debug2(printf("array %d is now sub1 %d: %p\n",k,i,stage3array_sub1[i]));
-	  array[k++] = stage3array_sub1[i];
-	}
-      }
-      for (i = 0; i < npaths_sub2; i++) {
-	if (i != bestto) {
-	  debug2(printf("array %d is now sub2 %d: %p\n",k,i,stage3array_sub2[i]));
-	  array[k++] = stage3array_sub2[i];
-	}
-      }
-      qsort(array,npaths_sub1+npaths_sub2-2,sizeof(Stage3_T),Stage3_identity_cmp);
-
-      last = (Stage3_T) NULL;
-      for (i = 0; i < npaths_sub1+npaths_sub2-2; i++) {
-	if (array[i] == last) {
-	  /* Skip */
-	  debug2(printf("array %d: Skipping stage3 %p, because just pushed, so duplicate\n",i,array[i]));
-	} else if (array[i] == best0 || array[i] == best1) {
+      for (p = stage3list; p != NULL; p = List_next(p)) {
+	stage3 = (Stage3_T) List_head(p);
+	if (stage3 == best0) {
 	  /* Skip */
-	  debug2(printf("array %d: Skipping stage3 %p, because in chimera\n",i,array[i]));
-	} else if (array[i] == freed0 || array[i] == freed1) {
+	} else if (stage3 == best1) {
 	  /* Skip */
-	  debug2(printf("array %d: Skipping stage3 %p, because already freed\n",i,array[i]));
 	} else {
-	  debug2(printf("array %d: Pushing stage3 %p\n",i,array[i]));
-	  newstage3list = List_push(newstage3list,(void *) array[i]);
-	  last = array[i];
+	  array[k++] = stage3;
+	  debug(printf("Pushing %p onto newstage3list\n",stage3));
+	  newstage3list = List_push(newstage3list,(void *) stage3);
 	}
       }
-
+      qsort(array,k,sizeof(Stage3_T),Stage3_identity_cmp);
       FREEA(array);
     }
 
-    for (p = nonjoinable; p != NULL; p = List_next(p)) {
-      debug2(printf("2.  Pushing readthrough nonjoinable stage3 %p.  %d..%d\n",
-		    List_head(p),Stage3_querystart(List_head(p)),Stage3_queryend(List_head(p))));
-      newstage3list = List_push(newstage3list,(void *) List_head(p));
-    }
+    Stage3_free(&best1);
 
+    List_free(&stage3list);
     return List_reverse(newstage3list);
   }
 }
@@ -2106,16 +2167,16 @@ merge_left_and_right_readthrough (bool *mergedp, Stage3_T *stage3array_sub1, int
 static List_T
 merge_left_and_right_transloc (Stage3_T *stage3array_sub1, int npaths_sub1, int bestfrom,
 			       Stage3_T *stage3array_sub2, int npaths_sub2, int bestto,
-			       List_T nonjoinable) {
+			       List_T stage3list) {
   List_T newstage3list, p;
-  Stage3_T best0, best1, *array, last;
+  Stage3_T best0, best1, stage3, *array;
   int i, k;
 
   best0 = stage3array_sub1[bestfrom];
   best1 = stage3array_sub2[bestto];
 
-  debug2(printf("\nEntering merge_left_and_right_transloc with bestfrom %d: %p, bestto %d: %p, and nonjoinable %d\n",
-		bestfrom,best0,bestto,best1,List_length(nonjoinable)));
+  debug2(printf("\nEntering merge_left_and_right_transloc with bestfrom %d: %p, bestto %d: %p, and stage3list %d\n",
+		bestfrom,best0,bestto,best1,List_length(stage3list)));
 
   debug2(printf("Before Stage3_merge_chimera, best0 is %p, query %d..%d\n",
 		best0,Stage3_querystart(best0),Stage3_queryend(best0)));
@@ -2126,54 +2187,34 @@ merge_left_and_right_transloc (Stage3_T *stage3array_sub1, int npaths_sub1, int
 
   debug2(printf("Rearranging paths\n"));
   newstage3list = (List_T) NULL;
+
+  debug(printf("Pushing %p onto newstage3list\n",best0));
+  debug(printf("Pushing %p onto newstage3list\n",best1));
   newstage3list = List_push(newstage3list,(void *) best0);
   newstage3list = List_push(newstage3list,(void *) best1);
-  debug2(printf("Pushing stage3 %p, ",best0));
   debug2(Stage3_print_ends(best0));
-  debug2(printf("Pushing stage3 %p, ",best1));
   debug2(Stage3_print_ends(best1));
 
-  if (npaths_sub1 + npaths_sub2 > 2) {
+  
+  if (List_length(stage3list) > 2) {
     /* Push rest of results, taking care not to have duplicates */
-
-    array = (Stage3_T *) MALLOCA((npaths_sub1 + npaths_sub2 - 2) * sizeof(Stage3_T));
+    array = (Stage3_T *) MALLOCA((List_length(stage3list) - 2) * sizeof(Stage3_T));
     k = 0;
-    for (i = 0; i < npaths_sub1; i++) {
-      if (i != bestfrom) {
-	array[k++] = stage3array_sub1[i];
-      }
-    }
-    for (i = 0; i < npaths_sub2; i++) {
-      if (i != bestto) {
-	array[k++] = stage3array_sub2[i];
-      }
-    }
-    qsort(array,npaths_sub1+npaths_sub2-2,sizeof(Stage3_T),Stage3_identity_cmp);
-
-    last = (Stage3_T) NULL;
-    for (i = 0; i < npaths_sub1+npaths_sub2-2; i++) {
-      if (array[i] == last) {
-	/* Skip */
-	debug2(printf("Skipping stage3 %p, because just pushed\n",array[i]));
-      } else if (array[i] == best0 || array[i] == best1) {
+    for (p = stage3list; p != NULL; p = List_next(p)) {
+      stage3 = (Stage3_T) List_head(p);
+      if (stage3 == best0 || stage3 == best1) {
 	/* Skip */
-	debug2(printf("Skipping stage3 %p, because in chimera\n",array[i]));
       } else {
-	debug2(printf("Pushing stage3 %p.  ",array[i]));
-	debug2(Stage3_print_ends(array[i]));
-	newstage3list = List_push(newstage3list,(void *) array[i]);
-	last = array[i];
+	array[k++] = stage3;
+	debug(printf("Pushing %p onto newstage3list\n",stage3));
+	newstage3list = List_push(newstage3list,(void *) stage3);
       }
     }
-
+    qsort(array,k,sizeof(Stage3_T),Stage3_identity_cmp);
     FREEA(array);
   }
-
-  for (p = nonjoinable; p != NULL; p = List_next(p)) {
-    debug2(printf("Pushing transloc nonjoinable stage3 %p\n",List_head(p)));
-    newstage3list = List_push(newstage3list,(void *) List_head(p));
-  }
-
+    
+  List_free(&stage3list);
   return List_reverse(newstage3list);
 }
 
@@ -2189,7 +2230,7 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
 		 Sequence_T queryseq, Sequence_T queryuc,
 		 int queryntlength, Genome_T genome, Genome_T genomealt,
 		 Univ_IIT_T chromosome_iit, Pairpool_T pairpool) {
-  int breakpoint, leftpos, rightpos, midpos;
+  int breakpoint, rangelow, rangehigh, leftpos, rightpos, midpos;
   int maxpeelback_from, maxpeelback_to;
   int found_cdna_direction, try_cdna_direction;
   char comp;			/* Not really used anywhere */
@@ -2202,14 +2243,14 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
 
   if (Stage3_queryend(from) < Stage3_querystart(to)) {
     /* Gap exists between the two parts */
-    if ((leftpos = Stage3_queryend(from) - 8) < 0) {
+    if ((leftpos = Stage3_queryend(from) - CHIMERA_EXTEND) < 0) {
       leftpos = 0;
     }
-    if ((rightpos = Stage3_querystart(to) + 8) >= queryntlength) {
+    if ((rightpos = Stage3_querystart(to) + CHIMERA_EXTEND) >= queryntlength) {
       rightpos = queryntlength - 1;
     }
-    maxpeelback_from = 8;
-    maxpeelback_to = 8;
+    maxpeelback_from = CHIMERA_EXTEND;
+    maxpeelback_to = CHIMERA_EXTEND;
     debug2(printf("overlap: leftpos %d, rightpos %d, queryntlength %d, maxpeelback_from %d, maxpeelback_to %d\n",
 		  leftpos,rightpos,queryntlength,maxpeelback_from,maxpeelback_to));
 
@@ -2229,10 +2270,10 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
     
   } else {
     /* Two parts overlap */
-    if ((leftpos = Stage3_querystart(to) - 8) < 0) {
+    if ((leftpos = Stage3_querystart(to) - CHIMERA_EXTEND) < 0) {
       leftpos = 0;
     }
-    if ((rightpos = Stage3_queryend(from) + 8) >= queryntlength) {
+    if ((rightpos = Stage3_queryend(from) + CHIMERA_EXTEND) >= queryntlength) {
       rightpos = queryntlength - 1;
     }
     midpos = (leftpos+rightpos)/2;
@@ -2260,11 +2301,11 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
 #endif
   }
 
-  debug2(printf("Before Stage3_extend_right, bestfrom is %p, query %d..%d\n",
-		from,Stage3_querystart(from),Stage3_queryend(from)));
+  debug2(printf("Before Stage3_extend_right, bestfrom is %p, query %d..%d, rightpos %d, pairs %p\n",
+		from,Stage3_querystart(from),Stage3_queryend(from),rightpos,Stage3_pairs(from)));
   debug2(Stage3_print_ends(from));
-  debug2(printf("Before Stage3_extend_left, bestto is %p, query %d..%d\n",
-		to,Stage3_querystart(to),Stage3_queryend(to)));
+  debug2(printf("Before Stage3_extend_left, bestto is %p, query %d..%d, leftpos %d, pairs %p\n",
+		to,Stage3_querystart(to),Stage3_queryend(to),leftpos,Stage3_pairs(to)));
   debug2(Stage3_print_ends(to));
   
   Stage3_extend_right(from,/*goal*/rightpos,
@@ -2289,44 +2330,51 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
 #endif
 		     max_extend_p,pairpool,maxpeelback_to);
 
-  debug2(printf("Before Chimera_find_breakpoint, bestfrom is %p, query %d..%d\n",
-		from,Stage3_querystart(from),Stage3_queryend(from)));
+  debug2(printf("Before Chimera_find_breakpoint, bestfrom is %p, query %d..%d, pairs %p\n",
+                 from,Stage3_querystart(from),Stage3_queryend(from),Stage3_pairs(from)));
   debug2(Stage3_print_ends(from));
-  debug2(printf("Before Chimera_find_breakpoint, bestto is %p, query %d..%d\n",
-		to,Stage3_querystart(to),Stage3_queryend(to)));
+  debug2(printf("Before Chimera_find_breakpoint, bestto is %p, query %d..%d, pairs %p\n",
+                 to,Stage3_querystart(to),Stage3_queryend(to),Stage3_pairs(to)));
   debug2(Stage3_print_ends(to));
 
-  debug2(printf("Before Chimera_find_exonexon, bestfrom is %p, query %d..%d\n",
-		from,Stage3_querystart(from),Stage3_queryend(from)));
-  debug2(printf("Before Chimera_find_exonexon, bestto is %p, query %d..%d\n",
-		to,Stage3_querystart(to),Stage3_queryend(to)));
-
-  if ((*exonexonpos = Chimera_find_exonexon(&found_cdna_direction,&try_cdna_direction,
-					    &(*donor1),&(*donor2),&(*acceptor2),&(*acceptor1),
-					    &comp,&(*donor_watsonp),&(*acceptor_watsonp),&(*donor_prob),&(*acceptor_prob),
-					    /*left_part*/from,/*right_part*/to,genome,genomealt ? genomealt : genome,
-					    chromosome_iit,/*breakpoint_start*/Stage3_querystart(to),
-					    /*breakpoint_end*/Stage3_queryend(from))) > 0) {
-    breakpoint = *chimerapos = *chimeraequivpos = *exonexonpos;
-    *cdna_direction = found_cdna_direction;
-    debug2(printf("Exon-exon boundary found at %d, which is breakpoint.  Comp = %c\n",
-		  *exonexonpos,comp));
-    return breakpoint;
+  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&left_chrlength,chromosome_iit,Stage3_chrnum(from),circular_typeint);
+  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&right_chrlength,chromosome_iit,Stage3_chrnum(to),circular_typeint);
+
+  if ((*chimerapos = Chimera_find_breakpoint(&(*chimeraequivpos),&rangelow,&rangehigh,
+					     &(*donor1),&(*donor2),&(*acceptor2),&(*acceptor1),
+					     from,to,queryntlength,genome,left_chrlength,right_chrlength)) < 0) {
+    /* TODO: Allow finding a breakpoint for DNA-Seq, which needs no donor or acceptor nucleotides */
+    debug2(printf("Chimera_find_breakpoint returns no value\n"));
+    *donor_prob = *acceptor_prob = 0.0;
+    *donor_watsonp = *acceptor_watsonp = true;
+    *cdna_direction = 0;
+    return -1;
 
   } else {
-    Univ_IIT_interval_bounds(&chroffset,&chrhigh,&left_chrlength,chromosome_iit,Stage3_chrnum(from),circular_typeint);
-    Univ_IIT_interval_bounds(&chroffset,&chrhigh,&right_chrlength,chromosome_iit,Stage3_chrnum(to),circular_typeint);
+    debug2(printf("Chimera_find_breakpoint has chimerapos %d..%d\n",*chimerapos,*chimeraequivpos));
 
-    if ((*chimerapos = Chimera_find_breakpoint(&(*chimeraequivpos),&(*donor1),&(*donor2),&(*acceptor2),&(*acceptor1),
-					       from,to,queryntlength,genome,left_chrlength,right_chrlength)) < 0) {
-      /* TODO: Allow finding a breakpoint for DNA-Seq, which needs no donor or acceptor nucleotides */
-      debug2(printf("Chimera_find_breakpoint returns no value\n"));
-      *donor_prob = *acceptor_prob = 0.0;
-      *donor_watsonp = *acceptor_watsonp = true;
-      *cdna_direction = 0;
-      return -1;
+    Stage3_trim_right(from,/*goal*/rangehigh,
+		      /*queryseq_ptr*/Sequence_fullpointer(queryseq),
+		      /*queryuc_ptr*/Sequence_fullpointer(queryuc),
+		      pairpool);
 
-    } else {
+    Stage3_trim_left(to,/*goal*/rangelow,
+		     /*queryseq_ptr*/Sequence_fullpointer(queryseq),
+		     /*queryuc_ptr*/Sequence_fullpointer(queryuc),
+		     pairpool);
+
+    debug2(printf("Before Chimera_find_exonexon, bestfrom is %p, query %d..%d, pairs %p\n",
+                  from,Stage3_querystart(from),Stage3_queryend(from),Stage3_pairs(from)));
+    debug2(printf("Before Chimera_find_exonexon, bestto is %p, query %d..%d, pairs %p\n",
+                  to,Stage3_querystart(to),Stage3_queryend(to),Stage3_pairs(to)));
+
+    if ((*exonexonpos = Chimera_find_exonexon(&found_cdna_direction,&try_cdna_direction,
+					      &(*donor1),&(*donor2),&(*acceptor2),&(*acceptor1),
+					      &comp,&(*donor_watsonp),&(*acceptor_watsonp),&(*donor_prob),&(*acceptor_prob),
+					      /*left_part*/from,/*right_part*/to,genome,genomealt ? genomealt : genome,
+					      chromosome_iit,/*breakpoint_start*/Stage3_querystart(to),
+					      /*breakpoint_end*/Stage3_queryend(from))) <= 0) {
+      /* Couldn't find a good exon-exon junction, so rely on sequence */
       *donor_prob = *acceptor_prob = 0.0;
       *donor_watsonp = *acceptor_watsonp = true;
       
@@ -2337,6 +2385,14 @@ find_breakpoint (int *cdna_direction, int *chimerapos, int *chimeraequivpos, int
       *cdna_direction = try_cdna_direction;
       debug2(printf("Exon-exon boundary not found, but setting breakpoint to be %d\n",breakpoint));
       return breakpoint;
+      
+    } else {
+      /* Use the exon-exon solution */
+      breakpoint = *chimerapos = *chimeraequivpos = *exonexonpos;
+      *cdna_direction = found_cdna_direction;
+      debug2(printf("Exon-exon boundary found at %d, which is breakpoint.  Comp = %c\n",
+		    *exonexonpos,comp));
+      return breakpoint;
     }
   }
 }
@@ -2352,7 +2408,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
 		 Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
 		 Matchpool_T matchpool, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
 		 Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR) {
-  List_T gregions = NULL, nonjoinable = NULL, p;
+  List_T gregions = NULL, p;
   Stage3_T *stage3array_sub1 = NULL, *stage3array_sub2 = NULL, from, to, stage3;
   Sequence_T querysubseq = NULL, querysubuc = NULL;
   Diagnostic_T diagnostic;
@@ -2369,6 +2425,12 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
   bool donor_watsonp, acceptor_watsonp;
   double donor_prob, acceptor_prob;
   
+  int kstart1, kstart2, kend1, kend2;
+  Chrnum_T chrnum;
+#ifdef DEBUG2
+  int k;
+#endif
+
 
 #ifdef PMAP
   five_margin = effective_start - 3*Sequence_trim_start(queryseq);
@@ -2403,12 +2465,11 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
   debug2(printf("max single goodness = %d\n",max_single_goodness));
 
 
-  /* List_free(&nonjoinable); */
   debug2(printf("Running local_separate_paths\n"));
-  nonjoinable = local_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
-				     stage3list);
-  debug2(printf("local: npaths_sub1 %d, npaths_sub2 %d, nonjoinable %d\n",
-		npaths_sub1,npaths_sub2,List_length(nonjoinable)));
+  stage3list = local_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
+				    stage3list);
+  debug2(printf("local: npaths_sub1 %d, npaths_sub2 %d, stage3list %d\n",
+		npaths_sub1,npaths_sub2,List_length(stage3list)));
 
   if (npaths_sub1 == 0 && npaths_sub2 == 0) {
     /* Need to compute on margin explicitly */
@@ -2433,7 +2494,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
 	  debug2(printf("5 margin > 3 margin.  "));
 	  debug2(printf("Beginning Stage1_compute on 5' margin from effective_start %d (%d..%d)\n",
 			effective_start,0,effective_start+extension));
-	  debug2a(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+	  debug2a(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
 
 	  diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
 				      Oligoindex_array_elt(oligoindices_major,0));
@@ -2475,7 +2536,7 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
 	  debug2(printf("Recomputing on original part.  "));
 	  debug2(printf("Beginning Stage1_compute on 5' margin from effective_start %d (%d..%d)\n",
 			effective_start,effective_start,queryntlength));
-	  debug2a(Sequence_print(stdout,querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
+	  debug2a(Sequence_stdout(querysubseq,/*uppercasep*/true,wraplength,/*trimmedp*/true));
 
 	  diagnostic = evaluate_query(&poorp,&repetitivep,Sequence_fullpointer(querysubuc),Sequence_fulllength(querysubuc),
 				      Oligoindex_array_elt(oligoindices_major,0));
@@ -2511,13 +2572,12 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
 	Sequence_free(&querysubseq);
       }
 
-      List_free(&nonjoinable);
       debug2(printf("Running local_separate_paths\n"));
-      nonjoinable = local_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
-					 stage3list);
-      debug2(printf("local: npaths_sub1 %d, npaths_sub2 %d, nonjoinable %d\n",
-		    npaths_sub1,npaths_sub2,List_length(nonjoinable)));
-
+      stage3list = local_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
+					stage3list);
+      debug2(printf("local: npaths_sub1 %d, npaths_sub2 %d, stage3list %d\n",
+		    npaths_sub1,npaths_sub2,List_length(stage3list)));
+      
     } else {
 #if 0
       /* extension makes it harder to find the other alignment.  The merging process will help fill in any gap. */
@@ -2617,76 +2677,132 @@ check_for_local (bool *mergedp, List_T stage3list, int effective_start, int effe
 	Sequence_free(&querysubseq);
       }
 
-      List_free(&nonjoinable);
       debug2(printf("Running local_separate_paths\n"));
-      nonjoinable = local_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
-					 stage3list);
-      debug2(printf("local: npaths_sub1 %d, npaths_sub2 %d, nonjoinable %d\n",
-		    npaths_sub1,npaths_sub2,List_length(nonjoinable)));
+      stage3list = local_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
+					stage3list);
+      debug2(printf("local: npaths_sub1 %d, npaths_sub2 %d, stage3list %d\n",
+		    npaths_sub1,npaths_sub2,List_length(stage3list)));
     }
   }
 
   *mergedp = false;
-  if (npaths_sub1 == 0 || npaths_sub2 == 0) {
+  if (npaths_sub1 == 0 && npaths_sub2 == 0) {
     /* Skip */
 
-  } else if (Chimera_bestpath(&five_score,&three_score,&chimerapos,&chimeraequivpos,&bestfrom,&bestto,
-			      stage3array_sub1,npaths_sub1,stage3array_sub2,npaths_sub2,queryntlength,
-			      CHIMERA_SLOP,/*localp*/true) == false) {
+  } else if (npaths_sub1 == 0) {
     /* Skip */
-    debug2(printf("Chimera_bestpath returns false\n"));
-
     FREE(stage3array_sub2);
+
+  } else if (npaths_sub2 == 0) {
+    /* Skip */
     FREE(stage3array_sub1);
 
   } else {
-    from = stage3array_sub1[bestfrom];
-    to = stage3array_sub2[bestto];
-    debug2(printf("Chimera_bestpath returns bestfrom %d (%d..%d, %u..%u) to bestto %d (%d..%d, %u..%u)\n",
-		  bestfrom,Stage3_querystart(from),Stage3_queryend(from),Stage3_genomicstart(from),Stage3_genomicend(from),
-		  bestto,Stage3_querystart(to),Stage3_queryend(to),Stage3_genomicstart(to),Stage3_genomicend(to)));
+    /* Iterate for each chromosome */
+    qsort(stage3array_sub1,npaths_sub1,sizeof(Stage3_T),Stage3_chrnum_cmp);
+    qsort(stage3array_sub2,npaths_sub2,sizeof(Stage3_T),Stage3_chrnum_cmp);
 
-    breakpoint = find_breakpoint(&chimera_cdna_direction,&chimerapos,&chimeraequivpos,&exonexonpos,
-				 &donor1,&donor2,&acceptor2,&acceptor1,
-				 &donor_watsonp,&acceptor_watsonp,&donor_prob,&acceptor_prob,from,to,
-#ifdef PMAP
-				 queryntseq,
+
+    kend1 = kend2 = 0;
+    *mergedp = false;
+    /* List_free(&stage3list); */
+
+    while (kend1 < npaths_sub1 && kend2 < npaths_sub2) {
+      kstart1 = kend1;
+      kstart2 = kend2;
+      chrnum = Stage3_chrnum(stage3array_sub1[kstart1]);
+      while (kend1 < npaths_sub1 && Stage3_chrnum(stage3array_sub1[kend1]) == chrnum) {
+	kend1++;
+      }
+      while (kend2 < npaths_sub2 && Stage3_chrnum(stage3array_sub2[kend2]) == chrnum) {
+	kend2++;
+      }
+
+#ifdef DEBUG2
+      printf("Chimera_bestpath left\n");
+      for (k = kstart1; k < kend1; k++) {
+	stage3 = stage3array_sub1[k];
+	printf("%d..%d, %d:%u..%u\n",
+	       Stage3_querystart(stage3),Stage3_queryend(stage3),
+	       Stage3_chrnum(stage3),Stage3_genomicstart(stage3),Stage3_genomicend(stage3));
+      }
+      printf("Chimera_bestpath right\n");
+      for (k = kstart2; k < kend2; k++) {
+	stage3 = stage3array_sub2[k];
+	printf("%d..%d, %d:%u..%u\n",
+	       Stage3_querystart(stage3),Stage3_queryend(stage3),
+	       Stage3_chrnum(stage3),Stage3_genomicstart(stage3),Stage3_genomicend(stage3));
+      }
 #endif
-				 queryseq,queryuc,queryntlength,
-				 genomecomp,genomecomp_alt,chromosome_iit,pairpool);
-    debug2(printf("find_breakpoint returns %d\n",breakpoint));
 
-    /* Check to see if we can merge chimeric parts */
-    debug2(printf("Before Stage3_mergeable, bestfrom is %p, query %d..%d\n",
-		  from,Stage3_querystart(from),Stage3_queryend(from)));
-    debug2(printf("Before Stage3_mergeable, bestto is %p, query %d..%d\n",
-		  to,Stage3_querystart(to),Stage3_queryend(to)));
+      if (Chimera_bestpath(&five_score,&three_score,&chimerapos,&chimeraequivpos,&bestfrom,&bestto,
+			   &(stage3array_sub1[kstart1]),/*npaths1*/kend1-kstart1,
+			   &(stage3array_sub2[kstart2]),/*npaths2*/kend2-kstart2,
+			   queryntlength,CHIMERA_SLOP,/*localp*/true) == false) {
+	/* Skip */
+	debug2(printf("Chimera_bestpath returns false\n"));
 
-    if (Stage3_mergeable(from,to,breakpoint,queryntlength) == true) {
-      debug2(printf("Mergeable! -- Merging left and right as a readthrough\n"));
-      List_free(&stage3list);
-      stage3list = merge_left_and_right_readthrough(&(*mergedp),stage3array_sub1,npaths_sub1,bestfrom,
-						    stage3array_sub2,npaths_sub2,bestto,
-						    nonjoinable,breakpoint,queryntlength,
+      } else {
+	from = stage3array_sub1[kstart1 + bestfrom];
+	to = stage3array_sub2[kstart2 + bestto];
+	debug2(printf("Chimera_bestpath returns bestfrom %d (%d..%d, %u..%u) to bestto %d (%d..%d, %u..%u)\n",
+		      bestfrom,Stage3_querystart(from),Stage3_queryend(from),Stage3_genomicstart(from),Stage3_genomicend(from),
+		      bestto,Stage3_querystart(to),Stage3_queryend(to),Stage3_genomicstart(to),Stage3_genomicend(to)));
+
+	breakpoint = find_breakpoint(&chimera_cdna_direction,&chimerapos,&chimeraequivpos,&exonexonpos,
+				     &donor1,&donor2,&acceptor2,&acceptor1,
+				     &donor_watsonp,&acceptor_watsonp,&donor_prob,&acceptor_prob,from,to,
+#ifdef PMAP
+				     queryntseq,
+#endif
+				     queryseq,queryuc,queryntlength,
+				     genomecomp,genomecomp_alt,chromosome_iit,pairpool);
+	debug2(printf("find_breakpoint returns %d\n",breakpoint));
+
+	/* Check to see if we can merge chimeric parts */
+	debug2(printf("Before Stage3_mergeable, bestfrom is %p, query %d..%d, pairs %p\n",
+	              from,Stage3_querystart(from),Stage3_queryend(from),Stage3_pairs(from)));
+	debug2(printf("Before Stage3_mergeable, bestto is %p, query %d..%d, pairs %p\n",
+               	      to,Stage3_querystart(to),Stage3_queryend(to),Stage3_pairs(to)));
+	
+	if (Stage3_mergeable(from,to,breakpoint,queryntlength) == true) {
+	  debug2(printf("Mergeable! -- Merging left and right as a readthrough\n"));
+	  stage3list = merge_left_and_right_readthrough(&(*mergedp),stage3list,
+							&(stage3array_sub1[kstart1]),/*npaths1*/kend1-kstart1,bestfrom,
+							&(stage3array_sub2[kstart2]),/*npaths2*/kend2-kstart2,bestto,
+							breakpoint,queryntlength,
 #ifdef PMAP
-						    /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
-						    /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
-						    /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
+							/*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
+							/*queryseq_ptr*/Sequence_fullpointer(queryntseq),
+							/*queryuc_ptr*/Sequence_fullpointer(queryntseq),
 #else
-						    /*queryseq_ptr*/Sequence_fullpointer(queryseq),
-						    /*queryuc_ptr*/Sequence_fullpointer(queryuc),
+							/*queryseq_ptr*/Sequence_fullpointer(queryseq),
+							/*queryuc_ptr*/Sequence_fullpointer(queryuc),
 #endif
-						    pairpool,dynprogL,dynprogM,dynprogR,
-						    oligoindices_minor,diagpool,cellpool);
+							pairpool,dynprogL,dynprogM,dynprogR,
+							oligoindices_minor,diagpool,cellpool);
+
+	  debug2(printf("After merge_left_and_right_readthrough, bestfrom is %p, query %d..%d, pairs %p\n",
+			from,Stage3_querystart(from),Stage3_queryend(from),Stage3_pairs(from)));
+	  debug2(printf("After merge_left_and_right_readthrough, bestto is %p, query %d..%d, pairs %p\n",
+			to,Stage3_querystart(to),Stage3_queryend(to),Stage3_pairs(to)));
+	}
+      }
     }
 
     FREE(stage3array_sub2);
     FREE(stage3array_sub1);
-  }
 
-  List_free(&nonjoinable);
+    /* stage3list = List_reverse(stage3list); */
+  }
 
   debug2(printf("check_for_local returning list of length %d\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
 
   /* stage3list = stage3list_remove_empties(stage3list); */
 
@@ -2711,7 +2827,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
 		   Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
 		   Matchpool_T matchpool, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
 		   Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR) {
-  List_T gregions = NULL, nonjoinable = NULL, p;
+  List_T gregions = NULL, p;
   Stage3_T *stage3array_sub1 = NULL, *stage3array_sub2 = NULL, from, to, stage3;
   Sequence_T querysubseq = NULL, querysubuc = NULL;
   Diagnostic_T diagnostic;
@@ -2729,6 +2845,16 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
   double donor_prob, acceptor_prob;
   
 
+  debug2(printf("check_for_chimera called with %d paths\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
+
+
+
 #ifdef PMAP
   five_margin = effective_start - 3*Sequence_trim_start(queryseq);
   three_margin = 3*Sequence_trim_end(queryseq) - effective_end;
@@ -2762,12 +2888,11 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
   debug2(printf("max single goodness = %d\n",max_single_goodness));
 
 
-  /* List_free(&nonjoinable); */
   debug2(printf("Running distant_separate_paths\n"));
-  nonjoinable = distant_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
-				       stage3list);
-  debug2(printf("chimera: npaths_sub1 %d, npaths_sub2 %d, nonjoinable %d\n",
-		npaths_sub1,npaths_sub2,List_length(nonjoinable)));
+  stage3list = distant_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
+				      stage3list);
+  debug2(printf("chimera: npaths_sub1 %d, npaths_sub2 %d, stage3list %d\n",
+		npaths_sub1,npaths_sub2,List_length(stage3list)));
 
   if (npaths_sub1 == 0 && npaths_sub2 == 0) {
     /* Need to compute on margin explicitly */
@@ -2865,12 +2990,11 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
 	Sequence_free(&querysubseq);
       }
 
-      List_free(&nonjoinable);
       debug2(printf("Running distant_separate_paths\n"));
-      nonjoinable = distant_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
-					   stage3list);
-      debug2(printf("chimera: npaths_sub1 %d, npaths_sub2 %d, nonjoinable %d\n",
-		    npaths_sub1,npaths_sub2,List_length(nonjoinable)));
+      stage3list = distant_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
+					  stage3list);
+      debug2(printf("chimera: npaths_sub1 %d, npaths_sub2 %d, stage3list %d\n",
+		    npaths_sub1,npaths_sub2,List_length(stage3list)));
 
     } else {
       extension = CHIMERA_SLOP;
@@ -2966,12 +3090,11 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
 	Sequence_free(&querysubseq);
       }
 
-      List_free(&nonjoinable);
       debug2(printf("Running distant_separate_paths\n"));
-      nonjoinable = distant_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
-					   stage3list);
-      debug2(printf("chimera: npaths_sub1 %d, npaths_sub2 %d, nonjoinable %d\n",
-		    npaths_sub1,npaths_sub2,List_length(nonjoinable)));
+      stage3list = distant_separate_paths(&stage3array_sub1,&npaths_sub1,&stage3array_sub2,&npaths_sub2,
+					  stage3list);
+      debug2(printf("chimera: npaths_sub1 %d, npaths_sub2 %d, stage3list %d\n",
+		    npaths_sub1,npaths_sub2,List_length(stage3list)));
     }
   }
 
@@ -3022,10 +3145,10 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
       debug2(printf("find_breakpoint returns %d\n",breakpoint));
 
       /* Check to see if we can merge chimeric parts */
-      debug2(printf("Before Stage3_mergeable, bestfrom is %p, query %d..%d\n",
-		    from,Stage3_querystart(from),Stage3_queryend(from)));
-      debug2(printf("Before Stage3_mergeable, bestto is %p, query %d..%d\n",
-		    to,Stage3_querystart(to),Stage3_queryend(to)));
+      debug2(printf("Before Stage3_mergeable, bestfrom is %p, query %d..%d, pairs %p\n",
+		    from,Stage3_querystart(from),Stage3_queryend(from),Stage3_pairs(from)));
+      debug2(printf("Before Stage3_mergeable, bestto is %p, query %d..%d, pairs %p\n",
+		    to,Stage3_querystart(to),Stage3_queryend(to),Stage3_pairs(to)));
 
       if (Stage3_mergeable(from,to,breakpoint,queryntlength) == false &&
 	  Stage3_test_bounds(from,0,chimeraequivpos+chimera_overlap) == true &&
@@ -3046,7 +3169,7 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
 	  *chimera = Chimera_new(from,to,chimerapos,chimeraequivpos,exonexonpos,chimera_cdna_direction,
 				 donor1,donor2,acceptor2,acceptor1,donor_watsonp,acceptor_watsonp,
 				 donor_prob,acceptor_prob);
-	  List_free(&stage3list);
+	  /* List_free(&stage3list); */
 
 	  debug2(printf("Before merge_left_and_right_transloc, bestfrom is %p, query %d..%d\n",
 			from,Stage3_querystart(from),Stage3_queryend(from)));
@@ -3055,18 +3178,27 @@ check_for_chimera (bool *mergedp, Chimera_T *chimera, List_T stage3list, int eff
 	  
 	  stage3list = merge_left_and_right_transloc(stage3array_sub1,npaths_sub1,bestfrom,
 						     stage3array_sub2,npaths_sub2,bestto,
-						     nonjoinable);
+						     stage3list);
 	}
       }
+
+      debug2(printf("After Stage3_mergeable, bestfrom is %p, query %d..%d, pairs %p\n",
+		    from,Stage3_querystart(from),Stage3_queryend(from),Stage3_pairs(from)));
+      debug2(printf("After Stage3_mergeable, bestto is %p, query %d..%d, pairs %p\n",
+		    to,Stage3_querystart(to),Stage3_queryend(to),Stage3_pairs(to)));
     }
 
     FREE(stage3array_sub2);
     FREE(stage3array_sub1);
   }
 
-  List_free(&nonjoinable);
-
   debug2(printf("check_for_chimera returning list of length %d\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
 
 #if 0
   /* Should be handled by apply_stage3 loop */
@@ -3090,27 +3222,16 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
 		    Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
 		    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
   List_T newstage3list = NULL, merged;
-  List_T nonjoinable, r;
+  List_T r, p;
   bool mergedAp, mergedBp;
   Stage3_T stage3;
 
-  nonjoinable = (List_T) NULL;
-  for (r = stage3list; r != NULL; r = List_next(r)) {
-    stage3 = (Stage3_T) List_head(r);
-    if (stage3 == from) {
-      /* Skip */
-    } else if (stage3 == to) {
-      /* Skip */
-    } else {
-      nonjoinable = List_push(nonjoinable,(void *) stage3);
-    }
-  }
-  
 
   if (mergeableAp == true && mergeableBp == true) {
-    merged = merge_left_and_right_readthrough(&mergedAp,/*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
+    stage3list = merge_left_and_right_readthrough(&mergedAp,stage3list,
+					      /*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
 					      /*stage3array_sub2*/&middle,/*npaths_sub2*/1,/*bestto*/0,
-					      /*nonjoinable*/NULL,breakpointA,queryntlength,
+					      breakpointA,queryntlength,
 #ifdef PMAP
 					      /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
 					      /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
@@ -3121,70 +3242,69 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
 #endif
 					      pairpool,dynprogL,dynprogM,dynprogR,
 					      oligoindices_minor,diagpool,cellpool);
-    List_free(&merged);
+    /* List_free(&merged); */
 
-    newstage3list = merge_left_and_right_readthrough(&mergedBp,/*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
-						     /*stage3array_sub2*/&to,/*npaths_sub2*/1,/*bestto*/0,
-						     nonjoinable,breakpointB,queryntlength,
+    stage3list = merge_left_and_right_readthrough(&mergedBp,stage3list,
+						  /*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
+						  /*stage3array_sub2*/&to,/*npaths_sub2*/1,/*bestto*/0,
+						  breakpointB,queryntlength,
 #ifdef PMAP
-						     /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
-						     /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
-						     /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
+						  /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
+						  /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
+						  /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
 #else
-						     /*queryseq_ptr*/Sequence_fullpointer(queryseq),
-						     /*queryuc_ptr*/Sequence_fullpointer(queryuc),
+						  /*queryseq_ptr*/Sequence_fullpointer(queryseq),
+						  /*queryuc_ptr*/Sequence_fullpointer(queryuc),
 #endif
-						     pairpool,dynprogL,dynprogM,dynprogR,
-						     oligoindices_minor,diagpool,cellpool);
+						  pairpool,dynprogL,dynprogM,dynprogR,
+						  oligoindices_minor,diagpool,cellpool);
 
 #ifndef PMAP
     Stage3_guess_cdna_direction(from);
 #endif
-    List_free(&stage3list);
 
   } else if (mergeableBp == true) {
-    nonjoinable = List_push(nonjoinable,(void *) from);
-    newstage3list = merge_left_and_right_readthrough(&mergedBp,/*stage3array_sub1*/&middle,/*npaths_sub1*/1,/*bestfrom*/0,
-						     /*stage3array_sub2*/&to,/*npaths_sub2*/1,/*bestto*/0,
-						     nonjoinable,breakpointB,queryntlength,
+    stage3list = merge_left_and_right_readthrough(&mergedBp,stage3list,
+						  /*stage3array_sub1*/&middle,/*npaths_sub1*/1,/*bestfrom*/0,
+						  /*stage3array_sub2*/&to,/*npaths_sub2*/1,/*bestto*/0,
+						  breakpointB,queryntlength,
 #ifdef PMAP
-						     /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
-						     /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
-						     /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
+						  /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
+						  /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
+						  /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
 #else
-						     /*queryseq_ptr*/Sequence_fullpointer(queryseq),
-						     /*queryuc_ptr*/Sequence_fullpointer(queryuc),
+						  /*queryseq_ptr*/Sequence_fullpointer(queryseq),
+						  /*queryuc_ptr*/Sequence_fullpointer(queryuc),
 #endif
-						     pairpool,dynprogL,dynprogM,dynprogR,
-						     oligoindices_minor,diagpool,cellpool);
+						  pairpool,dynprogL,dynprogM,dynprogR,
+						  oligoindices_minor,diagpool,cellpool);
 #ifndef PMAP
     Stage3_guess_cdna_direction(middle);
 #endif
-    List_free(&stage3list);
 
   } else if (mergeableAp == true) {
-    nonjoinable = List_push(nonjoinable,(void *) to);
-    newstage3list = merge_left_and_right_readthrough(&mergedAp,/*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
-						     /*stage3array_sub2*/&middle,/*npaths_sub2*/1,/*bestto*/0,
-						     nonjoinable,breakpointA,queryntlength,
+    stage3list = merge_left_and_right_readthrough(&mergedAp,stage3list,
+						  /*stage3array_sub1*/&from,/*npaths_sub1*/1,/*bestfrom*/0,
+						  /*stage3array_sub2*/&middle,/*npaths_sub2*/1,/*bestto*/0,
+						  breakpointA,queryntlength,
 #ifdef PMAP
-						     /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
-						     /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
-						     /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
+						  /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
+						  /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
+						  /*queryuc_ptr*/Sequence_fullpointer(queryntseq),
 #else
-						     /*queryseq_ptr*/Sequence_fullpointer(queryseq),
-						     /*queryuc_ptr*/Sequence_fullpointer(queryuc),
+						  /*queryseq_ptr*/Sequence_fullpointer(queryseq),
+						  /*queryuc_ptr*/Sequence_fullpointer(queryuc),
 #endif
-						     pairpool,dynprogL,dynprogM,dynprogR,
-						     oligoindices_minor,diagpool,cellpool);
+						  pairpool,dynprogL,dynprogM,dynprogR,
+						  oligoindices_minor,diagpool,cellpool);
+
 #ifndef PMAP
     Stage3_guess_cdna_direction(from);
 #endif
-    List_free(&stage3list);
     
   } else {
-    newstage3list = stage3list;	/* Contains all entries from nonjoinable */
-    newstage3list = List_push(newstage3list,(void *) middle);
+    debug(printf("Pushing %p onto stage3list\n",middle));
+    stage3list = List_push(stage3list,(void *) middle);
   }
 
   for (r = middlepieces; r != NULL; r = List_next(r)) {
@@ -3194,12 +3314,12 @@ merge_middlepieces (List_T stage3list, Stage3_T from, Stage3_T to,
     } else if (stage3 == middle) {
       /* Don't add again */
     } else {
-      newstage3list = List_push(newstage3list,stage3);
+      debug(printf("Pushing %p onto stage3list\n",stage3));
+      stage3list = List_push(stage3list,stage3);
     }
   }
 
-  List_free(&nonjoinable);
-  return newstage3list;
+  return stage3list;
 }
 
 
@@ -3239,6 +3359,10 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
   bool mergeableAp, mergeableBp;
 
   List_T middlepieces;
+#ifdef DEBUG2A
+  List_T p;
+  Stage3_T stage3;
+#endif
 
 
 #ifdef DEBUG2A
@@ -3251,10 +3375,10 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
 
   *foundp = false;
 
-  by_queryend = (Stage3_T *) List_to_array_n(&npaths,stage3list);
+  by_queryend = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
   qsort(by_queryend,npaths,sizeof(Stage3_T),Stage3_queryend_cmp);
 
-  by_querystart = (Stage3_T *) List_to_array_n(&npaths,stage3list);
+  by_querystart = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
   qsort(by_querystart,npaths,sizeof(Stage3_T),Stage3_querystart_cmp);
 
   j = 0;
@@ -3284,6 +3408,7 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
 	  if ((querysubuc = Sequence_subsequence(queryuc,querystart,queryend)) != NULL) {
 	    debug2(printf("Performing Stage 3 on %d..%d against %u..%u\n",
 			  querystart,queryend,chrstart,chrend));
+	    /* Memory leak here */
 	    if ((middlepieces = update_stage3list(/*stage3list*/NULL,querysubseq,
 #ifdef PMAP
 						  queryntseq,
@@ -3327,7 +3452,7 @@ check_middle_piece_local (bool *foundp, List_T stage3list, Sequence_T queryseq,
 	      }	/* End of while loop looking for dual merge */
 
 	      if (mergeableAp == true && mergeableBp == true) {
-		debug2(printf("Middle segment found and mergeable locally with both! -- Merging three as a readthrough.\n"));
+		debug2(printf("Middle segment %p found and mergeable locally with both! -- Merging three as a readthrough.\n",middle));
 		*foundp = true;
 	      } else {
 		/* 2.  Look for middle piece that joins locally on one end */
@@ -3422,7 +3547,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
   int chimera_cdna_direction_A, chimera_cdna_direction_B;
   bool mergeableAp, mergeableBp, mergedAp, mergedBp;
 
-  List_T nonjoinable = NULL, middlepieces = NULL;
+  List_T middlepieces = NULL, p;
   Diagnostic_T diagnostic;
   List_T gregions;
   bool lowidentityp, poorp, repetitivep;
@@ -3436,10 +3561,10 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
   }
 #endif
 
-  by_queryend = (Stage3_T *) List_to_array_n(&npaths,stage3list);
+  by_queryend = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
   qsort(by_queryend,npaths,sizeof(Stage3_T),Stage3_queryend_cmp);
 
-  by_querystart = (Stage3_T *) List_to_array_n(&npaths,stage3list);
+  by_querystart = (Stage3_T *) List_to_array_out_n(&npaths,stage3list);
   qsort(by_querystart,npaths,sizeof(Stage3_T),Stage3_querystart_cmp);
 
   maxdist = 0;
@@ -3547,21 +3672,21 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
       }
 
       if (mergeableAp == true) {
-	debug2(printf("Middle segment found and mergeable locally with from! -- Merging as a readthrough.  cdna_direction = %d\n",
-		      chimera_cdna_direction_A));
+	debug2(printf("Middle segment %p found and mergeable locally with from! -- Merging as a readthrough.  cdna_direction = %d\n",
+ 	              middle,chimera_cdna_direction_A));
 
-	List_free(&nonjoinable);
-	nonjoinable = (List_T) NULL;
 	for (r = middlepieces; r != NULL; r = List_next(r)) {
 	  stage3 = (Stage3_T) List_head(r);
 	  if (stage3 == middle) {
 	    /* Skip */
 	  } else {
-	    nonjoinable = List_push(nonjoinable,(void *) stage3);
+	    debug(printf("Pushing %p onto stage3list\n",stage3));
+	    stage3list = List_push(stage3list,(void *) stage3);
 	  }
 	}
 	List_free(&middlepieces);
 
+#if 0
 	for (r = stage3list; r != NULL; r = List_next(r)) {
 	  stage3 = (Stage3_T) List_head(r);
 	  if (stage3 == bestfrom) {
@@ -3570,11 +3695,13 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
 	    nonjoinable = List_push(nonjoinable,(void *) stage3);
 	  }
 	}
+#endif
 
-	newstage3list =
-	  merge_left_and_right_readthrough(&mergedAp,/*stage3array_sub1*/&bestfrom,/*npaths_sub1*/1,/*bestfrom*/0,
+	stage3list =
+	  merge_left_and_right_readthrough(&mergedAp,stage3list,
+					   /*stage3array_sub1*/&bestfrom,/*npaths_sub1*/1,/*bestfrom*/0,
 					   /*stage3array_sub2*/&middle,/*npaths_sub2*/1,/*bestto*/0,
-					   nonjoinable,breakpointA,queryntlength,
+					   breakpointA,queryntlength,
 #ifdef PMAP
 					   /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
 					   /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
@@ -3585,31 +3712,31 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
 #endif
 					   pairpool,dynprogL,dynprogM,dynprogR,
 					   oligoindices_minor,diagpool,cellpool);
+
 #ifndef PMAP
 	Stage3_guess_cdna_direction(from);
 #endif
 
-	List_free(&nonjoinable);
 	if (mergedAp == true) {
 	  *foundp = true;
 	}
 
       } else if (mergeableBp == true) {
-	debug2(printf("Middle segment found and mergeable locally with to! -- Merging as a readthrough.  cdna_direction = %d\n",
-		      chimera_cdna_direction_B));
+	debug2(printf("Middle segment %p found and mergeable locally with to! -- Merging as a readthrough.  cdna_direction = %d\n",
+		      middle,chimera_cdna_direction_B));
 
-	List_free(&nonjoinable);
-	nonjoinable = (List_T) NULL;
 	for (r = middlepieces; r != NULL; r = List_next(r)) {
 	  stage3 = (Stage3_T) List_head(r);
 	  if (stage3 == middle) {
 	    /* Skip */
 	  } else {
-	    nonjoinable = List_push(nonjoinable,(void *) stage3);
+	    debug(printf("Pushing %p onto stage3list\n",stage3));
+	    stage3list = List_push(stage3list,(void *) stage3);
 	  }
 	}
 	List_free(&middlepieces);
 
+#if 0
 	for (r = stage3list; r != NULL; r = List_next(r)) {
 	  stage3 = (Stage3_T) List_head(r);
 	  if (stage3 == bestto) {
@@ -3618,11 +3745,13 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
 	    nonjoinable = List_push(nonjoinable,(void *) stage3);
 	  }
 	}
+#endif
 
-	newstage3list =
-	  merge_left_and_right_readthrough(&mergedBp,/*stage3array_sub1*/&middle,/*npaths_sub1*/1,/*bestfrom*/0,
+	stage3list =
+	  merge_left_and_right_readthrough(&mergedBp,stage3list,
+					   /*stage3array_sub1*/&middle,/*npaths_sub1*/1,/*bestfrom*/0,
 					   /*stage3array_sub2*/&bestto,/*npaths_sub2*/1,/*bestto*/0,
-					   nonjoinable,breakpointB,queryntlength,
+					   breakpointB,queryntlength,
 #ifdef PMAP
 					   /*queryaaseq_ptr*/Sequence_fullpointer(queryseq),
 					   /*queryseq_ptr*/Sequence_fullpointer(queryntseq),
@@ -3633,12 +3762,10 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
 #endif
 					   pairpool,dynprogL,dynprogM,dynprogR,
 					   oligoindices_minor,diagpool,cellpool);
-
 #ifndef PMAP
 	Stage3_guess_cdna_direction(middle);
 #endif
 
-	List_free(&nonjoinable);
 	if (mergedBp == true) {
 	  *foundp = true;
 	}
@@ -3657,12 +3784,7 @@ check_middle_piece_chimera (bool *foundp, List_T stage3list, Sequence_T queryseq
     }
   }
 
-  if (newstage3list == NULL) {
-    return stage3list;
-  } else {
-    List_free(&stage3list);
-    return newstage3list;
-  }
+  return stage3list;
 }
 
 
@@ -3676,11 +3798,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 	      Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
 	      Matchpool_T matchpool, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
 	      Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, Stopwatch_T worker_stopwatch) {
-#ifdef DEBUG2
-  List_T p;
-#endif
-  List_T stage3list;
-  Stage3_T nonchimericbest, chimera1, chimera2;
+  List_T stage3list, newstage3list, split_objects, p, q;
+  Stage3_T nonchimericbest, chimera1, chimera2, stage3, newstage3;
   bool testlocalp, testchimerap, foundp;
   int effective_start, effective_end;
   int queryntlength;
@@ -3720,7 +3839,7 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 		    List_length(stage3list),iter));
 
       /* Stage3_recompute_goodness(stage3list); */
-      stage3list = stage3list_remove_duplicates(stage3list);
+      /* stage3list = stage3list_remove_duplicates(stage3list); */
       stage3list = stage3list_sort(stage3list);
 
 #ifdef DEBUG2
@@ -3768,13 +3887,14 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 	testlocalp = false;
 	debug2(printf("Checking for local, starting with list length %d, effective_start %d, effective_end %d\n",
 		      List_length(stage3list),effective_start,effective_end));
-	stage3list = check_for_local(&(*mergedp),stage3list,effective_start,effective_end,
-				     queryseq,queryuc,
+	/* stage3list = */ stage3list = check_for_local(&(*mergedp),stage3list,effective_start,effective_end,
+							queryseq,queryuc,
 #ifdef PMAP
-				     queryntseq,
+							queryntseq,
 #endif
-				     queryntlength,usersegment,stage2_alloc,oligoindices_major,oligoindices_minor,
-				     matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+							queryntlength,usersegment,stage2_alloc,oligoindices_major,oligoindices_minor,
+							matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+	debug2(printf("After check for local, we have %d paths\n",List_length(stage3list)));
 	
 	if (*mergedp == true) {
 	  testlocalp = true;	/* Local merge */
@@ -3803,7 +3923,7 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
 		    List_length(stage3list),iter));
 
       /* Stage3_recompute_goodness(stage3list); */
-      stage3list = stage3list_remove_duplicates(stage3list);
+      /* stage3list = stage3list_remove_duplicates(stage3list); */
       stage3list = stage3list_sort(stage3list);
 
 #ifdef DEBUG2
@@ -3889,6 +4009,35 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
   }
 
   debug2(printf("apply_stage3 returning list of length %d\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
+
+  /* Split on large introns */
+  if (split_large_introns_p == true) {
+    newstage3list = (List_T) NULL;
+    for (p = stage3list; p != NULL; p = List_next(p)) {
+      stage3 = (Stage3_T) List_head(p);
+      if ((split_objects = Stage3_split(stage3,queryseq,pairpool)) == NULL) {
+	debug(printf("Pushing %p onto newstage3list\n",stage3));
+	newstage3list = List_push(newstage3list,(void *) stage3);
+      } else {
+	for (q = split_objects; q != NULL; q = List_next(q)) {
+	  newstage3 = (Stage3_T) List_head(q);
+	  debug(printf("Pushing %p onto newstage3list\n",newstage3));
+	  newstage3list = List_push(newstage3list,(void *) newstage3);
+	}
+	List_free(&split_objects);
+	Stage3_free(&stage3);
+      }
+    }
+    List_free(&stage3list);
+    stage3list = newstage3list;
+  }
+
 
   /* Needed after call to stage3_from_gregions */
   /* Stage3_recompute_goodness(stage3list); */
@@ -3896,6 +4045,7 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
   /* Final call, so do both filtering and sorting */
   Stage3_recompute_coverage(stage3list,queryseq);
   stage3list = stage3list_filter_and_sort(&(*chimera),stage3list);
+  debug2(printf("After filter and sort, have %d paths\n",List_length(stage3list)));
 
   if (*chimera != NULL && List_length(stage3list) > 2) {
     /* Compare chimera against non-chimeric alignments */
@@ -3917,6 +4067,14 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
     }
   }
 
+  debug2(printf("apply_stage3 returning %d paths\n",List_length(stage3list)));
+#ifdef DEBUG2
+  for (p = stage3list; p != NULL; p = List_next(p)) {
+    stage3 = (Stage3_T) List_head(p);
+    printf("%p %p\n",stage3,Stage3_pairs(stage3));
+  }
+#endif
+
   return stage3list;
 }
 
@@ -4041,7 +4199,8 @@ process_request (Filestring_T *fp_failedinput, double *worker_runtime, Request_T
 				  queryntseq,
 #endif
 				  usersegment,stage2_alloc,oligoindices_major,oligoindices_minor,
-				  matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
+				  matchpool,pairpool,diagpool,cellpool,
+				  dynprogL,dynprogM,dynprogR,worker_stopwatch);
 	if (diag_debug == true) {
 #if 0
 	  result = Result_new_diag_debug(jobid,/*diagonals*/stage3list,diagnostic,NO_FAILURE);
@@ -4053,11 +4212,13 @@ process_request (Filestring_T *fp_failedinput, double *worker_runtime, Request_T
 	} else if (chimera == NULL) {
 	  stage3array = stage3array_from_list(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,
 					      stage3list,mergedp,/*chimerap*/false,/*remove_overlaps_p*/true);
+	  debug2(printf("chimera is NULL.  npaths_primary %d, npaths_altloc %d\n",npaths_primary,npaths_altloc));
 	  result = Result_new(jobid,mergedp,/*chimera*/NULL,stage3array,npaths_primary,npaths_altloc,
 			      first_absmq,second_absmq,diagnostic,NO_FAILURE);
 	} else {
 	  stage3array = stage3array_from_list(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,
 					      stage3list,mergedp,/*chimerap*/true,/*remove_overlaps_p*/false);
+	  debug2(printf("chimera is not NULL.  npaths_primary %d, npaths_altloc %d\n",npaths_primary,npaths_altloc));
 	  result = Result_new(jobid,mergedp,chimera,stage3array,npaths_primary,npaths_altloc,
 			      first_absmq,second_absmq,diagnostic,NO_FAILURE);
 	}
@@ -4238,8 +4399,9 @@ worker_mpi_process (int worker_id, Inbuffer_T inbuffer) {
 	genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
 	genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
 	Genome_user_setup(genomecomp_blocks);
-	Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false,
-			     /*genome_unk_mismatch_p*/true,/*mode*/STANDARD);
+	Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL,
+			/*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true,
+			/*mode*/STANDARD);
 	Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL);
 	Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks);
 #ifdef PMAP
@@ -4384,7 +4546,6 @@ single_thread () {
   Sequence_T usersegment, pairalign_segment;
   Filestring_T fp, fp_failedinput;
   Sequence_T queryseq;
-  int noutput = 0;
   int jobid = 0;
   double worker_runtime;
 
@@ -4424,8 +4585,9 @@ single_thread () {
       genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
       genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
       Genome_user_setup(genomecomp_blocks);
-      Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false,
-			   /*genome_unk_mismatch_p*/true,/*mode*/STANDARD);
+      Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL,
+		      /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true,
+		      /*mode*/STANDARD);
       Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL);
       Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks);
 #ifdef PMAP
@@ -4598,8 +4760,9 @@ worker_thread (void *data) {
       genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment));
       genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment));
       Genome_user_setup(genomecomp_blocks);
-      Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false,
-			   /*genome_unk_mismatch_p*/true,/*mode*/STANDARD);
+      Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL,
+		      /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true,
+		      /*mode*/STANDARD);
       Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL);
       Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks);
 #ifdef PMAP
@@ -4778,7 +4941,8 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
 			    stutterhits,diagnostic,/*stopwatch*/NULL);
   stage3list = apply_stage3(&chimera,gregions,referenceseq,referenceuc,/*usersegment*/NULL,
 			    oligoindices_major,oligoindices_minor,
-			    matchpool,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,stopwatch);
+			    matchpool,pairpool,diagpool,cellpool,
+			    dynprogL,dynprogM,dynprogR,stopwatch);
   if (stage3list == NULL) {
     npaths_primary = npaths_altloc = 0;
     stage3array = (Stage3_T *) NULL;
@@ -4786,6 +4950,7 @@ align_relative (FILE *input, char **files, int nfiles, int nextchar,
     stage3array = stage3array_from_list(&npaths_primary,&npaths_altloc,stage3list,/*mergedp*/false,
 					/*chimerap*/false,/*remove_overlaps_p*/true);
   }
+  debug2(printf("npaths_primary %d, npaths_altloc %d\n",npaths_primary,npaths_altloc));
 
   Diagnostic_free(&diagnostic);
 
@@ -5111,6 +5276,58 @@ check_valid_float (char *string, const char *option) {
   }
 }
 
+static char *
+check_valid_float_or_int (char *string) {
+  char *p = string;
+
+  if (*p == '+' || *p == '-') {
+    p++;
+  }
+
+  while (*p != '\0' && isdigit(*p)) {
+    p++;
+  }
+  if (*p == '\0') {
+    return string;
+  }
+
+  if (*p == '.') {
+    p++;
+  }
+
+  if (!isdigit(*p)) {
+    fprintf(stderr,"value %s is not a valid float\n",string);
+    exit(9);
+    return NULL;
+  }
+  while (*p != '\0' && isdigit(*p)) {
+    p++;
+  }
+
+  if (*p == 'e') {
+    p++;
+    if (*p == '+' || *p == '-') {
+      p++;
+    }
+    if (!isdigit(*p)) {
+      fprintf(stderr,"value %s is not a valid float\n",string);
+      exit(9);
+      return NULL;
+    }
+    while (*p != '\0' && isdigit(*p)) {
+      p++;
+    }
+  }
+
+  if (*p == '\0') {
+    return string;
+  } else {
+    fprintf(stderr,"value %s is not a valid float\n",string);
+    exit(9);
+    return NULL;
+  }
+}
+
 
 static int
 parse_command_line (int argc, char *argv[], int optind) {
@@ -5133,9 +5350,9 @@ parse_command_line (int argc, char *argv[], int optind) {
 
   while ((opt = getopt_long(argc,argv,
 #ifdef PMAP
-			    "q:D:a:d:k:Gg:2B:K:w:L:x:1t:s:c:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYNI:i:l:",
+			    "q:D:a:d:k:g:2B:K:w:L:x:1t:s:c:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQYNI:i:l:",
 #else
-			    "q:D:d:k:Gg:2B:K:w:L:x:1t:s:c:p:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQFa:Tz:j:YNI:i:l:",
+			    "q:D:d:k:g:2B:K:w:L:x:1t:s:c:p:SA03468:9n:f:ZO5o:V:v:M:m:ebu:E:PQFa:Tz:j:YNI:i:l:",
 #endif
 			    long_options, &long_option_index)) != -1) {
     switch (opt) {
@@ -5168,7 +5385,12 @@ parse_command_line (int argc, char *argv[], int optind) {
 	user_cmdline = optarg;
 
       } else if (!strcmp(long_name,"suboptimal-score")) {
-	suboptimal_score = atoi(check_valid_int(optarg));
+	suboptimal_score_float = atof(check_valid_float_or_int(optarg));
+	if (suboptimal_score_float > 1.0 && suboptimal_score_float != rint(suboptimal_score_float)) {
+	  fprintf(stderr,"Cannot specify fractional value %f for --suboptimal-score except between 0.0 and 1.0\n",
+		  suboptimal_score_float);
+	  return 9;
+	}
 
       } else if (!strcmp(long_name,"require-splicedir")) {
 	require_splicedir_p = true;
@@ -5194,6 +5416,9 @@ parse_command_line (int argc, char *argv[], int optind) {
       } else if (!strcmp(long_name,"max-intronlength-ends")) {
 	maxintronlen_ends = atoi(check_valid_int(optarg));
 
+      } else if (!strcmp(long_name,"split-large-introns")) {
+	split_large_introns_p = true;
+
       } else if (!strcmp(long_name,"trim-end-exons")) {
 	minendexon = atoi(check_valid_int(optarg));
 
@@ -5386,7 +5611,9 @@ parse_command_line (int argc, char *argv[], int optind) {
       }
       break;
 #endif
+#if 0
     case 'G': uncompressedp = true; break;
+#endif
     case 'g': user_genomicseg = optarg; break;
     case '1': user_selfalign_p = true; break;
     case '2': user_pairalign_p = true; break;
@@ -5549,6 +5776,8 @@ parse_command_line (int argc, char *argv[], int optind) {
       } else if (!strcmp(optarg,"sampe")) {
 	printtype = SAM;
 	sam_paired_p = true;
+      } else if (!strcmp(optarg,"bedpe")) {
+	printtype = BEDPE;
 #endif
       } else if (!strcmp(optarg,"2") || !strcmp(optarg,"gff3_gene")) {
 	printtype = GFF3_GENE;
@@ -5573,6 +5802,7 @@ parse_command_line (int argc, char *argv[], int optind) {
 	fprintf(stderr,"  introns\n");
 	fprintf(stderr,"  samse\n");
 	fprintf(stderr,"  sampe\n");
+	fprintf(stderr,"  bedpe\n");
 #endif
 	fprintf(stderr,"  gff3_gene (2)\n");
 	fprintf(stderr,"  gff3_match_cdna (3)\n");
@@ -6546,8 +6776,9 @@ main (int argc, char *argv[]) {
 
   } else if (usersegment != NULL) {
     Genome_user_setup(genomecomp_blocks);
-    Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false,
-			 /*genome_unk_mismatch_p*/true,/*mode*/STANDARD);
+    Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL,
+		    /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true,
+		    /*mode*/STANDARD);
     Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL);
     Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks);
 #ifdef PMAP
@@ -6912,6 +7143,15 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
 ");
 #endif
 
+#if 0
+    /* No longer supported */
+    fprintf(stdout,"\
+    -G, --genomefull               Use full genome (all ASCII chars allowed;\n \
+                                   built explicitly during setup), not\n\
+                                   compressed version\n\
+");
+#endif
+
     fprintf(stdout,"\
   -k, --kmer=INT                 kmer size to use in genome database (allowed values: 16 or less).\n\
                                    If not specified, the program will find the highest available\n\
@@ -6919,9 +7159,6 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
   --sampling=INT                 Sampling to use in genome database.  If not specified, the program\n\
                                    will find the smallest available sampling value in the genome database\n\
                                    within selected k-mer size\n\
-  -G, --genomefull               Use full genome (all ASCII chars allowed;\n\
-                                   built explicitly during setup), not\n\
-                                   compressed version\n\
   -g, --gseg=filename            User-supplied genomic segment\n\
   -1, --selfalign                Align one sequence against itself in FASTA format via stdin\n\
                                    (Useful for getting protein translation of a nucleotide sequence)\n\
@@ -6982,7 +7219,7 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
     fprintf(stdout,"\
   --max-intronlength-middle=INT  Max length for one internal intron (default %d).  Note: for backward\n\
                                    compatibility, the -K flag will set both --max-intronlength-middle\n\
-                                   and --max-intronlength-ends.\n\
+                                   and --max-intronlength-ends.  Also see --split-large-introns below.\n\
 ",maxintronlen);
     fprintf(stdout,"\
   --max-intronlength-ends=INT    Max length for first or last intron (default %d).  Note: for backward\n\
@@ -6990,6 +7227,11 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
                                    and --max-intronlength-ends.\n\
 ",maxintronlen_ends);
     fprintf(stdout,"\
+  --split-large-introns          Sometimes GMAP will exceed the value for --max-intronlength-middle,\n\
+                                   if it finds a good single alignment.  However, you can force GMAP\n\
+                                   to split such alignments by using this flag\n\
+");
+    fprintf(stdout,"\
   --trim-end-exons=INT           Trim end exons with fewer than given number of matches\n\
                                    (in nt, default %d)\n\
 ",minendexon);
@@ -7130,7 +7372,8 @@ Output types\n\
                                    map_ranges (or 8) = IIT FASTA range map format,\n\
                                    coords (or 9) = coords in table format,\n\
                                    sampe = SAM format (setting paired_read bit in flag),\n\
-                                   samse = SAM format (without setting paired_read bit)\n\
+                                   samse = SAM format (without setting paired_read bit),\n\
+                                   bedpe = indels and gaps in BEDPE format\n\
 ");
 #endif
     fprintf(stdout,"\n");
@@ -7143,9 +7386,13 @@ Output options\n\
                                    alignments, then set this to be 0.\n\
 ",maxpaths_report);
     fprintf(stdout,"\
-  --suboptimal-score=INT         Report only paths whose score is within this value of the\n\
-                                   best path.  By default, if this option is not provided,\n\
-                                   the program prints all paths found.\n\
+  --suboptimal-score=FLOAT       Report only paths whose score is within this value of the\n\
+                                   best path.\n\
+                                 If specified between 0.0 and 1.0, then treated as a fraction\n\
+                                   of the score of the best alignment (matches minus penalties for\n\
+                                   mismatches and indels).  Otherwise, treated as an integer\n\
+                                   number to be subtracted from the score of the best alignment.\n\
+                                   Default value is 0.50.\n\
   -O, --ordered                  Print output in same order as input (relevant\n\
                                    only if there is more than one worker thread)\n\
   -5, --md5                      Print MD5 checksum for each query sequence\n\
diff --git a/src/gmap_select.c b/src/gmap_select.c
index 7bbffb9..eef22ff 100644
--- a/src/gmap_select.c
+++ b/src/gmap_select.c
@@ -45,6 +45,16 @@ main (int argc, char *argv[]) {
     /* Depend on path */
     /* Cannot use file_exists_p, since it won't search PATH */
 
+    if (avx512_support_p == true) {
+      new_argv[0] = "gmap.avx512";
+      if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+      } else {
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = "gmap.avx2";
       if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
@@ -105,6 +115,20 @@ main (int argc, char *argv[]) {
   } else {
     dir = dirname(argv[0]);
 
+    if (avx512_support_p == true) {
+      new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.avx512") + 1) * sizeof(char));
+      sprintf(new_argv[0],"%s/gmap.avx512",dir);
+      if (file_exists_p(new_argv[0]) == false) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+	free(new_argv[0]);
+      } else {
+	rc = execvp(new_argv[0],new_argv);
+	free(new_argv[0]);
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.avx2") + 1) * sizeof(char));
       sprintf(new_argv[0],"%s/gmap.avx2",dir);
diff --git a/src/gmapl_select.c b/src/gmapl_select.c
index 816b356..29bd0c1 100644
--- a/src/gmapl_select.c
+++ b/src/gmapl_select.c
@@ -45,6 +45,16 @@ main (int argc, char *argv[]) {
     /* Depend on path */
     /* Cannot use file_exists_p, since it won't search PATH */
 
+    if (avx512_support_p == true) {
+      new_argv[0] = "gmapl.avx512";
+      if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+      } else {
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = "gmapl.avx2";
       if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
@@ -105,6 +115,20 @@ main (int argc, char *argv[]) {
   } else {
     dir = dirname(argv[0]);
 
+    if (avx512_support_p == true) {
+      new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.avx512") + 1) * sizeof(char));
+      sprintf(new_argv[0],"%s/gmapl.avx512",dir);
+      if (file_exists_p(new_argv[0]) == false) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+	free(new_argv[0]);
+      } else {
+	rc = execvp(new_argv[0],new_argv);
+	free(new_argv[0]);
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.avx2") + 1) * sizeof(char));
       sprintf(new_argv[0],"%s/gmapl.avx2",dir);
diff --git a/src/gsnap.c b/src/gsnap.c
index d61e979..130a893 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $";
+static char rcsid[] = "$Id: gsnap.c 207853 2017-06-29 20:33:16Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -89,6 +89,7 @@ static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $";
 #include "stage2.h"		/* For Stage2_setup */
 #ifndef LARGE_GENOMES
 #include "sarray-read.h"
+#include "sarray-search.h"
 #endif
 #include "indel.h"		/* For Indel_setup */
 #include "dynprog.h"
@@ -103,6 +104,7 @@ static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $";
 #include "list.h"
 #include "listdef.h"
 #include "iit-read.h"
+#include "iit-read-univ.h"
 #include "datadir.h"
 #include "samprint.h"		/* For SAM_setup */
 
@@ -151,7 +153,7 @@ static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $";
  *   GMAP parameters
  ************************************************************************/
 
-static int gmap_mode = GMAP_PAIRSEARCH | GMAP_INDEL_KNOWNSPLICE | GMAP_TERMINAL | GMAP_IMPROVEMENT;
+static int gmap_mode = GMAP_PAIRSEARCH | GMAP_TERMINAL | GMAP_IMPROVEMENT;
 static int gmap_min_nconsecutive = 20;
 static int nullgap = 600;
 static int maxpeelback = 20;	/* Now controlled by defect_rate */
@@ -169,7 +171,6 @@ static int max_gmap_pairsearch = 50; /* Will perform GMAP on up to this many hit
 static int max_gmap_terminal = 50;   /* Will perform GMAP on up to this many terminals5 or terminals3 */
 static int max_gmap_improvement = 5;
 
-static double microexon_spliceprob = 0.95;
 static int suboptimal_score_start = -1; /* Determined by simulations to have minimal effect */
 static int suboptimal_score_end = 3; /* Determined by simulations to have diminishing returns above 3 */
 
@@ -212,6 +213,18 @@ static bool use_only_sarray_p = false;
 static Sarray_T sarray_fwd = NULL;
 static Sarray_T sarray_rev = NULL;
 #endif
+static bool user_set_sarray_p = false;
+static bool require_completeset_p = false;
+
+/* Speed levels
+   4: Suffix array only
+   3: Suffix array on, and complete set only if necessary
+   2: Suffix array on, as basis for doing complete set
+   1: Suffix array off
+*/
+static int speed_level = 3;
+static bool user_set_speed_level_p = false;
+
 
 #if 0
 static char STANDARD_CHARTABLE[4] = {'A','C','G','T'};
@@ -255,6 +268,8 @@ static bool genome_unk_mismatch_p = true;
 static bool novelsplicingp = false;
 static bool find_dna_chimeras_p = false;
 
+static bool user_trim_mismatch_score_p = false;
+static bool user_trim_indel_score_p = false;
 static int trim_mismatch_score = -3;
 static int trim_indel_score = -2; /* was -4 */
 
@@ -358,10 +373,13 @@ static int max_anchors = 10;
 
 /* Genes IIT */
 static char *genes_file = (char *) NULL;
-static IIT_T genes_iit = NULL;
+static IIT_T genes_iit = NULL;	    /* For genome alignment */
 static int *genes_divint_crosstable = NULL;
 static bool favor_multiexon_p = false;
 
+static char *genestruct_file = (char *) NULL;
+static IIT_T genestruct_iit = NULL; /* For transcript splicing */
+
 
 /* Splicing IIT */
 static bool knownsplicingp = false;
@@ -369,6 +387,7 @@ static bool distances_observed_p = false;
 static char *user_splicingdir = (char *) NULL;
 static char *splicing_file = (char *) NULL;
 static IIT_T splicing_iit = NULL;
+static bool transcript_splicing_p = false;
 static bool amb_closest_p = false;
 static bool amb_clip_p = true;
 
@@ -467,6 +486,9 @@ static bool force_xs_direction_p = false;
 static bool md_lowercase_variant_p = false;
 static bool hide_soft_clips_p = false;
 
+static bool omit_concordant_uniq_p = false;
+static bool omit_concordant_mult_p = false;
+
 
 /* Input/output */
 static char *split_output_root = NULL;
@@ -489,7 +511,7 @@ static struct option long_options[] = {
   /* Input options */
   {"dir", required_argument, 0, 'D'},	/* user_genomedir */
   {"db", required_argument, 0, 'd'}, /* dbroot */
-  {"use-sarray", required_argument, 0, 0}, /* use_sarray_p, use_only_sarray_p */
+  {"use-sarray", required_argument, 0, 0}, /* use_sarray_p, use_only_sarray_p, user_set_sarray_p */
   {"kmer", required_argument, 0, 'k'}, /* required_index1part, index1part */
   {"sampling", required_argument, 0, 0}, /* required_index1interval, index1interval */
   {"genomefull", no_argument, 0, 'G'}, /* uncompressedp */
@@ -518,6 +540,7 @@ static struct option long_options[] = {
 #ifdef HAVE_MMAP
   {"batch", required_argument, 0, 'B'}, /* offsetsstrm_access, positions_access, genome_access */
 #endif
+  {"speed", required_argument, 0, 0},	       /* speed_level, user_set_speed_level_p */
   {"expand-offsets", required_argument, 0, 0}, /* expand_offsets_p */
   {"pairmax-dna", required_argument, 0, 0}, /* pairmax_dna */
   {"pairmax-rna", required_argument, 0, 0}, /* pairmax_rna */
@@ -530,8 +553,8 @@ static struct option long_options[] = {
   {"query-unk-mismatch", required_argument, 0, 0}, /* query_unk_mismatch_p */
   {"genome-unk-mismatch", required_argument, 0, 0}, /* genome_unk_mismatch_p */
 
-  {"trim-mismatch-score", required_argument, 0, 0}, /* trim_mismatch_score */
-  {"trim-indel-score", required_argument, 0, 0}, /* trim_indel_score */
+  {"trim-mismatch-score", required_argument, 0, 0}, /* trim_mismatch_score, user_trim_mismatch_score_p */
+  {"trim-indel-score", required_argument, 0, 0}, /* trim_indel_score, user_trim_indel_score_p */
   {"novelsplicing", required_argument, 0, 'N'}, /* novelsplicingp */
   {"find-dna-chimeras", required_argument, 0, 0}, /* find_dna_chimeras */
 
@@ -557,8 +580,10 @@ static struct option long_options[] = {
   {"novelend-splicedist", required_argument, 0, 0}, /* shortsplicedist_novelend */
   {"splicingdir", required_argument, 0, 0},	  /* user_splicingdir */
   {"use-splicing", required_argument, 0, 's'}, /* splicing_iit, knownsplicingp, find_dna_chimeras_p */
+  {"transcript-splicing", required_argument, 0, 'r'}, /* splicing_iit, knownsplicingp, transcript_splicing_p */
   {"ambig-splice-noclip", no_argument, 0, 0},  /* amb_clip_p */
   {"genes", required_argument, 0, 'g'}, /* genes_iit */
+  {"genestruct", required_argument, 0, 0}, /* genestruct_iit */
   {"favor-multiexon", no_argument, 0, 0}, /* favor_multiexon_p */
   {"end-detail", required_argument, 0, 0}, /* end_detail */
 
@@ -591,7 +616,6 @@ static struct option long_options[] = {
   {"max-gmap-pairsearch", required_argument, 0, 0}, /* max_gmap_pairsearch */
   {"max-gmap-terminal", required_argument, 0, 0}, /* max_gmap_terminal */
   {"max-gmap-improvement", required_argument, 0, 0}, /* max_gmap_improvement */
-  {"microexon-spliceprob", required_argument, 0, 0}, /* microexon_spliceprob */
   {"stage2-start", required_argument, 0, 0},	     /* suboptimal_score_start */
   {"stage2-end", required_argument, 0, 0},	     /* suboptimal_score_end */
 
@@ -634,6 +658,9 @@ static struct option long_options[] = {
 
   {"order-among-best", required_argument, 0, 0}, /* want_random_p */
 
+  {"omit-concordant-uniq", no_argument, 0, 0}, /* omit_concordant_uniq_p */
+  {"omit-concordant-mult", no_argument, 0, 0}, /* omit_concordant_mult_p */
+
   /* Diagnostic options */
   {"time", no_argument, 0, 0},	/* timingp */
   {"unload", no_argument, 0, 0},	/* unloadp */
@@ -716,7 +743,7 @@ print_program_version () {
 #endif
   fprintf(stdout,"\n");
 
-  fprintf(stdout,"SIMD functions:");
+  fprintf(stdout,"SIMD functions compiled:");
 #ifdef HAVE_ALTIVEC
   fprintf(stdout," Altivec");
 #endif
@@ -741,8 +768,11 @@ print_program_version () {
 #ifdef HAVE_SSE4_2
   fprintf(stdout," SSE4.2");
 #endif
-#ifdef HAVE_AVX
-  fprintf(stdout," AVX");
+#ifdef HAVE_AVX2
+  fprintf(stdout," AVX2");
+#endif
+#ifdef HAVE_AVX512
+  fprintf(stdout," AVX512");
 #endif
   fprintf(stdout,"\n");
 
@@ -858,7 +888,7 @@ check_compiler_assumptions () {
 
 
 static Filestring_T
-process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+process_request (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 		 double *worker_runtime, Request_T request, Floors_T *floors_array,
 		 Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
 		 Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
@@ -900,7 +930,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 				     /*keep_floors_p*/true);
 
     result = Result_single_read_new(jobid,(void **) stage3array,npaths_primary,npaths_altloc,first_absmq,second_absmq);
-    fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+    fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
     *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
     Result_free(&result);
     return fp;
@@ -919,7 +949,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
     /* Paired or concordant hits found */
     result = Result_paired_read_new(jobid,(void **) stage3pairarray,npaths_primary,npaths_altloc,first_absmq,second_absmq,
 				    final_pairtype);
-    fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+    fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
     *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
     Result_free(&result);
     return fp;
@@ -929,7 +959,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
     /* Report ends as unpaired */
     result = Result_paired_as_singles_new(jobid,(void **) stage3array5,npaths5_primary,npaths5_altloc,first_absmq5,second_absmq5,
 					  (void **) stage3array3,npaths3_primary,npaths3_altloc,first_absmq3,second_absmq3);
-    fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+    fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
     *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
     Result_free(&result);
     return fp;
@@ -967,7 +997,7 @@ process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 					    (void **) stage3array3,npaths3_primary,npaths3_altloc,first_absmq3,second_absmq3);
     }
 
-    fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+    fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
     *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch);
     Result_free(&result);
     return fp;
@@ -1055,7 +1085,7 @@ static void
 single_thread () {
   Floors_T *floors_array;
   Request_T request;
-  Filestring_T fp, fp_failedinput_1, fp_failedinput_2;
+  Filestring_T fp, fp_failedinput, fp_failedinput_1, fp_failedinput_2;
   Shortread_T queryseq1;
   int i;
   Stopwatch_T worker_stopwatch;
@@ -1111,7 +1141,7 @@ single_thread () {
 #endif
 
     TRY
-      fp = process_request(&fp_failedinput_1,&fp_failedinput_2,&worker_runtime,
+      fp = process_request(&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2,&worker_runtime,
 			   request,floors_array,oligoindices_major,oligoindices_minor,
 			   pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
       if (timingp == true) {
@@ -1144,7 +1174,7 @@ single_thread () {
     RERAISE;
     END_TRY;
 
-    Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+    Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 
     if (jobid % POOL_FREE_INTERVAL == 0) {
       Pairpool_free_memory(pairpool);
@@ -1217,7 +1247,7 @@ static void *
 worker_thread (void *data) {
   Floors_T *floors_array;
   Request_T request;
-  Filestring_T fp, fp_failedinput_1, fp_failedinput_2;
+  Filestring_T fp, fp_failedinput, fp_failedinput_1, fp_failedinput_2;
   Shortread_T queryseq1;
   int i;
   Stopwatch_T worker_stopwatch;
@@ -1287,7 +1317,7 @@ worker_thread (void *data) {
 #endif
 
     TRY
-      fp = process_request(&fp_failedinput_1,&fp_failedinput_2,&worker_runtime,
+      fp = process_request(&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2,&worker_runtime,
 			   request,floors_array,oligoindices_major,oligoindices_minor,
 			   pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch);
       if (timingp == true) {
@@ -1320,7 +1350,7 @@ worker_thread (void *data) {
     RERAISE;
     END_TRY;
 
-    Outbuffer_put_filestrings(outbuffer,fp,fp_failedinput_1,fp_failedinput_2);
+    Outbuffer_put_filestrings(outbuffer,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 
     if (worker_jobid % POOL_FREE_INTERVAL == 0) {
       Pairpool_free_memory(pairpool);
@@ -1433,7 +1463,7 @@ add_gmap_mode (char *string) {
     gmap_mode = 0;
     return 0;
   } else if (!strcmp(string,"all")) {
-    gmap_mode = (GMAP_IMPROVEMENT | GMAP_TERMINAL | GMAP_INDEL_KNOWNSPLICE | GMAP_PAIRSEARCH);
+    gmap_mode = (GMAP_IMPROVEMENT | GMAP_TERMINAL | GMAP_PAIRSEARCH);
     return 1;
   } else {
     if (!strcmp(string,"improve")) {
@@ -1441,12 +1471,12 @@ add_gmap_mode (char *string) {
     } else if (!strcmp(string,"terminal")) {
       gmap_mode |= GMAP_TERMINAL;
     } else if (!strcmp(string,"indel_knownsplice")) {
-      gmap_mode |= GMAP_INDEL_KNOWNSPLICE;
+      fprintf(stderr,"--gmap-mode indel_knownsplice now obsolete.  Ignoring\n");
     } else if (!strcmp(string,"pairsearch")) {
       gmap_mode |= GMAP_PAIRSEARCH;
     } else {
       fprintf(stderr,"Don't recognize gmap-mode type %s\n",string);
-      fprintf(stderr,"Allowed values are: none, all, improve, terminal, indel_knownsplice, pairsearch\n");
+      fprintf(stderr,"Allowed values are: none, all, improve, terminal, pairsearch\n");
       exit(9);
     }
     return 1;
@@ -1627,7 +1657,7 @@ parse_command_line (int argc, char *argv[], int optind) {
   fprintf(stderr,"\n");
 
   while ((opt = getopt_long(argc,argv,
-			    "D:d:k:Gq:o:a:N:M:m:i:y:Y:z:Z:w:E:e:J:K:l:g:s:V:v:B:t:A:j:0n:QO",
+			    "D:d:k:Gq:o:a:N:M:m:i:y:Y:z:Z:w:E:e:J:K:l:g:r:s:V:v:B:t:A:j:0n:QO",
 			    long_options, &long_option_index)) != -1) {
     switch (opt) {
     case 0:
@@ -1644,6 +1674,7 @@ parse_command_line (int argc, char *argv[], int optind) {
 
 #ifdef LARGE_GENOMES
       } else if (!strcmp(long_name,"use-sarray")) {
+	user_set_sarray_p = true;
 	if (!strcmp(optarg,"0")) {
 	  use_sarray_p = false;
 	  use_only_sarray_p = false;
@@ -1654,6 +1685,7 @@ parse_command_line (int argc, char *argv[], int optind) {
 
 #else
       } else if (!strcmp(long_name,"use-sarray")) {
+	user_set_sarray_p = true;
 	if (!strcmp(optarg,"2")) {
 	  use_sarray_p = true;
 	  use_only_sarray_p = true;
@@ -1695,6 +1727,21 @@ parse_command_line (int argc, char *argv[], int optind) {
 	  return 9;
 	}
 
+      } else if (!strcmp(long_name,"speed")) {
+	user_set_speed_level_p = true;
+	if (!strcmp(optarg,"4")) {
+	  speed_level = 4;
+	} else if (!strcmp(optarg,"3")) {
+	  speed_level = 3;
+	} else if (!strcmp(optarg,"2")) {
+	  speed_level = 2;
+	} else if (!strcmp(optarg,"1")) {
+	  speed_level = 1;
+	} else {
+	  fprintf(stderr,"--speed flag must be 1, 2, 3, or 4\n");
+	  return 9;
+	}
+
       } else if (!strcmp(long_name,"sampling")) {
 	required_index1interval = atoi(check_valid_int(optarg));
 
@@ -1752,6 +1799,9 @@ parse_command_line (int argc, char *argv[], int optind) {
 	  exit(9);
 	}
 
+      } else if (!strcmp(long_name,"genestruct")) {
+	genestruct_file = optarg;
+
       } else if (!strcmp(long_name,"tallydir")) {
 	user_tallydir = optarg;
 
@@ -1793,9 +1843,6 @@ parse_command_line (int argc, char *argv[], int optind) {
       } else if (!strcmp(long_name,"max-gmap-improvement")) {
 	max_gmap_improvement = atoi(check_valid_int(optarg));
 
-      } else if (!strcmp(long_name,"microexon-spliceprob")) {
-	microexon_spliceprob = check_valid_float(optarg,long_name);
-
       } else if (!strcmp(long_name,"stage2-start")) {
 	/* No longer used by stage 2 */
 	suboptimal_score_start = atoi(check_valid_int(optarg));
@@ -1958,9 +2005,11 @@ parse_command_line (int argc, char *argv[], int optind) {
 
       } else if (!strcmp(long_name,"trim-mismatch-score")) {
 	trim_mismatch_score = atoi(check_valid_int(optarg));
+	user_trim_mismatch_score_p = true;
 
       } else if (!strcmp(long_name,"trim-indel-score")) {
 	trim_indel_score = atoi(check_valid_int(optarg));
+	user_trim_indel_score_p = true;
 
       } else if (!strcmp(long_name,"distant-splice-identity")) {
 	min_distantsplicing_identity = check_valid_float(optarg,long_name);
@@ -2073,6 +2122,12 @@ parse_command_line (int argc, char *argv[], int optind) {
 	} else {
 	  nofailsp = true;
 	}
+
+      } else if (!strcmp(long_name,"omit-concordant-uniq")) {
+	omit_concordant_uniq_p = true;
+      } else if (!strcmp(long_name,"omit-concordant-mult")) {
+	omit_concordant_mult_p = true;
+
       } else {
 	/* Shouldn't reach here */
 	fprintf(stderr,"Don't recognize option %s.  For usage, run 'gsnap --help'",long_name);
@@ -2173,9 +2228,16 @@ parse_command_line (int argc, char *argv[], int optind) {
 
     case 'g': genes_file = optarg; break;
 
+    case 'r':
+      splicing_file = optarg;
+      knownsplicingp = true;
+      transcript_splicing_p = true;
+      break;
+
     case 's':
       splicing_file = optarg;
       knownsplicingp = true;
+      transcript_splicing_p = false;
       break;
 
     case 'V': user_snpsdir = optarg; break;
@@ -2319,36 +2381,56 @@ parse_command_line (int argc, char *argv[], int optind) {
     return 9;
   }
 
-  if (novelsplicingp == true && knownsplicingp == true) {
+
+  if (find_dna_chimeras_p == true) {
+    /* DNA-Seq, but need to trim to find chimeras */
+    fprintf(stderr,"Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic)\n");
+    pairmax_linear = pairmax_dna;
+    pairmax_circular = pairmax_dna;
+    shortsplicedist = shortsplicedist_known = 0U;
+    shortsplicedist_novelend = 0U;
+
+  } else if (novelsplicingp == true && knownsplicingp == true) {
     fprintf(stderr,"Novel splicing (-N) and known splicing (-s) both turned on => assume reads are RNA-Seq\n");
-    find_dna_chimeras_p = false;
+    /* find_dna_chimeras_p = false; */
     pairmax_linear = pairmax_rna;
     pairmax_circular = pairmax_dna;
     shortsplicedist_known = shortsplicedist;
 
   } else if (knownsplicingp == true) {
     fprintf(stderr,"Known splicing (-s) turned on => assume reads are RNA-Seq\n");
-    find_dna_chimeras_p = false;
+    /* find_dna_chimeras_p = false; */
     pairmax_linear = pairmax_rna;
     pairmax_circular = pairmax_dna;
     shortsplicedist_known = shortsplicedist;
 
   } else if (novelsplicingp == true) {
     fprintf(stderr,"Novel splicing (-N) turned on => assume reads are RNA-Seq\n");
-    find_dna_chimeras_p = false;
+    /* find_dna_chimeras_p = false; */
     pairmax_linear = pairmax_rna;
     pairmax_circular = pairmax_dna;
     shortsplicedist_known = 0;
 
   } else {
-    /* Appears to be DNA-Seq */
+    /* Straight DNA-Seq */
     fprintf(stderr,"Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic)\n");
     pairmax_linear = pairmax_dna;
     pairmax_circular = pairmax_dna;
     shortsplicedist = shortsplicedist_known = 0U;
     shortsplicedist_novelend = 0U;
+
+#if 0
+    /* This ignores datasets where sequence ends are indeed bad, or have primers.  User can set values to 0 if desired */
+    if (user_trim_mismatch_score_p == false) {
+      trim_mismatch_score = 0;
+    }
+    if (user_trim_indel_score_p == false) {
+      trim_indel_score = 0;
+    }
+#endif
   }
 
+
   if (shortsplicedist_novelend > shortsplicedist) {
     fprintf(stderr,"The novelend-splicedist %d is greater than the localsplicedist %d.  Resetting novelend-splicedist to be %d\n",
 	    shortsplicedist_novelend,shortsplicedist,shortsplicedist);
@@ -2384,6 +2466,29 @@ parse_command_line (int argc, char *argv[], int optind) {
     }
   }
 
+  if (user_set_speed_level_p == true) {
+    if (user_set_sarray_p == true) {
+      fprintf(stderr,"Note: --speed value takes precedence over --use-sarray value\n");
+    }
+    if (speed_level == 4) {
+      use_sarray_p = true;
+      use_only_sarray_p = true;
+      require_completeset_p = false;
+    } else if (speed_level == 3) {
+      use_sarray_p = true;
+      use_only_sarray_p = false;
+      require_completeset_p = false;
+    } else if (speed_level == 2) {
+      use_sarray_p = true;
+      use_only_sarray_p = false;
+      require_completeset_p = true;
+    } else {
+      use_sarray_p = false;
+      use_only_sarray_p = false;
+      require_completeset_p = true;
+    }
+  }
+
 #ifdef USE_MPI
   /* Code does allow for MPI output to stdout, but appears not to work
      yet, and may not work if rank 0 is also a worker */
@@ -3064,19 +3169,50 @@ worker_setup (char *genomesubdir, char *fileroot) {
     genes_divint_crosstable = Univ_IIT_divint_crosstable(chromosome_iit,genes_iit);
   }
 
+  if (genestruct_file != NULL) {
+    if ((genestruct_iit = IIT_read(genestruct_file,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
+				   /*divstring*/NULL,/*add_iit_p*/true)) != NULL) {
+      fprintf(stderr,"Reading genestruct file %s locally...",genestruct_file);
+    } else {
+      mapdir = Datadir_find_mapdir(/*user_mapdir*/NULL,genomesubdir,fileroot);
+      iitfile = (char *) CALLOC(strlen(mapdir)+strlen("/")+
+				strlen(genestruct_file)+1,sizeof(char));
+      sprintf(iitfile,"%s/%s",mapdir,genestruct_file);
+      if ((genestruct_iit = IIT_read(iitfile,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
+				     /*divstring*/NULL,/*add_iit_p*/true)) != NULL) {
+	fprintf(stderr,"Reading genestruct file %s...",iitfile);
+	FREE(iitfile);
+	FREE(mapdir);
+      } else {
+	fprintf(stderr,"Genes file %s.iit not found locally or in %s.  Available files:\n",genestruct_file,mapdir);
+	Datadir_list_directory(stderr,mapdir);
+	fprintf(stderr,"Either install file %s or specify a full directory path\n",genestruct_file);
+	exit(9);
+      }
+    }
+  }
+
 
   if (splicing_file != NULL) {
     if (user_splicingdir == NULL) {
       if ((splicing_iit = IIT_read(splicing_file,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
 				   /*divstring*/NULL,/*add_iit_p*/true)) != NULL) {
-	fprintf(stderr,"Reading splicing file %s locally...",splicing_file);
+	if (transcript_splicing_p == true) {
+	  fprintf(stderr,"Reading transcript splicing file %s locally...",splicing_file);
+	} else {
+	  fprintf(stderr,"Reading splicing file %s locally...",splicing_file);
+	}
       }
     } else {
       iitfile = (char *) CALLOC(strlen(user_splicingdir)+strlen("/")+strlen(splicing_file)+1,sizeof(char));
       sprintf(iitfile,"%s/%s",user_splicingdir,splicing_file);
       if ((splicing_iit = IIT_read(iitfile,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
 				   /*divstring*/NULL,/*add_iit_p*/true)) != NULL) {
-	fprintf(stderr,"Reading splicing file %s...",iitfile);
+	if (transcript_splicing_p == true) {
+	  fprintf(stderr,"Reading transcript splicing file %s...",iitfile);
+	} else {
+	  fprintf(stderr,"Reading splicing file %s...",iitfile);
+	}
 	FREE(iitfile);
       }
     }
@@ -3088,7 +3224,11 @@ worker_setup (char *genomesubdir, char *fileroot) {
       sprintf(iitfile,"%s/%s",mapdir,splicing_file);
       if ((splicing_iit = IIT_read(iitfile,/*name*/NULL,/*readonlyp*/true,/*divread*/READ_ALL,
 				   /*divstring*/NULL,/*add_iit_p*/true)) != NULL) {
-	fprintf(stderr,"Reading splicing file %s...",iitfile);
+	if (transcript_splicing_p == true) {
+	  fprintf(stderr,"Reading transcript splicing file %s...",iitfile);
+	} else {
+	  fprintf(stderr,"Reading splicing file %s...",iitfile);
+	}
 	FREE(iitfile);
 	FREE(mapdir);
       } else {
@@ -3275,10 +3415,10 @@ worker_setup (char *genomesubdir, char *fileroot) {
   Genome_setup(genomecomp,genomecomp_alt,mode,circular_typeint);
 #ifndef LARGE_GENOMES
   if (sarray_fwd != NULL && sarray_rev != NULL) {
-    Sarray_setup(sarray_fwd,sarray_rev,genomecomp,mode,chromosome_iit,circular_typeint,circularp,
-		 shortsplicedist,localsplicing_penalty,
-		 max_deletionlength,max_end_deletions,max_middle_insertions,max_end_insertions,
-		 splicesites,splicetypes,splicedists,nsplicesites);
+    Sarray_search_setup(sarray_fwd,sarray_rev,genomecomp,mode,chromosome_iit,circular_typeint,circularp,
+			shortsplicedist,localsplicing_penalty,
+			min_intronlength,max_deletionlength,max_end_deletions,max_middle_insertions,max_end_insertions,
+			splicesites,splicetypes,splicedists,nsplicesites);
   }
 #endif
 
@@ -3330,12 +3470,14 @@ worker_setup (char *genomesubdir, char *fileroot) {
 		   /*snpp*/snps_iit ? true : false,amb_closest_p,amb_clip_p,min_shortend);
   Splice_setup(min_shortend);
   Indel_setup(min_indel_end_matches,indel_penalty_middle);
-  Stage1hr_setup(use_sarray_p,use_only_sarray_p,index1part,index1interval,spansize,max_anchors,chromosome_iit,nchromosomes,
+  Stage1hr_setup(use_sarray_p,use_only_sarray_p,require_completeset_p,
+		 index1part,index1interval,spansize,max_anchors,chromosome_iit,nchromosomes,
 		 genomecomp,genomecomp_alt,mode,maxpaths_search,
 		 splicesites,splicetypes,splicedists,nsplicesites,
 		 novelsplicingp,knownsplicingp,find_dna_chimeras_p,distances_observed_p,
 		 subopt_levels,min_indel_end_matches,max_middle_insertions,max_middle_deletions,
 		 shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,min_intronlength,
+		 expected_pairlength,pairlength_deviation,
 		 min_distantsplicing_end_matches,min_distantsplicing_identity,
 		 nullgap,maxpeelback,maxpeelback_distalmedial,
 		 extramaterial_end,extramaterial_paired,gmap_mode,
@@ -3353,20 +3495,25 @@ worker_setup (char *genomesubdir, char *fileroot) {
 		 genes_iit,genes_divint_crosstable,
 		 tally_iit,tally_divint_crosstable,runlength_iit,runlength_divint_crosstable,
 		 distances_observed_p,pairmax_linear,pairmax_circular,
-		 expected_pairlength,pairlength_deviation,
+		 expected_pairlength,pairlength_deviation,maxpeelback,
 		 localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
 		 favor_multiexon_p,gmap_min_nconsecutive,end_detail,subopt_levels,
 		 max_middle_insertions,max_middle_deletions,
 		 novelsplicingp,shortsplicedist,
-		 merge_samechr_p,circularp,altlocp,alias_starts,alias_ends,failedinput_root,print_m8_p,want_random_p);
+		 merge_samechr_p,circularp,altlocp,alias_starts,alias_ends,
+		 omit_concordant_uniq_p,omit_concordant_mult_p,
+		 failedinput_root,print_m8_p,want_random_p);
   SAM_setup(add_paired_nomappers_p,paired_flag_means_concordant_p,
+	    omit_concordant_uniq_p,omit_concordant_mult_p,
 	    quiet_if_excessive_p,maxpaths_report,failedinput_root,fastq_format_p,hide_soft_clips_p,
-	    clip_overlap_p,merge_overlap_p,sam_multiple_primaries_p,
-	    force_xs_direction_p,md_lowercase_variant_p,snps_iit,chromosome_iit,genomecomp);
+	    clip_overlap_p,merge_overlap_p,merge_samechr_p,sam_multiple_primaries_p,
+	    force_xs_direction_p,md_lowercase_variant_p,snps_iit,find_dna_chimeras_p,splicing_iit,
+	    donor_typeint,acceptor_typeint,transcript_splicing_p,genestruct_iit,
+	    chromosome_iit,genomecomp);
   Output_setup(chromosome_iit,nofailsp,failsonlyp,quiet_if_excessive_p,maxpaths_report,
 	       failedinput_root,quality_shift,
 	       output_sam_p,print_m8_p,invert_first_p,invert_second_p,
-	       merge_samechr_p,sam_read_group_id);
+	       sam_read_group_id);
 
   return;
 }
@@ -3447,6 +3594,10 @@ worker_cleanup () {
     IIT_free(&splicing_iit);
   }
 
+  if (genestruct_iit != NULL) {
+    IIT_free(&genestruct_iit);
+  }
+
   if (genes_iit != NULL) {
     FREE(genes_divint_crosstable);
     IIT_free(&genes_iit);
@@ -4003,7 +4154,8 @@ Usage: gsnap [OPTIONS...] <FASTA file>, or\n\
                                    Allowed values: 0 (no), 1 (yes, plus GSNAP/GMAP algorithm, default),\n\
                                    or 2 (yes, and use only suffix array algorithm).\n\
                                    Note that suffix arrays will bias against SNP alleles in\n\
-                                   SNP-tolerant alignment.\n\
+                                   SNP-tolerant alignment.  If there is a conflict between this flag\n\
+                                   and the flag --speed, the --speed flag takes precedence\n\
   -k, --kmer=INT                 kmer size to use in genome database (allowed values: 16 or less)\n\
                                    If not specified, the program will find the highest available\n\
                                    kmer size in the genome database\n\
@@ -4060,6 +4212,7 @@ Usage: gsnap [OPTIONS...] <FASTA file>, or\n\
 
   /* Computation options */
   fprintf(stdout,"Computation options\n");
+#if 0
   fprintf(stdout,"\
 \n\
   Note: GSNAP has an ultrafast algorithm for calculating mismatches up to and including\n\
@@ -4069,6 +4222,22 @@ Also, indels, especially end indels, take longer to compute, although the algori
 is still designed to be fast.\n\
 \n\
 ");
+#endif
+  fprintf(stdout,"\
+  --speed=INT                    Speed mode (default = 3)\n\
+                                 Mode     Suffix array   Hash table\n\
+                                   4      On             Off\n\
+                                   3      On             Only if suffix array yields incomplete answers\n\
+                                   2      On             In addition to suffix array\n\
+                                   1      Off            Yes\n\
+\n\
+                                 Note: There is a tradeoff between speed and accuracy, so slower speed\n\
+                                 can give better answers.  Levels 1 and 2 are about the same, while level 3\n\
+                                 is about 4 times faster, and then level 4 is 5 times faster than level 3\n\
+                                 However, accuracy of level 3 is better than level 4 and almost the same as\n\
+                                 level 2, so mode 3 is generally recommended and the default.  If there is a\n\
+                                 conflict between this value and the flag --use-sarray, this takes precedence\n\
+");
 #ifdef HAVE_MMAP
   fprintf(stdout,"\
   -B, --batch=INT                Batch mode (default = 2)\n\
@@ -4193,14 +4362,16 @@ is still designed to be fast.\n\
                                    from paired-end reads if they appear to be present.\n\
 ");
   fprintf(stdout,"\
-  --trim-mismatch-score=INT      Score to use for mismatches when trimming at ends (default is %d;\n\
-                                   to turn off trimming, specify 0).  Warning: turning trimming off\n\
-                                   will give false positive mismatches at the ends of reads\n\
+  --trim-mismatch-score=INT      Score to use for mismatches when trimming at ends.  To turn off trimming,\n\
+                                   specify 0.  Default is %d for both RNA-Seq and DNA-Seq.  Warning:\n\
+                                   Turning trimming off in RNA-Seq can give false positive mismatches\n\
+                                   at the ends of reads\n\
 ",trim_mismatch_score);
   fprintf(stdout,"\
-  --trim-indel-score=INT         Score to use for indels when trimming at ends (default is %d;\n\
-                                   to turn off trimming, specify 0).  Warning: turning trimming off\n\
-                                   will give false positive indels at the ends of reads\n\
+  --trim-indel-score=INT         Score to use for indels when trimming at ends.  To turn off trimming,\n\
+                                   specify 0.  Default is %d for both RNA-Seq and DNA-Seq.  Warning:\n\
+                                   Turning trimming off in RNA-Seq can give false positive indels\n\
+                                   at the ends of reads\n\
 ",trim_indel_score);
 
   fprintf(stdout,"\
@@ -4269,9 +4440,9 @@ is still designed to be fast.\n\
   fprintf(stdout,"Options for GMAP alignment within GSNAP\n");
   fprintf(stdout,"\
   --gmap-mode=STRING             Cases to use GMAP for complex alignments containing multiple splices or indels\n\
-                                 Allowed values: none, all, pairsearch, indel_knownsplice, terminal, improve\n\
+                                 Allowed values: none, all, pairsearch, terminal, improve\n\
                                    (or multiple values, separated by commas).\n\
-                                   Default: all, i.e., pairsearch,indel_knownsplice,terminal,improve\n\
+                                   Default: all, i.e., pairsearch,terminal,improve\n\
 ");
   fprintf(stdout,"\
   --trigger-score-for-gmap=INT   Try GMAP pairsearch on nearby genomic regions if best score (the total\n\
@@ -4295,10 +4466,6 @@ is still designed to be fast.\n\
   --max-gmap-improvement=INT     Perform GMAP improvement on nearby genomic regions up to this many\n\
                                    candidate ends (default %d).  Requires improve in --gmap-mode\n\
 ",max_gmap_improvement);
-  fprintf(stdout,"\
-  --microexon-spliceprob=FLOAT   Allow microexons only if one of the splice site probabilities is\n\
-                                   greater than this value (default %.2f)\n\
-",microexon_spliceprob);
   fprintf(stdout,"\n");
 
 
diff --git a/src/gsnap_select.c b/src/gsnap_select.c
index 788a29a..7f1985d 100644
--- a/src/gsnap_select.c
+++ b/src/gsnap_select.c
@@ -45,6 +45,16 @@ main (int argc, char *argv[]) {
     /* Depend on path */
     /* Cannot use file_exists_p, since it won't search PATH */
 
+    if (avx512_support_p == true) {
+      new_argv[0] = "gsnap.avx512";
+      if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+      } else {
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = "gsnap.avx2";
       if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
@@ -105,6 +115,20 @@ main (int argc, char *argv[]) {
   } else {
     dir = dirname(argv[0]);
 
+    if (avx512_support_p == true) {
+      new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.avx512") + 1) * sizeof(char));
+      sprintf(new_argv[0],"%s/gsnap.avx512",dir);
+      if (file_exists_p(new_argv[0]) == false) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+	free(new_argv[0]);
+      } else {
+	rc = execvp(new_argv[0],new_argv);
+	free(new_argv[0]);
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.avx2") + 1) * sizeof(char));
       sprintf(new_argv[0],"%s/gsnap.avx2",dir);
diff --git a/src/gsnapl_select.c b/src/gsnapl_select.c
index d2d20dc..5fba060 100644
--- a/src/gsnapl_select.c
+++ b/src/gsnapl_select.c
@@ -45,6 +45,16 @@ main (int argc, char *argv[]) {
     /* Depend on path */
     /* Cannot use file_exists_p, since it won't search PATH */
 
+    if (avx512_support_p == true) {
+      new_argv[0] = "gsnapl.avx512";
+      if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+      } else {
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = "gsnapl.avx2";
       if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) {
@@ -105,6 +115,20 @@ main (int argc, char *argv[]) {
   } else {
     dir = dirname(argv[0]);
 
+    if (avx512_support_p == true) {
+      new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.avx512") + 1) * sizeof(char));
+      sprintf(new_argv[0],"%s/gsnapl.avx512",dir);
+      if (file_exists_p(new_argv[0]) == false) {
+	fprintf(stderr,"Note: %s does not exist.  For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]);
+	free(new_argv[0]);
+      } else {
+	rc = execvp(new_argv[0],new_argv);
+	free(new_argv[0]);
+	free(new_argv);
+	return rc;
+      }
+    }
+
     if (avx2_support_p == true) {
       new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.avx2") + 1) * sizeof(char));
       sprintf(new_argv[0],"%s/gsnapl.avx2",dir);
diff --git a/src/iit-read.c b/src/iit-read.c
index 6ee0e2f..d20bcc7 100644
--- a/src/iit-read.c
+++ b/src/iit-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read.c 186665 2016-03-29 21:51:34Z twu $";
+static char rcsid[] = "$Id: iit-read.c 207317 2017-06-14 19:32:26Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -39,8 +39,6 @@ static char rcsid[] = "$Id: iit-read.c 186665 2016-03-29 21:51:34Z twu $";
 #include "mem.h"
 #include "access.h"
 #include "fopen.h"
-#include "uintlist.h"
-#include "intlist.h"
 
 /* Note: if sizeof(int) or sizeof(unsigned int) are not 4, then the below code is faulty */
 
@@ -2814,7 +2812,8 @@ IIT_load (char *memory, char *name) {
 
   if (new->total_nintervals != 0) {
     /* Need to use Univ_IIT_read instead */
-    fprintf(stderr,"Unexpected error.  Using IIT_read code on a version 1 IIT\n");
+    fprintf(stderr,"Unexpected error in IIT_load.  First int is %d.  Using IIT_read code on a version 1 IIT\n",
+	    new->total_nintervals);
     abort();
 
   } else {
@@ -3098,7 +3097,8 @@ IIT_read (char *filename, char *name, bool readonlyp, Divread_T divread, char *d
 
   if (new->total_nintervals != 0) {
     /* Need to use Univ_IIT_read instead */
-    fprintf(stderr,"Unexpected error.  Using IIT_read code on a version 1 IIT\n");
+    fprintf(stderr,"Unexpected error in IIT_read of %s.  First int is %d.  Using IIT_read code on a version 1 IIT\n",
+	    filename,new->total_nintervals);
     abort();
 
   } else {
@@ -4450,6 +4450,7 @@ IIT_get_lows_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, i
     }
 
     if (nfound == 0) {
+      FREE(matches);
       return (int *) NULL;
     } else {
       /* Eliminate duplicates */
@@ -4465,6 +4466,7 @@ IIT_get_lows_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y, i
 	}
       }
       debug(printf("\n"));
+      FREE(matches);
 
       /* No need to check for interval overlap */
     }
@@ -4522,6 +4524,7 @@ IIT_get_highs_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y,
     }
 
     if (nfound == 0) {
+      FREE(matches);
       return (int *) NULL;
     } else {
       /* Eliminate duplicates */
@@ -4537,6 +4540,7 @@ IIT_get_highs_signed (int *nmatches, T this, int divno, Chrpos_T x, Chrpos_T y,
 	}
       }
       debug(printf("\n"));
+      FREE(matches);
 
       /* No need to check for interval overlap */
     }
@@ -6652,3 +6656,807 @@ IIT_gene_overlap (T map_iit, int divno, Chrpos_T x, Chrpos_T y, bool favor_multi
 }
 
 
+Chrpos_T
+IIT_genestruct_chrpos (char *strand, char **divstring, char **gene,
+		       T map_iit, char *transcript, int querypos) {
+  Interval_T interval0;
+  int index0;
+  Chrpos_T exonstart0, exonend0, exonlength;
+  char *annot, *restofheader, *p;
+  bool allocp = false;
+  /* Converts querypos, a 1-based offset into transcript, to the position inside whichever exon line of its annotation contains it. */
+  /* Returns 0 after a message on stderr if the transcript is not found (outputs untouched) or if querypos runs past the last exon. */
+  if ((index0 = IIT_find_one(map_iit,transcript)) < 0) {
+    fprintf(stderr,"Could not find transcript %s in genes map\n",transcript);
+    return (Chrpos_T) 0;
+  } else {
+    *divstring = IIT_divstring_from_index(map_iit,index0);
+    interval0 = &(map_iit->intervals[0][index0-1]);
+    annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
+  }
+
+
+  /* Get gene from header: copy the leading characters of annot up to the first space, newline, or end of string */
+  p = annot;
+  while (*p != '\0' && *p != '\n' && *p != ' ') {
+    p++;
+  }
+  *gene = (char *) MALLOC((p - annot + 1)*sizeof(char));
+  strncpy(*gene,annot,p - annot);
+  (*gene)[p - annot] = '\0';
+  while (*p != '\0' && *p != '\n') {
+    p++;
+  }
+  if (*p == '\n') p++;
+
+  /* Each remaining line is read as "exonstart exonend"; whole exons are subtracted from querypos until it fits inside one */
+  if (Interval_sign(interval0) > 0) {
+    *strand = '+';
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	exonlength = exonend0 - exonstart0 + 1; /* Inclusive length, taken as end - start on this branch */
+	if (exonlength < (Chrpos_T) querypos) {
+	  querypos -= exonlength;
+	} else {
+	  if (allocp) {
+	    FREE(restofheader);
+	  }
+	  return exonstart0 + querypos - 1; /* Because both exonstart0 and querypos are 1-based */
+	}
+      }
+
+      /* Advance to the next exon */
+      while (*p != '\0' && *p != '\n') p++;
+      if (*p == '\n') p++;
+    }
+
+  } else {
+    *strand = '-';
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	exonlength = exonstart0 - exonend0 + 1; /* Inclusive length, taken as start - end on this branch */
+	if (exonlength < (Chrpos_T) querypos) {
+	  querypos -= exonlength;
+	} else {
+	  if (allocp) {
+	    FREE(restofheader);
+	  }
+	  return exonstart0 - querypos + 1; /* Because both exonstart and querypos are 1-based */
+	}
+      }
+
+      /* Advance to the next exon */
+      while (*p != '\0' && *p != '\n') p++;
+      if (*p == '\n') p++;
+    }
+  }
+
+  if (allocp) {
+    FREE(restofheader);
+  }
+  /* Ran out of exon lines.  *divstring, *strand and *gene (still MALLOC'd) have already been set at this point. */
+  fprintf(stderr,"querypos is too long\n");
+  return (Chrpos_T) 0;
+}
+
+
+bool
+IIT_gene_overlapp (T map_iit, int index, Chrpos_T x, Chrpos_T y) {
+  Chrpos_T exonstart, exonend;
+  int observed_genestrand;
+  char *annot, *restofheader, *p;
+  bool allocp = false;
+  /* Returns true when some exon line in the annotation of entry index intersects x..y (inclusive), else false */
+  observed_genestrand = IIT_interval_sign(map_iit,index);
+  annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
+
+  /* Skip header */
+  p = annot;
+  while (*p != '\0' && *p != '\n') {
+    p++;
+  }
+  if (*p == '\n') p++;
+  /* Each remaining line is read as "exonstart exonend"; the sign decides which of the two is treated as the lower bound */
+  if (observed_genestrand > 0) {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	/* Advance to next exon */
+	while (*p != '\0' && *p != '\n') p++;
+	if (*p == '\n') p++;
+	/* The exon misses x..y if it ends before x or starts after y */
+	if (exonend < x) {
+	  /* No overlap */
+	} else if (exonstart > y) {
+	  /* No overlap */
+	} else {
+	  if (allocp) FREE(restofheader); /* Free the header copy, not the annot cursor */
+	  return true;
+	}
+      }
+
+    }
+
+  } else {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart,&exonend) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	/* Advance to next exon */
+	while (*p != '\0' && *p != '\n') p++;
+	if (*p == '\n') p++;
+
+	if (exonstart < x) {
+	  /* No overlap */
+	} else if (exonend > y) {
+	  /* No overlap */
+	} else {
+	  if (allocp) FREE(restofheader); /* Free the header copy, not the annot cursor */
+	  return true;
+	}
+      }
+    }
+  }
+  /* Release restofheader, as IIT_genestruct_chrpos does for the same call.  NOTE(review): confirm against IIT_annotation that annot itself must not be freed */
+  if (allocp) FREE(restofheader);
+  return false;
+}
+
+
+/* Counts, for each exon line of entry index0, the positions not covered by an exon of any other matching entry.  Can handle only genes with the same direction as the given gene */
+Intlist_T
+IIT_unique_positions (T map_iit, int index0, int divno) {
+  Intlist_T uniques = (Intlist_T) NULL;
+  int nunique;
+  Interval_T interval0;
+  int *matches, index;
+  int nmatches, i;
+  Chrpos_T exonstart0, exonend0, exonstart, exonend, pos;
+  char *annot, *restofheader, *p, *q;
+  char **pointers;
+  int npointers, ptri;
+  bool allocp = false;
+  bool uniquep;
+
+  /* Query the entries in divno over index0's low..high range with index0's sign */
+  interval0 = &(map_iit->intervals[0][index0-1]);
+  matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,Interval_low(interval0),Interval_high(interval0),
+				      /*sortp*/false,Interval_sign(interval0));
+  if (nmatches == 0) {
+    /* No overlapping genes found.  NOTE(review): matches is not freed on this path; confirm it is NULL when nmatches is 0 */
+    pointers = (char **) NULL;
+    npointers = 0;
+  } else {
+    pointers = (char **) MALLOC(nmatches * sizeof(char *));
+    npointers = 0;
+    for (i = 0; i < nmatches; i++) {
+      index = matches[i];
+      if (index != index0) {
+	annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
+	
+	/* Skip header */
+	p = annot;
+	while (*p != '\0' && *p != '\n') {
+	  p++;
+	}
+	if (*p == '\n') p++;
+	
+	pointers[npointers++] = p; /* Cursor at the first exon line of this other entry */
+      }
+    }
+    FREE(matches);
+  }
+  /* NOTE(review): restofheader/allocp from the IIT_annotation calls above and below are never freed here, unlike in IIT_genestruct_chrpos -- possible leak; confirm */
+  annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
+  /* Skip header */
+  p = annot;
+  while (*p != '\0' && *p != '\n') {
+    p++;
+  }
+  if (*p == '\n') p++;
+
+  nunique = -1; /* -1 means no exon parsed yet, so nothing is pushed before the first exon */
+  if (Interval_sign(interval0) > 0) {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	if (nunique >= 0) {
+	  uniques = Intlist_push(uniques,nunique); /* Store the count of the previous exon */
+	}
+	nunique = 0;
+	/* Test every position of this exon, ascending, against the current exon of each other entry */
+	for (pos = exonstart0; pos <= exonend0; pos++) {
+	  uniquep = true;
+	  for (ptri = 0; ptri < npointers; ptri++) {
+	    q = pointers[ptri];
+	    if (*q == '\0') {
+	      /* Skip */
+	      exonstart = exonend = -1U; /* Sentinel for an exhausted cursor; stops the advance loop below (assuming Chrpos_T is unsigned int, as %u suggests) */
+	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+	      abort();
+	    }
+
+	    /* Advance to appropriate exon if necessary */
+	    while (pos > exonend) {
+	      while (*q != '\0' && *q != '\n') q++;
+	      if (*q == '\n') q++;
+
+	      if (*q == '\0') {
+		exonstart = exonend = -1U;
+	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+		abort();
+	      }
+	    }
+
+	    if (pos >= exonstart && pos <= exonend) {
+	      uniquep = false;
+	    }
+	      
+	    pointers[ptri] = q; /* Remember the cursor so the next pos resumes from this exon line */
+	  }
+	  if (uniquep == true) {
+	    nunique += 1;
+	  }
+	}
+
+	/* Advance to the next exon */
+	while (*p != '\0' && *p != '\n') p++;
+	if (*p == '\n') p++;
+      }
+    }
+
+  } else {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	if (nunique >= 0) {
+	  uniques = Intlist_push(uniques,nunique); /* Store the count of the previous exon */
+	}
+	nunique = 0;
+	/* Descending scan.  NOTE(review): if Chrpos_T is unsigned and exonend0 is 0, pos >= exonend0 stays true after --pos wraps; confirm 0 cannot occur */
+	for (pos = exonstart0; pos >= exonend0; --pos) {
+	  uniquep = true;
+	  for (ptri = 0; ptri < npointers; ptri++) {
+	    q = pointers[ptri];
+	    if (*q == '\0') {
+	      /* Skip */
+	      exonstart = exonend = 0; /* Sentinel for an exhausted cursor; pos < 0 cannot hold if Chrpos_T is unsigned, so the advance loop below stops */
+	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+	      abort();
+	    }
+
+	    /* Advance to appropriate exon if necessary */
+	    while (pos < exonend) {
+	      while (*q != '\0' && *q != '\n') q++;
+	      if (*q == '\n') q++;
+
+	      if (*q == '\0') {
+		exonstart = exonend = 0;
+	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+		abort();
+	      }
+	    }
+
+	    if (pos <= exonstart && pos >= exonend) {
+	      uniquep = false;
+	    }
+	      
+	    pointers[ptri] = q; /* Remember the cursor so the next pos resumes from this exon line */
+	  }
+	  if (uniquep == true) {
+	    nunique += 1;
+	  }
+	}
+
+	/* Advance to the next exon */
+	while (*p != '\0' && *p != '\n') p++;
+	if (*p == '\n') p++;
+      }
+    }
+  }
+
+
+  if (nunique >= 0) {
+    uniques = Intlist_push(uniques,nunique); /* Store the count of the last exon */
+  }
+
+  FREE(pointers); /* pointers is NULL here when nmatches was 0 */
+  return Intlist_reverse(uniques); /* Presumably restores exon order after the pushes -- confirm Intlist_push prepends */
+}
+
+
+/* Needed for a second round of gene expression assignment */
+Intlist_T
+IIT_unique_positions_given_others (T map_iit, int index0, int *matches, int nmatches) {
+  Intlist_T uniques = (Intlist_T) NULL;
+  int nunique;
+  Interval_T interval0;
+  int index;
+  int i;
+  Chrpos_T exonstart0, exonend0, exonstart, exonend, pos;
+  char *annot, *restofheader, *p, *q;
+  char **pointers;
+  int npointers, ptri;
+  bool allocp = false;
+  bool uniquep;
+
+
+  interval0 = &(map_iit->intervals[0][index0-1]);
+
+  pointers = MALLOC(nmatches * sizeof(char *));
+  npointers = 0;
+  for (i = 0; i < nmatches; i++) {
+    index = matches[i];
+    if (index != index0) {
+      annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
+      
+      /* Skip header */
+      p = annot;
+      while (*p != '\0' && *p != '\n') {
+	p++;
+      }
+      if (*p == '\n') p++;
+
+      pointers[npointers++] = p;
+    }
+  }
+  /* FREE(matches); */
+
+  annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
+  /* Skip header */
+  p = annot;
+  while (*p != '\0' && *p != '\n') {
+    p++;
+  }
+  if (*p == '\n') p++;
+
+  nunique = -1;
+  if (Interval_sign(interval0) > 0) {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	if (nunique >= 0) {
+	  uniques = Intlist_push(uniques,nunique);
+	}
+	nunique = 0;
+	
+	for (pos = exonstart0; pos <= exonend0; pos++) {
+	  uniquep = true;
+	  for (ptri = 0; ptri < npointers; ptri++) {
+	    q = pointers[ptri];
+	    if (*q == '\0') {
+	      /* Skip */
+	      exonstart = exonend = -1U;
+	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+	      abort();
+	    }
+
+	    /* Advance to appropriate exon if necessary */
+	    while (pos > exonend) {
+	      while (*q != '\0' && *q != '\n') q++;
+	      if (*q == '\n') q++;
+
+	      if (*q == '\0') {
+		exonstart = exonend = -1U;
+	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+		abort();
+	      }
+	    }
+
+	    if (pos >= exonstart && pos <= exonend) {
+	      uniquep = false;
+	    }
+	      
+	    pointers[ptri] = q;
+	  }
+	  if (uniquep == true) {
+	    nunique += 1;
+	  }
+	}
+
+	/* Advance to the next exon */
+	while (*p != '\0' && *p != '\n') p++;
+	if (*p == '\n') p++;
+      }
+    }
+
+  } else {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u",&exonstart0,&exonend0) != 2) {
+	fprintf(stderr,"Can't parse exon coordinates in %s\n",p);
+	abort();
+      } else {
+	if (nunique >= 0) {
+	  uniques = Intlist_push(uniques,nunique);
+	}
+	nunique = 0;
+	
+	for (pos = exonstart0; pos >= exonend0; --pos) {
+	  uniquep = true;
+	  for (ptri = 0; ptri < npointers; ptri++) {
+	    q = pointers[ptri];
+	    if (*q == '\0') {
+	      /* Skip */
+	      exonstart = exonend = 0;
+	    } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+	      fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+	      abort();
+	    }
+
+	    /* Advance to appropriate exon if necessary */
+	    while (pos < exonend) {
+	      while (*q != '\0' && *q != '\n') q++;
+	      if (*q == '\n') q++;
+
+	      if (*q == '\0') {
+		exonstart = exonend = 0;
+	      } else if (sscanf(q,"%u %u",&exonstart,&exonend) != 2) {
+		fprintf(stderr,"Can't parse exon coordinates in %s\n",q);
+		abort();
+	      }
+	    }
+
+	    if (pos <= exonstart && pos >= exonend) {
+	      uniquep = false;
+	    }
+	      
+	    pointers[ptri] = q;
+	  }
+	  if (uniquep == true) {
+	    nunique += 1;
+	  }
+	}
+
+	/* Advance to the next exon */
+	while (*p != '\0' && *p != '\n') p++;
+	if (*p == '\n') p++;
+      }
+    }
+  }
+
+
+  if (nunique >= 0) {
+    uniques = Intlist_push(uniques,nunique);
+  }
+
+  FREE(pointers);
+  return Intlist_reverse(uniques);
+}
+
+
+/* Can handle only genes with the same direction as the given gene */
+/* Values are either 1 (unique) or 0 (not unique) */
+Intlist_T
+IIT_unique_splicep (T map_iit, int index0, int divno) {
+  Intlist_T uniques = (Intlist_T) NULL;
+  Interval_T interval0;
+  int *matches, index;
+  int nmatches, i;
+  Chrpos_T exonstart0, intronstart0, intronend0, exonend0,
+    exonstart, intronstart, intronend, exonend;
+  char *annot, *restofheader, *p, *q;
+  char **pointers;
+  int npointers, ptri;
+  bool allocp = false;
+  bool uniquep, firstp;
+
+
+  interval0 = &(map_iit->intervals[0][index0-1]);
+  matches = IIT_get_signed_with_divno(&nmatches,map_iit,divno,Interval_low(interval0),Interval_high(interval0),
+				      /*sortp*/false,Interval_sign(interval0));
+  if (nmatches == 0) {
+    /* No overlapping genes found */
+    pointers = (char **) NULL;
+    npointers = 0;
+  } else {
+    pointers = (char **) MALLOC(nmatches * sizeof(char *));
+    npointers = 0;
+    for (i = 0; i < nmatches; i++) {
+      index = matches[i];
+      if (index != index0) {
+	annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
+      
+	/* Skip header */
+	p = annot;
+	while (*p != '\0' && *p != '\n') {
+	  p++;
+	}
+	if (*p == '\n') p++;
+
+	pointers[npointers++] = p;
+      }
+    }
+    FREE(matches);
+  }
+
+  annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
+  /* Skip header */
+  p = annot;
+  while (*p != '\0' && *p != '\n') {
+    p++;
+  }
+  if (*p == '\n') p++;
+
+  firstp = true;
+  if (Interval_sign(interval0) > 0) {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
+	/* Passed last intron */
+	while (*p != '\0') p++;
+      } else {
+	if (firstp == false) {
+	  uniques = Intlist_push(uniques,(int) uniquep);
+	}
+	firstp = false;
+	
+	uniquep = true;
+	for (ptri = 0; ptri < npointers; ptri++) {
+	  q = pointers[ptri];
+	  if (*q == '\0') {
+	    /* Skip */
+	    intronstart = intronend = -1U;
+	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	    /* Passed last intron */
+	    intronstart = intronend = 0;
+	    while (*q != '\0') q++;
+	  }
+
+	  /* Advance to appropriate exon if necessary */
+	  while (intronstart0 > intronstart) {
+	    while (*q != '\0' && *q != '\n') q++;
+	    if (*q == '\n') q++;
+	    
+	    if (*q == '\0') {
+	      intronstart = intronend = -1U;
+	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	      intronstart = intronend = 0;
+	      while (*q != '\0') q++;
+	    }
+	  }
+
+	  if (intronstart == intronstart0 && intronend == intronend0) {
+	    uniquep = false;
+	  }
+	      
+	  pointers[ptri] = q;
+	}
+      }
+
+      /* Advance to the next exon */
+      while (*p != '\0' && *p != '\n') p++;
+      if (*p == '\n') p++;
+    }
+
+  } else {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
+	/* Passed last intron */
+	while (*p != '\0') p++;
+      } else {
+	if (firstp == false) {
+	  uniques = Intlist_push(uniques,(int) uniquep);
+	}
+	firstp = false;
+	
+	uniquep = true;
+	for (ptri = 0; ptri < npointers; ptri++) {
+	  q = pointers[ptri];
+	  if (*q == '\0') {
+	    /* Skip */
+	    intronstart = intronend = 0;
+	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	    /* Passed last intron */
+	    intronstart = intronend = 0;
+	    while (*q != '\0') q++;
+	  }
+
+	  /* Advance to appropriate exon if necessary */
+	  while (intronstart0 < intronstart) {
+	    while (*q != '\0' && *q != '\n') q++;
+	    if (*q == '\n') q++;
+
+	    if (*q == '\0') {
+	      intronstart = intronend = 0;
+	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	      intronstart = intronend = 0;
+	      while (*q != '\0') q++;
+	    }
+	  }
+
+	  if (intronstart == intronstart0 && intronend == intronend0) {
+	    uniquep = false;
+	  }
+	      
+	  pointers[ptri] = q;
+	}
+      }
+
+      /* Advance to the next exon */
+      while (*p != '\0' && *p != '\n') p++;
+      if (*p == '\n') p++;
+    }
+  }
+
+  if (firstp == false) {
+    uniques = Intlist_push(uniques,(int) uniquep);
+  }
+
+  FREE(pointers);
+  return Intlist_reverse(uniques);
+}
+
+
+/* Can handle only genes with the same direction as the given gene */
+/* Values are either 1 (unique) or 0 (not unique) */
+Intlist_T
+IIT_unique_splicep_given_others (T map_iit, int index0, int *matches, int nmatches) {
+  Intlist_T uniques = (Intlist_T) NULL;
+  Interval_T interval0;
+  int index;
+  int i;
+  Chrpos_T exonstart0, intronstart0, intronend0, exonend0,
+    exonstart, intronstart, intronend, exonend;
+  char *annot, *restofheader, *p, *q;
+  char **pointers;
+  int npointers, ptri;
+  bool allocp = false;
+  bool uniquep, firstp;
+
+
+  interval0 = &(map_iit->intervals[0][index0-1]);
+
+  pointers = MALLOC(nmatches * sizeof(char *));
+  npointers = 0;
+  for (i = 0; i < nmatches; i++) {
+    index = matches[i];
+    if (index != index0) {
+      annot = IIT_annotation(&restofheader,map_iit,index,&allocp);
+      
+      /* Skip header */
+      p = annot;
+      while (*p != '\0' && *p != '\n') {
+	p++;
+      }
+      if (*p == '\n') p++;
+
+      pointers[npointers++] = p;
+    }
+  }
+  /* FREE(matches); */
+
+  annot = IIT_annotation(&restofheader,map_iit,index0,&allocp);
+  /* Skip header */
+  p = annot;
+  while (*p != '\0' && *p != '\n') {
+    p++;
+  }
+  if (*p == '\n') p++;
+
+  firstp = true;
+  if (Interval_sign(interval0) > 0) {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
+	/* Passed last intron */
+	while (*p != '\0') p++;
+      } else {
+	if (firstp == false) {
+	  uniques = Intlist_push(uniques,(int) uniquep);
+	}
+	firstp = false;
+	
+	uniquep = true;
+	for (ptri = 0; ptri < npointers; ptri++) {
+	  q = pointers[ptri];
+	  if (*q == '\0') {
+	    /* Skip */
+	    intronstart = intronend = -1U;
+	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	    /* Passed last intron */
+	    intronstart = intronend = 0;
+	    while (*q != '\0') q++;
+	  }
+
+	  /* Advance to appropriate exon if necessary */
+	  while (intronstart0 > intronstart) {
+	    while (*q != '\0' && *q != '\n') q++;
+	    if (*q == '\n') q++;
+	    
+	    if (*q == '\0') {
+	      intronstart = intronend = -1U;
+	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	      intronstart = intronend = 0;
+	      while (*q != '\0') q++;
+	    }
+	  }
+
+	  if (intronstart == intronstart0 && intronend == intronend0) {
+	    uniquep = false;
+	  }
+	      
+	  pointers[ptri] = q;
+	}
+      }
+
+      /* Advance to the next exon */
+      while (*p != '\0' && *p != '\n') p++;
+      if (*p == '\n') p++;
+    }
+
+  } else {
+    while (*p != '\0') {
+      if (sscanf(p,"%u %u\n%u %u",&exonstart0,&intronstart0,&intronend0,&exonend0) != 4) {
+	/* Passed last intron */
+	while (*p != '\0') p++;
+      } else {
+	if (firstp == false) {
+	  uniques = Intlist_push(uniques,(int) uniquep);
+	}
+	firstp = false;
+	
+	uniquep = true;
+	for (ptri = 0; ptri < npointers; ptri++) {
+	  q = pointers[ptri];
+	  if (*q == '\0') {
+	    /* Skip */
+	    intronstart = intronend = 0;
+	  } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	    /* Passed last intron */
+	    intronstart = intronend = 0;
+	    while (*q != '\0') q++;
+	  }
+
+	  /* Advance to appropriate exon if necessary */
+	  while (intronstart0 < intronstart) {
+	    while (*q != '\0' && *q != '\n') q++;
+	    if (*q == '\n') q++;
+
+	    if (*q == '\0') {
+	      intronstart = intronend = 0;
+	    } else if (sscanf(q,"%u %u\n%u %u",&exonstart,&intronstart,&intronend,&exonend) != 4) {
+	      intronstart = intronend = 0;
+	      while (*q != '\0') q++;
+	    }
+	  }
+
+	  if (intronstart == intronstart0 && intronend == intronend0) {
+	    uniquep = false;
+	  }
+	      
+	  pointers[ptri] = q;
+	}
+      }
+
+      /* Advance to the next exon */
+      while (*p != '\0' && *p != '\n') p++;
+      if (*p == '\n') p++;
+    }
+  }
+
+  if (firstp == false) {
+    uniques = Intlist_push(uniques,(int) uniquep);
+  }
+
+  FREE(pointers);
+  return Intlist_reverse(uniques);
+}
diff --git a/src/iit-read.h b/src/iit-read.h
index 6bb1dca..2629d96 100644
--- a/src/iit-read.h
+++ b/src/iit-read.h
@@ -1,4 +1,4 @@
-/* $Id: iit-read.h 184169 2016-02-12 19:35:45Z twu $ */
+/* $Id: iit-read.h 207317 2017-06-14 19:32:26Z twu $ */
 #ifndef IIT_READ_INCLUDED
 #define IIT_READ_INCLUDED
 #ifdef HAVE_CONFIG_H
@@ -8,6 +8,7 @@
 #include <stdio.h>
 #include "bool.h"
 #include "uintlist.h"
+#include "intlist.h"
 #include "list.h"
 #include "interval.h"
 #include "types.h"
@@ -224,6 +225,22 @@ IIT_print_header (Filestring_T fp, T this, int *matches, int nmatches,
 
 extern Overlap_T
 IIT_gene_overlap (T map_iit, int divno, Chrpos_T x, Chrpos_T y, bool favor_multiexon_p);
+extern bool
+IIT_gene_overlapp (T map_iit, int index, Chrpos_T x, Chrpos_T y);
+
+extern Chrpos_T
+IIT_genestruct_chrpos (char *strand, char **divstring, char **gene,
+		       T map_iit, char *transcript, int querypos);
+
+extern Intlist_T
+IIT_unique_positions (T map_iit, int index0, int divno);
+extern Intlist_T
+IIT_unique_positions_given_others (T map_iit, int index0, int *matches, int nmatches);
+extern Intlist_T
+IIT_unique_splicep (T map_iit, int index0, int divno);
+extern Intlist_T
+IIT_unique_splicep_given_others (T map_iit, int index0, int *matches, int nmatches);
+
 
 #undef T
 #endif
diff --git a/src/iit-write.c b/src/iit-write.c
index 453e75a..2f579f9 100644
--- a/src/iit-write.c
+++ b/src/iit-write.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-write.c 184463 2016-02-18 00:08:41Z twu $";
+static char rcsid[] = "$Id: iit-write.c 207854 2017-06-29 20:33:52Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -197,7 +197,8 @@ node_select (int *index, Chrpos_T *value, int i, int j,
 
 /* Makes node out of sigmas[i..j], and recurses. */
 static Node_T
-Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Interval_T *intervals) {
+Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Interval_T *intervals,
+	   bool presortedp) {
   Node_T node;
   int lambda, iota;
   int q, r;
@@ -236,7 +237,7 @@ Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Interval_
     for (lambda = q+1; lambda <= r; lambda++) {
       sigmas[lambda] = omegas[lambda];
     }
-    Interval_qsort_by_omega(omegas,q+1,r,intervals);
+    Interval_qsort_by_omega(omegas,q+1,r,intervals,presortedp);
     node->a = q + 1;
     node->b = r;
 
@@ -248,8 +249,8 @@ Node_make (int *nnodes, int i, int j, int *sigmas, int *omegas, struct Interval_
     assert(Node_is_valid_output (node, i, j, sigmas, omegas, intervals));
 
     /* recurse */
-    node->left  = Node_make(&(*nnodes),i,q,sigmas,omegas,intervals);
-    node->right = Node_make(&(*nnodes),r+1,j,sigmas,omegas,intervals);
+    node->left  = Node_make(&(*nnodes),i,q,sigmas,omegas,intervals,presortedp);
+    node->right = Node_make(&(*nnodes),r+1,j,sigmas,omegas,intervals,presortedp);
     
     return node;
   }
@@ -269,7 +270,7 @@ Node_index (Node_T node, int *index) {
 
 
 static int
-IIT_count_nnodes (List_T intervallist) {
+IIT_count_nnodes (List_T intervallist, bool presortedp) {
   int nnodes;
   Node_T root;
   int nintervals, i;
@@ -294,12 +295,12 @@ IIT_count_nnodes (List_T intervallist) {
     }
 
     /* Sort sigmas with respect to Interval_array_low */
-    Interval_qsort_by_sigma(sigmas,1,nintervals,intervals);
+    Interval_qsort_by_sigma(sigmas,1,nintervals,intervals,presortedp);
     
     omegas = (int *) CALLOC(nintervals+1,sizeof(int));
 
     /* make first node, and recurse... */
-    root = Node_make(&nnodes,1,nintervals,sigmas,omegas,intervals);
+    root = Node_make(&nnodes,1,nintervals,sigmas,omegas,intervals,presortedp);
 
     Node_gc(&root);
     FREE(omegas);
@@ -312,7 +313,7 @@ IIT_count_nnodes (List_T intervallist) {
 
 static void
 IIT_build_one_div (Node_T *root, struct Interval_T **intervals, int **alphas, int **betas, int **sigmas, int **omegas,
-		   int *nnodes, List_T intervallist, int nintervals) {
+		   int *nnodes, List_T intervallist, int nintervals, bool presortedp) {
   int index = 0;		/* Must be initialized to 0 */
   int i;
   List_T p;
@@ -333,8 +334,8 @@ IIT_build_one_div (Node_T *root, struct Interval_T **intervals, int **alphas, in
   for (i = 1; i <= nintervals; i++) {
     (*alphas)[i] = (*betas)[i] = i;
   }
-  Interval_qsort_by_sigma(*alphas,1,nintervals,*intervals);
-  Interval_qsort_by_omega(*betas,1,nintervals,*intervals);
+  Interval_qsort_by_sigma(*alphas,1,nintervals,*intervals,presortedp);
+  Interval_qsort_by_omega(*betas,1,nintervals,*intervals,presortedp);
 
 
   /* IIT ordering of intervals */
@@ -345,12 +346,12 @@ IIT_build_one_div (Node_T *root, struct Interval_T **intervals, int **alphas, in
   }
 
   /* Sort sigmas with respect to Interval_array_low */
-  Interval_qsort_by_sigma(*sigmas,1,nintervals,*intervals);
+  Interval_qsort_by_sigma(*sigmas,1,nintervals,*intervals,presortedp);
 
   *omegas = (int *) CALLOC(nintervals+1,sizeof(int));
 
   /* make first node, and recurse... */
-  *root = Node_make(&(*nnodes),1,nintervals,*sigmas,*omegas,*intervals);
+  *root = Node_make(&(*nnodes),1,nintervals,*sigmas,*omegas,*intervals,presortedp);
   Node_index(*root,&index);
 
   return;
@@ -1485,7 +1486,7 @@ IIT_write (char *iitfile, List_T divlist, List_T typelist, List_T fieldlist,
       divstring = (char *) List_head(d);
       intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
       nintervals[divno] = List_length(intervallist);
-      nnodes[divno] = IIT_count_nnodes(intervallist);
+      nnodes[divno] = IIT_count_nnodes(intervallist,/*presortedp*/false);
     }
 
     cum_nintervals = (int *) CALLOC(ndivs+1,sizeof(int));
@@ -1524,7 +1525,8 @@ IIT_write (char *iitfile, List_T divlist, List_T typelist, List_T fieldlist,
       } else {
 	fprintf(stderr,"Processing division/chromosome %s...sorting...",divstring);
       }
-      IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno]);
+      IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno],
+			/*presortedp*/false);
 
       fprintf(stderr,"writing...");
       IIT_write_one_div(fp,root,alphas,betas,sigmas,omegas,nintervals[divno],version);
@@ -1557,7 +1559,8 @@ IIT_write (char *iitfile, List_T divlist, List_T typelist, List_T fieldlist,
 /* If annotlist is NULL, X's are written */
 T
 IIT_create (List_T divlist, List_T typelist, List_T fieldlist, Table_T intervaltable,
-	    Table_T labeltable, Table_T datatable, Sorttype_T divsort, int version) {
+	    Table_T labeltable, Table_T datatable, Sorttype_T divsort, int version,
+	    bool presortedp) {
   T new;
   Node_T root;
   List_T intervallist, d;
@@ -1574,7 +1577,7 @@ IIT_create (List_T divlist, List_T typelist, List_T fieldlist, Table_T intervalt
     divstring = (char *) List_head(d);
     intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
     nintervals[divno] = List_length(intervallist);
-    nnodes[divno] = IIT_count_nnodes(intervallist);
+    nnodes[divno] = IIT_count_nnodes(intervallist,presortedp);
   }
 
   cum_nintervals = (int *) CALLOC(ndivs+1,sizeof(int));
@@ -1611,8 +1614,8 @@ IIT_create (List_T divlist, List_T typelist, List_T fieldlist, Table_T intervalt
   for (d = divlist, divno = 0; d != NULL; d = List_next(d), divno++) {
     divstring = (char *) List_head(d);
     intervallist = (List_T) Table_get(intervaltable,(void *) divstring);
-
-    IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno]);
+    IIT_build_one_div(&root,&intervals,&alphas,&betas,&sigmas,&omegas,&nnodes_one_div,intervallist,nintervals[divno],
+		      presortedp);
     IIT_create_one_div(new,divno,root,alphas,betas,sigmas,omegas,nintervals[divno]);
 
     Node_gc(&root);
diff --git a/src/iit-write.h b/src/iit-write.h
index a974ad4..8332041 100644
--- a/src/iit-write.h
+++ b/src/iit-write.h
@@ -1,4 +1,4 @@
-/* $Id: iit-write.h 157221 2015-01-22 18:38:57Z twu $ */
+/* $Id: iit-write.h 207854 2017-06-29 20:33:52Z twu $ */
 #ifndef IIT_WRITE_INCLUDED
 #define IIT_WRITE_INCLUDED
 
@@ -18,7 +18,8 @@ IIT_write (char *iitfile, List_T divlist, List_T typelist, List_T fieldlist, Tab
 	   bool label_pointers_8p, bool annot_pointers_8p);
 extern T
 IIT_create (List_T divlist, List_T typelist, List_T fieldlist, Table_T intervaltable,
-	    Table_T labeltable, Table_T datatable, Sorttype_T divsort, int version);
+	    Table_T labeltable, Table_T datatable, Sorttype_T divsort, int version,
+	    bool presortedp);
 extern T
 IIT_new (List_T intervallist);
 extern void
diff --git a/src/iit_get.c b/src/iit_get.c
index 39f24c1..c1bd1f7 100644
--- a/src/iit_get.c
+++ b/src/iit_get.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_get.c 184172 2016-02-12 19:41:14Z twu $";
+static char rcsid[] = "$Id: iit_get.c 206178 2017-05-11 20:31:41Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1191,7 +1191,7 @@ main (int argc, char *argv[]) {
 	FREE(divstring);
       }
       FREE(matches);
-      printf("%ld\n",total);
+      /* printf("%ld\n",total); -- Not sure why this was here */
       fprintf(stdout,"# End\n");
       fflush(stdout);
     }
diff --git a/src/indel.c b/src/indel.c
index 92ea133..7b3875d 100644
--- a/src/indel.c
+++ b/src/indel.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indel.c 197550 2016-09-08 01:15:16Z twu $";
+static char rcsid[] = "$Id: indel.c 204387 2017-03-18 00:02:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -9,6 +9,7 @@ static char rcsid[] = "$Id: indel.c 197550 2016-09-08 01:15:16Z twu $";
 #include "mem.h"
 #include "genome128_hr.h"
 #include "stage3hr.h"
+#include "intron.h"
 
 
 /* Indels */ 
@@ -212,18 +213,25 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
 /* Called only by sarray-read.c, where plusp is always true */
 /* indels is negative here */
 int
-Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
+Indel_resolve_middle_deletion (int *best_introntype,
+			       int *best_nmismatches_i, int *best_nmismatches_j,
 			       Univcoord_T left, int indels, Compress_T query_compress,
 			       int querystart, int queryend, int querylength,
-			       int max_mismatches_allowed, bool plusp, int genestrand) {
+			       int max_mismatches_allowed, bool plusp, int genestrand,
+			       int min_intronlength) {
   int best_indel_pos = -1, indel_pos;
+  char *gbuffer;
 #ifdef DEBUG2
   int i;
-  char *gbuffer;
 #endif
   int nmismatches_left, nmismatches_right, nmismatches_lefti, nmismatches_righti;
   int best_sum, sum, lefti, righti;
   int *mismatch_positions_left, *mismatch_positions_right;
+  char left1, left2, right2, right1;
+  int introntype, intron_level, best_intron_level;
+
+  *best_introntype = NONINTRON;
+  best_intron_level = Intron_level(NONINTRON);
 
 #ifdef HAVE_ALLOCA
   if (querylength <= MAX_STACK_READLENGTH) {
@@ -246,13 +254,19 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
   /* left = ptr->diagonal - querylength; */
 
   assert(indels < 0);
-  debug2(gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char)));
-  debug2(Genome_fill_buffer_blocks(left,querylength-indels,gbuffer));
+#ifdef DEBUG2
+  gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char));
+  Genome_fill_buffer_blocks(left,querylength-indels,gbuffer);
+#else
+  if (-indels >= min_intronlength) {
+    gbuffer = (char *) CALLOC(querylength-indels+1,sizeof(char));
+    Genome_fill_buffer_blocks(left,querylength-indels,gbuffer);
+  }  
+#endif
   debug2(printf("solve_middle_indel, plus, deletion (indels %d), max_mismatches_allowed %d: Getting genome at diagonal - querylength %d = %llu\n",
 		indels,max_mismatches_allowed,querylength,(unsigned long long) left));
   debug2(printf("g1: %s\n",gbuffer));
   debug2(printf("g2: %s\n",&(gbuffer[-indels])));
-  debug2(FREE(gbuffer));
 
   /* No need to check chromosome bounds */
   nmismatches_left = Genome_mismatches_left(mismatch_positions_left,max_mismatches_allowed,
@@ -293,16 +307,44 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
       lefti--;
     }
     sum = righti + lefti + 1;
-    debug2(printf("  (Case C1) sum %d=%d+%d at indel_pos %d.",
-		  sum,righti,lefti+1,mismatch_positions_right[righti]+1));
-    if (sum <= best_sum) {
+
+    if (-indels >= min_intronlength) {
+      /* Account for introntype in cases of ties */
       indel_pos = mismatch_positions_right[righti] + 1;
-      if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
-	best_indel_pos = indel_pos;
-	nmismatches_righti = righti;
-	nmismatches_lefti = lefti + 1;
-	debug2(printf("**"));
-	best_sum = sum;
+      left1 = gbuffer[indel_pos];
+      left2 = gbuffer[indel_pos+1];
+      right2 = gbuffer[indel_pos-indels-2];
+      right1 = gbuffer[indel_pos-indels-1];
+      introntype = Intron_type(left1,left2,right2,right1,left1,left2,right2,right1,/*cdna_direction*/0);
+      intron_level = Intron_level(introntype);
+      debug2(printf("  (Case C1) sum %d=%d+%d at indel_pos %d (%c%c-%c%c, type %s).",
+		    sum,righti,lefti+1,mismatch_positions_right[righti]+1,
+		    left1,left2,right2,right1,Intron_type_string(introntype)));
+      if (sum < best_sum ||
+	  (sum == best_sum && intron_level > best_intron_level)) {
+	if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
+	  best_indel_pos = indel_pos;
+	  nmismatches_righti = righti;
+	  nmismatches_lefti = lefti + 1;
+	  debug2(printf("**"));
+	  best_sum = sum;
+	  *best_introntype = introntype;
+	  best_intron_level = intron_level;
+	}
+      }
+
+    } else {
+      debug2(printf("  (Case C1) sum %d=%d+%d at indel_pos %d.",
+		    sum,righti,lefti+1,mismatch_positions_right[righti]+1));
+      if (sum <= best_sum) {
+	indel_pos = mismatch_positions_right[righti] + 1;
+	if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
+	  best_indel_pos = indel_pos;
+	  nmismatches_righti = righti;
+	  nmismatches_lefti = lefti + 1;
+	  debug2(printf("**"));
+	  best_sum = sum;
+	}
       }
     }
     righti++;
@@ -318,26 +360,45 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
       righti--;
     }
     sum = lefti + righti + 1;
-    debug2(printf("  (Case C2) sum %d=%d+%d at indel_pos %d.",
-		  sum,lefti,righti+1,mismatch_positions_left[lefti]));
-    if (sum < best_sum) {
+
+    if (-indels >= min_intronlength) {
+      /* Account for introntype in cases of ties */
       indel_pos = mismatch_positions_left[lefti];
-      if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
-	best_indel_pos = indel_pos;
-	nmismatches_lefti = lefti;
-	nmismatches_righti = righti + 1;
-	debug2(printf("**"));
-	best_sum = sum;
+      left1 = gbuffer[indel_pos];
+      left2 = gbuffer[indel_pos+1];
+      right2 = gbuffer[indel_pos-indels-2];
+      right1 = gbuffer[indel_pos-indels-1];
+      introntype = Intron_type(left1,left2,right2,right1,left1,left2,right2,right1,/*cdna_direction*/0);
+      intron_level = Intron_level(introntype);
+      debug2(printf("  (Case C2) sum %d=%d+%d at indel_pos %d (%c%c-%c%c).",
+		    sum,lefti,righti+1,mismatch_positions_left[lefti],
+		    gbuffer[indel_pos],gbuffer[indel_pos+1],gbuffer[indel_pos-indels-2],gbuffer[indel_pos-indels-1]));
+      if (sum < best_sum ||
+	  (sum == best_sum && intron_level > best_intron_level) ||
+	  (sum == best_sum && intron_level == best_intron_level && indel_pos < best_indel_pos)) {
+	if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
+	  best_indel_pos = indel_pos;
+	  nmismatches_lefti = lefti;
+	  nmismatches_righti = righti + 1;
+	  debug2(printf("**"));
+	  best_sum = sum;
+	  *best_introntype = introntype;
+	  best_intron_level = intron_level;
+	}
       }
-    } else if (sum == best_sum) {
+
+    } else {
+      debug2(printf("  (Case C2) sum %d=%d+%d at indel_pos %d.",
+		    sum,lefti,righti+1,mismatch_positions_left[lefti]));
       indel_pos = mismatch_positions_left[lefti];
-      if (indel_pos < best_indel_pos) {
+      if (sum < best_sum ||
+	  (sum == best_sum && indel_pos < best_indel_pos)) {
 	if (indel_pos >= min_indel_end_matches && indel_pos <= querylength - min_indel_end_matches) {
 	  best_indel_pos = indel_pos;
 	  nmismatches_lefti = lefti;
 	  nmismatches_righti = righti + 1;
 	  debug2(printf("**"));
-	  /* best_sum = sum; */
+	  best_sum = sum;
 	}
       }
     }
@@ -345,6 +406,14 @@ Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
   }
   debug2(printf("\n"));
 
+#ifdef DEBUG2
+  FREE(gbuffer);
+#else
+  if (-indels >= min_intronlength) {
+    FREE(gbuffer);
+  }
+#endif
+
 #ifdef HAVE_ALLOCA
   if (querylength <= MAX_STACK_READLENGTH) {
     FREEA(mismatch_positions_left);
diff --git a/src/indel.h b/src/indel.h
index 055c5b7..36750f2 100644
--- a/src/indel.h
+++ b/src/indel.h
@@ -1,4 +1,4 @@
-/* $Id: indel.h 184464 2016-02-18 00:09:13Z twu $ */
+/* $Id: indel.h 204387 2017-03-18 00:02:54Z twu $ */
 #ifndef INDEL_INCLUDED
 #define INDEL_INCLUDED
 
@@ -19,10 +19,11 @@ Indel_resolve_middle_insertion (int *best_nmismatches_i, int *best_nmismatches_j
 				int max_mismatches_allowed, bool plusp, int genestrand);
 
 extern int
-Indel_resolve_middle_deletion (int *best_nmismatches_i, int *best_nmismatches_j,
+Indel_resolve_middle_deletion (int *best_introntype, int *best_nmismatches_i, int *best_nmismatches_j,
 			       Univcoord_T left, int indels, Compress_T query_compress,
 			       int querystart, int queryend, int querylength,
-			       int max_mismatches_allowed, bool plusp, int genestrand);
+			       int max_mismatches_allowed, bool plusp, int genestrand,
+			       int min_intronlength);
 
 
 extern List_T
diff --git a/src/indexdb.c b/src/indexdb.c
index 47c98a6..ef05ef1 100644
--- a/src/indexdb.c
+++ b/src/indexdb.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb.c 200235 2016-11-08 00:57:16Z twu $";
+static char rcsid[] = "$Id: indexdb.c 203557 2017-02-15 20:10:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1172,10 +1172,9 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
   Filenames_T filenames;
   Oligospace_T basespace, base;
 
-  Oligospace_T poly_T;
+  /* Oligospace_T poly_T; */
   /* Positionsptr_T ptr0; -- UINT8 or UINT4 */
-  Positionsptr_T end0;	/* UINT8 or UINT4 */
-  size_t filesize;
+  /* Positionsptr_T end0; -- UINT8 or UINT4 */
 
 #ifdef LARGE_GENOMES
   int offsetspages_fd;
@@ -1555,6 +1554,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
     if (multiple_sequences_p == false && unload_shared_memory_p == false) {
       new->positions_high = (unsigned char *) Access_mmap(&new->positions_high_fd,&new->positions_high_len,
 							  filenames->positions_high_filename,/*randomp*/false);
+      new->positions_high_access = MMAPPED;
     } else
 #endif
       if (sharedp == true) {
@@ -1643,6 +1643,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
 
 #ifdef HAVE_MMAP
   } else if (positions_access == USE_MMAP_PRELOAD) {
+
     if (snps_root) {
       fprintf(stderr,"Pre-loading %s (%s) positions, kmer %d, interval %d...",
 	      idx_filesuffix,snps_root,new->index1part,new->index1interval);
@@ -1657,6 +1658,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
       fprintf(stderr,"insufficient memory (will use disk file instead, but program will be slow)\n");
       new->positions_high_access = FILEIO;
     } else {
+      new->positions_high_access = MMAPPED;
       comma = Genomicpos_commafmt(new->positions_high_len);
       fprintf(stderr,"done (%s bytes",comma);
       FREE(comma);
@@ -1709,6 +1711,7 @@ Indexdb_new_genome (Width_T *index1part, Width_T *index1interval,
 	      filenames->positions_high_filename);
       new->positions_high_access = FILEIO;
     } else {
+      new->positions_high_access = MMAPPED;
       new->positions_low = (UINT4 *) Access_mmap(&new->positions_low_fd,&new->positions_low_len,
 						 filenames->positions_low_filename,/*randomp*/true);
       if (new->positions_low == NULL) {
@@ -1997,8 +2000,14 @@ Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
   UINT4 *offsetsstrm;
   size_t offsetsmeta_len, offsetsstrm_len;
   Positionsptr_T *offsets = NULL;
-  Oligospace_T oligospace, oligoi;
+  Oligospace_T oligospace;
+#ifdef PMAP
+  Oligospace_T oligoi;
+#elif defined(LARGE_GENOMES)
+#else
+  Oligospace_T oligoi;
   Blocksize_T blocksize;
+#endif
 #ifdef HAVE_MMAP
   int offsetsmeta_fd, offsetsstrm_fd;
 #else
@@ -2012,7 +2021,9 @@ Indexdb_offsets_from_bitpack (char *offsetsmetafile, char *offsetsstrmfile,
 #else
   oligospace = power(4,index1part);
 #endif
+#if !defined(PMAP) && !defined(LARGE_GENOMES)
   blocksize = 64; /* Used to be determined by 4^(kmer - basesize), but now fixed at 64 */
+#endif
 
 
 #ifdef HAVE_MMAP
diff --git a/src/indexdb_hr.c b/src/indexdb_hr.c
index 63bb08a..231790c 100644
--- a/src/indexdb_hr.c
+++ b/src/indexdb_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb_hr.c 184203 2016-02-13 03:46:21Z twu $";
+static char rcsid[] = "$Id: indexdb_hr.c 205966 2017-05-04 00:48:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -13,11 +13,16 @@ static char rcsid[] = "$Id: indexdb_hr.c 184203 2016-02-13 03:46:21Z twu $";
 #include <pthread.h>
 #endif
 
+#if defined(HAVE_SSE4_1)
+#include <smmintrin.h>
+#endif
+
 #include "indexdb_hr.h"
 #include "indexdbdef.h"
 #include "genome128_hr.h"
 #include "bitpack64-read.h"
 #include "bitpack64-readtwo.h"
+#include "merge.h"
 
 
 #ifdef WORDS_BIGENDIAN
@@ -32,6 +37,7 @@ static char rcsid[] = "$Id: indexdb_hr.c 184203 2016-02-13 03:46:21Z twu $";
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>		/* For memcpy */
+#include "assert.h"
 #include "mem.h"
 #include "listdef.h"
 
@@ -204,1617 +210,1622 @@ check_heap_even (Batch_T *heap, int heapsize) {
 }
 #endif
 
-#define READ_THEN_WRITE 1
-
 #ifdef LARGE_GENOMES
 static unsigned char sentinel_position_high = (unsigned char) -1;
 static UINT4 sentinel_position_low = (UINT4) -1;
 #endif
 
 
-static Univcoord_T *
-merge_batches_one_heap_16_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) {
-  Univcoord_T *positions, *ptr, position, last_position, this_position;
-  struct Batch_T sentinel_struct;
-  Batch_T batch, sentinel, heap[17];
-  int heapsize;
-  unsigned int i;
-#ifdef READ_THEN_WRITE
-  unsigned int smallesti_1, smallesti_2, smallesti;
+
+/************************************************************************
+ *  The following positions functions are taken from indexdb.c
+ ************************************************************************/
+
+#ifndef LARGE_GENOMES
+static void
+positions_move_absolute (int positions_fd, Positionsptr_T ptr) {
+  off_t offset = ptr*((off_t) sizeof(Univcoord_T));
+
+  if (lseek(positions_fd,offset,SEEK_SET) < 0) {
+    fprintf(stderr,"Attempted to do lseek on offset %u*%d=%zd\n",
+	    ptr,(int) sizeof(Univcoord_T),offset);
+    perror("Error in indexdb.c, positions_move_absolute_4");
+    exit(9);
+  }
+  return;
+}
+
+static void
+positions_read_multiple (int positions_fd, Univcoord_T *values, int n) {
+  int i;
+  Univcoord_T value;
+  unsigned char buffer[4];
+
+#ifdef WORDS_BIGENDIAN
+  /* Need to keep in bigendian format */
+  for (i = 0; i < n; i++) {
+    read(positions_fd,buffer,4);
+
+    value = (buffer[0] & 0xff);
+    value <<= 8;
+    value |= (buffer[1] & 0xff);
+    value <<= 8;
+    value |= (buffer[2] & 0xff);
+    value <<= 8;
+    value |= (buffer[3] & 0xff);
+
+    values[i] = value;
+  }
 #else
-  unsigned int parenti, smallesti;
+  for (i = 0; i < n; i++) {
+    read(positions_fd,buffer,4);
+
+    value = (buffer[3] & 0xff);
+    value <<= 8;
+    value |= (buffer[2] & 0xff);
+    value <<= 8;
+    value |= (buffer[1] & 0xff);
+    value <<= 8;
+    value |= (buffer[0] & 0xff);
+
+    values[i] = value;
+  }
 #endif
 
-  debug3(printf("starting merge_batches_one_heap_16_existing\n"));
+  return;
+}
+#endif
 
-  debug0(int nentries_save = nentries);
 
-  ptr = positions = (Univcoord_T *) CALLOC(nentries,sizeof(Univcoord_T));
 
-  /* Set up heap */
-  heapsize = 0;
-  for (i = 0; i < 16; i++) {
-    batch = &(batchpool[i]);
-    if (batch->nentries > 0) {
 #ifdef LARGE_GENOMES
-      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
-#elif defined(WORDS_BIGENDIAN)
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
-#else
-      batch->position = *batch->positionptr++;
+static UINT4 *
+point_one_shift (int *nentries, unsigned char **positions_high, T this, Oligospace_T subst) {
+  UINT4 *positions_low;
+  Positionsptr_T ptr0, end0;
+#ifdef DEBUG
+  int i;
 #endif
-      heap_insert_even(heap,&heapsize,batch,batch->position);
-    }
-  }
 
-  sentinel_struct.position = (Univcoord_T) -1; /* infinity */
-#ifdef LARGE_GENOMES
-  sentinel_struct.positionptr_high = &sentinel_position_high;
-  sentinel_struct.positionptr_low = &sentinel_position_low;
+  if (this->compression_type == NO_COMPRESSION) {
+#ifdef WORDS_BIGENDIAN
+    abort();
 #else
-  sentinel_struct.positionptr = &(sentinel_struct.position);
+    ptr0 = this->offsetsstrm[subst];
+    end0 = this->offsetsstrm[subst+1];
 #endif
-  sentinel = &sentinel_struct;
 
-  for (i = heapsize+1; i <= 16; i++) {
-    heap[i] = sentinel;
+  } else if (this->compression_type == BITPACK64_COMPRESSION) {
+    ptr0 = Bitpack64_read_two_huge(&end0,subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm);
   }
 
-  last_position = 0U;
-  while (--nentries >= 1) {
-    debug3(printf("nentries = %d, top of heap is %u (%d)\n",
-		  nentries+1,heap[1]->position,heapsize));
 
-    /* Get minimum */
-    batch = heap[1];
-#ifdef CONVERT_TO_LITTLEENDIAN
-    this_position = Bigendian_convert_univcoord(batch->position) + diagterm;
+  debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0));
+
+  if ((*nentries = end0 - ptr0) == 0) {
+    *positions_high = (unsigned char *) NULL;
+    return (UINT4 *) NULL;
+  } else {
+    if (this->positions_high_access == FILEIO || this->positions_low_access == FILEIO) {
+      abort();
+
+    } else {
+      /* ALLOCATED or MMAPPED */
+      *positions_high = &(this->positions_high[ptr0]);
+      positions_low = &(this->positions_low[ptr0]);
+    }
+  }
+      
+  debug(
+	printf("%d entries:",*nentries);
+	for (i = 0; i < *nentries; i++) {
+	  printf(" %u",(Univcoord_T) positions_high[i] << 32 + positions_low[i]);
+	}
+	printf("\n");
+	);
+  
+  return positions_low;
+}
+
 #else
-    this_position = batch->position + diagterm;
+
+static Univcoord_T *
+point_one_shift (int *nentries, T this, Oligospace_T subst) {
+  Univcoord_T *positions;
+  Positionsptr_T ptr0, end0;
+#ifdef DEBUG
+  int i;
 #endif
-    if (this_position != last_position) {
-      *ptr++ = this_position;
+
+  if (this->compression_type == NO_COMPRESSION) {
+#ifdef WORDS_BIGENDIAN
+#if 0
+    if (this->offsetsstrm_access == ALLOCATED) {
+      ptr0 = this->offsetsstrm[subst];
+      end0 = this->offsetsstrm[subst+1];
+    } else {
+      ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]);
+      end0 = Bigendian_convert_uint(this->offsetsstrm[subst+1]);
     }
-    last_position = this_position;
+#else
+    abort();
+#endif
+#else
+    ptr0 = this->offsetsstrm[subst];
+    end0 = this->offsetsstrm[subst+1];
+#endif
 
-    if (--batch->nentries <= 0) {
-      /* Use last batch (or sentinel) in heap for insertion */
-      heap[1] = batch = (heapsize == 1) ? sentinel : heap[heapsize];
-      heap[heapsize--] = sentinel;
+  } else if (this->compression_type == BITPACK64_COMPRESSION) {
+    ptr0 = Bitpack64_read_two(&end0,subst,this->offsetsmeta,this->offsetsstrm);
+  }
+
+
+  debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0));
+
+  if ((*nentries = end0 - ptr0) == 0) {
+    return (Univcoord_T *) NULL;
+  } else {
+    if (this->positions_access == FILEIO) {
+      positions = (Univcoord_T *) CALLOC(*nentries,sizeof(Univcoord_T));
+#ifdef HAVE_PTHREAD
+      pthread_mutex_lock(&this->positions_read_mutex);
+#endif
+      positions_move_absolute(this->positions_fd,ptr0);
+      positions_read_multiple(this->positions_fd,positions,*nentries);
+#ifdef HAVE_PTHREAD
+      pthread_mutex_unlock(&this->positions_read_mutex);
+#endif
 
     } else {
-      /* Advance heap, and use this batch for insertion */
-#ifdef LARGE_GENOMES
-      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
-#elif defined(WORDS_BIGENDIAN)
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
+      /* ALLOCATED or MMAPPED */
+      positions = &(this->positions[ptr0]);
+    }
+  }
+      
+#ifdef WORDS_BIGENDIAN
+  debug(
+	printf("%d entries:",*nentries);
+	for (i = 0; i < *nentries; i++) {
+	  printf(" %u",Bigendian_convert_univcoord(positions[i]));
+	}
+	printf("\n");
+	);
 #else
-      batch->position = *batch->positionptr++;
+  debug(
+	printf("%d entries:",*nentries);
+	for (i = 0; i < *nentries; i++) {
+	  printf(" %u",positions[i]);
+	}
+	printf("\n");
+	);
+#endif
+  
+  return positions;
+}
+
 #endif
+
+
+/*                      87654321 */
+#define LOW_TWO_BITS  0x00000003
+
+#ifdef DEBUG
+static char *
+shortoligo_nt (Oligospace_T oligo, int oligosize) {
+  char *nt;
+  int i, j;
+  Oligospace_T lowbits;
+
+  nt = (char *) CALLOC(oligosize+1,sizeof(char));
+  j = oligosize-1;
+  for (i = 0; i < oligosize; i++) {
+    lowbits = oligo & LOW_TWO_BITS;
+    switch (lowbits) {
+    case RIGHT_A: nt[j] = 'A'; break;
+    case RIGHT_C: nt[j] = 'C'; break;
+    case RIGHT_G: nt[j] = 'G'; break;
+    case RIGHT_T: nt[j] = 'T'; break;
     }
+    oligo >>= 2;
+    j--;
+  }
 
-    position = batch->position;
-    debug3(printf("starting heapify with %u\n",position));
+  return nt;
+}
+#endif
 
-#ifdef READ_THEN_WRITE
-    /* Comparison 0/3 */
-    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
-    if (position <= heap[2]->position) {
-      debug3(printf("Inserting at 1\n"));
-      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+
+#ifdef LARGE_GENOMES
+static int
+count_one_shift (T this, Oligospace_T subst, int nadjacent) {
+  Positionsptr_T ptr0, end0;
+
+  if (this->compression_type == NO_COMPRESSION) {
+#ifdef WORDS_BIGENDIAN
+#if 0
+    if (this->offsetsstrm_access == ALLOCATED) {
+      ptr0 = this->offsetsstrm[subst];
+      end0 = this->offsetsstrm[subst+nadjacent];
     } else {
-      /* Comparison 1/3 */
-      debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		    3,4,heap[3]->position,heap[4]->position));
-      smallesti = 4 - (heap[3]->position < heap[4]->position);
-      if (position <= heap[smallesti]->position) {
-	debug3(printf("Inserting at 2\n"));
-	heap[1] = heap[2];
-	heap[2] = batch;
-      } else {
-	smallesti_1 = smallesti;
-	smallesti <<= 1;
-	/* Comparison 2/3 */
-	debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		      smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
-	smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
-	if (position <= heap[smallesti]->position) {
-	  debug3(printf("Inserting at %d\n",smallesti_1));
-	  heap[1] = heap[2];
-	  heap[2] = heap[smallesti_1];
-	  heap[smallesti_1] = batch;
-	} else {
-	  smallesti_2 = smallesti;
-	  smallesti <<= 1;
-	  /* Comparison 3/3 */
-	  debug3(printf("Comparing left %d/right %d: %u and %u\n",
-			smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
-	  smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
-	  if (position <= heap[smallesti]->position) {
-	    debug3(printf("Inserting at %d\n",smallesti_2));
-	    heap[1] = heap[2];
-	    heap[2] = heap[smallesti_1];
-	    heap[smallesti_1] = heap[smallesti_2];
-	    heap[smallesti_2] = batch;
-	  } else {
-	    debug3(printf("Inserting at %d\n",smallesti));
-	    heap[1] = heap[2];
-	    heap[2] = heap[smallesti_1];
-	    heap[smallesti_1] = heap[smallesti_2];
-	    heap[smallesti_2] = heap[smallesti];
-	    heap[smallesti] = batch;
-	  }
-	}
-      }
+      ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]);
+      end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]);
     }
 #else
-    /* Comparison 0/3 */
-    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
-    if (position <= heap[2]->position) {
-      debug3(printf("Inserting at 1\n"));
-      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
-    } else {
-      heap[1] = heap[2];
-      /* Comparison 1/3 */
-      debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		    3,4,heap[3]->position,heap[4]->position));
-      smallesti = 4 - (heap[3]->position < heap[4]->position);
-      if (position <= heap[smallesti]->position) {
-	debug3(printf("Inserting at 2\n"));
-	heap[2] = batch;
-      } else {
-	heap[2] = heap[smallesti];
-	parenti = smallesti;
-	smallesti <<= 1;
-	/* Comparison 2/3 */
-	debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		      smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
-	smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
-	if (position <= heap[smallesti]->position) {
-	  debug3(printf("Inserting at %d\n",parenti));
-	  heap[parenti] = batch;
-	} else {
-	  heap[parenti] = heap[smallesti];
-	  parenti = smallesti;
-	  smallesti <<= 1;
-	  /* Comparison 3/3 */
-	  debug3(printf("Comparing left %d/right %d: %u and %u\n",
-			smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
-	  smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
-	  if (position <= heap[smallesti]->position) {
-	    debug3(printf("Inserting at %d\n",parenti));
-	    heap[parenti] = batch;
-	  } else {
-	    heap[parenti] = heap[smallesti];
-	    debug3(printf("Inserting at %d\n",smallesti));
-	    heap[smallesti] = batch;
-	  }
-	}
-      }
-    }
+    abort();
 #endif
-  }
-
-#ifdef CONVERT_TO_LITTLEENDIAN
-  this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm;
 #else
-  this_position = heap[1]->position + diagterm;
+    ptr0 = this->offsetsstrm[subst];
+    end0 = this->offsetsstrm[subst+nadjacent];
 #endif
-  if (this_position != last_position) {
-    *ptr++ = this_position;
-  }
 
-  *nmerged = (ptr - positions);
+  } else if (this->compression_type == BITPACK64_COMPRESSION) {
+    ptr0 = Bitpack64_read_one_huge(subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm);
+    end0 = Bitpack64_read_one_huge(subst+nadjacent,this->offsetspages,this->offsetsmeta,this->offsetsstrm);
 
-#if 0
-  position = positions[0];
-  for (i = 1; i < nentries_save; i++) {
-    if (positions[i] <= position) {
-      abort();
-    }
-    position = positions[i];
+  } else {
+    abort();
   }
-#endif
 
-  debug0(
-	 for (i = 0; i < nentries_save; i++) {
-	   printf("%u\n",positions[i]);
-	 }
-	 printf("\n");
-	 )
+  debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n",
+	       subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0));
+  return (end0 - ptr0);
 
-  return positions;
 }
 
-
-static Univcoord_T *
-merge_batches_one_heap_4_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) {
-  Univcoord_T *positions, *ptr, position, last_position, this_position;
-  struct Batch_T sentinel_struct;
-  Batch_T batch, sentinel, heap[5];
-  int heapsize;
-  unsigned int i;
-#ifdef READ_THEN_WRITE
-  unsigned int smallesti;
 #else
-  unsigned int parenti, smallesti;
-#endif
-
-  debug3(printf("starting merge_batches_one_heap_4_existing\n"));
-
-  debug0(int nentries_save = nentries);
-
-  ptr = positions = (Univcoord_T *) CALLOC(nentries,sizeof(Univcoord_T));
+static int
+count_one_shift (T this, Oligospace_T subst, int nadjacent) {
+  Positionsptr_T ptr0, end0;
 
-  /* Set up heap */
-  heapsize = 0;
-  for (i = 0; i < 4; i++) {
-    batch = &(batchpool[i]);
-    if (batch->nentries > 0) {
-#ifdef LARGE_GENOMES
-      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
-#elif defined(WORDS_BIGENDIAN)
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
+  if (this->compression_type == NO_COMPRESSION) {
+#ifdef WORDS_BIGENDIAN
+#if 0
+    if (this->offsetsstrm_access == ALLOCATED) {
+      ptr0 = this->offsetsstrm[subst];
+      end0 = this->offsetsstrm[subst+nadjacent];
+    } else {
+      ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]);
+      end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]);
+    }
 #else
-      batch->position = *batch->positionptr++;
+    abort();
 #endif
-      heap_insert_even(heap,&heapsize,batch,batch->position);
-    }
-  }
-
-  sentinel_struct.position = (Univcoord_T) -1; /* infinity */
-#ifdef LARGE_GENOMES
-  sentinel_struct.positionptr_high = &sentinel_position_high;
-  sentinel_struct.positionptr_low = &sentinel_position_low;
 #else
-  sentinel_struct.positionptr = &(sentinel_struct.position);
+    ptr0 = this->offsetsstrm[subst];
+    end0 = this->offsetsstrm[subst+nadjacent];
 #endif
-  sentinel = &sentinel_struct;
 
-  for (i = heapsize+1; i <= 4; i++) {
-    heap[i] = sentinel;
+  } else if (this->compression_type == BITPACK64_COMPRESSION) {
+    ptr0 = Bitpack64_read_one(subst,this->offsetsmeta,this->offsetsstrm);
+    end0 = Bitpack64_read_one(subst+nadjacent,this->offsetsmeta,this->offsetsstrm);
+
+  } else {
+    abort();
   }
 
-  last_position = 0U;
-  while (--nentries >= 1) {
-    debug3(printf("nentries = %d, top of heap is %u (%d)\n",
-		  nentries+1,heap[1]->position,heapsize));
+  debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n",
+	       subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0));
+  return (end0 - ptr0);
+
+}
 
-    /* Get minimum */
-    batch = heap[1];
-#ifdef CONVERT_TO_LITTLEENDIAN
-    this_position = Bigendian_convert_univcoord(batch->position) + diagterm;
-#else
-    this_position = batch->position + diagterm;
 #endif
-    if (this_position != last_position) {
-      *ptr++ = this_position;
-    }
-    last_position = this_position;
 
 
-    if (--batch->nentries <= 0) {
-      /* Use last batch (or sentinel) in heap for insertion */
-      heap[1] = batch = (heapsize == 1) ? sentinel : heap[heapsize];
-      heap[heapsize--] = sentinel;
+/************************************************************************
+ *   Counting procedures
+ ************************************************************************/
 
-    } else {
-      /* Advance heap, and use this batch for insertion */
-#ifdef LARGE_GENOMES
-      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
-#elif defined(WORDS_BIGENDIAN)
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
-#else
-      batch->position = *batch->positionptr++;
-#endif
-    }
+/* Don't mask out leftmost nucleotides with LOWXXMER */
+/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */
+int
+Indexdb_count_left_subst_2 (T this, Oligospace_T oligo) {
+  int nentries = 0;
+  Oligospace_T base;
+  int i;
 
-    position = batch->position;
-    debug3(printf("starting heapify with %u\n",position));
-
-#ifdef READ_THEN_WRITE
-    /* Comparison 0/3 */
-    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
-    if (position <= heap[2]->position) {
-      debug3(printf("Inserting at 1\n"));
-      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
-    } else {
-      /* Comparison 1/3 */
-      debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		    3,4,heap[3]->position,heap[4]->position));
-      smallesti = 4 - (heap[3]->position < heap[4]->position);
-      if (position <= heap[smallesti]->position) {
-	debug3(printf("Inserting at 2\n"));
-	heap[1] = heap[2];
-	heap[2] = batch;
-      } else {
-	debug3(printf("Inserting at %d\n",smallesti));
-	heap[1] = heap[2];
-	heap[2] = heap[smallesti];
-	heap[smallesti] = batch;
-      }
-    }
+  debug(printf("count_left_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
 
+#ifdef ALLOW_DUPLICATES
+  /* Right shift */
+  base = (oligo >> 4);
+  for (i = 0; i < 16; i++, base += left_subst) {
+    nentries += count_one_shift(this,base);
+  }
 #else
-    /* Comparison 0/3 */
-    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
-    if (position <= heap[2]->position) {
-      debug3(printf("Inserting at 1\n"));
-      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
-    } else {
-      heap[1] = heap[2];
-      /* Comparison 1/3 */
-      debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		    3,4,heap[3]->position,heap[4]->position));
-      smallesti = 4 - (heap[3]->position < heap[4]->position);
-      if (position <= heap[smallesti]->position) {
-	debug3(printf("Inserting at 2\n"));
-	heap[2] = batch;
-      } else {
-	heap[2] = heap[smallesti];
-	heap[smallesti] = batch;
-      }
-    }
-
+  /* Right shift */
+  base = (oligo >> 4);
+  debug(printf("shift right => %06X (%s)\n",base,shortoligo_nt(base,index1part)));
+  for (i = 0; i < 16; i++, base += left_subst) {
+#if 0
+    nentries += count_one_shift(this,base,/*nadjacent*/1);
+#else
+    nentries += Indexdb_count_no_subst(this,base);
 #endif
   }
+#endif
+      
+  return nentries;
+}
 
-#ifdef CONVERT_TO_LITTLEENDIAN
-  this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm;
+
+/* Don't mask out leftmost nucleotides with LOWXXMER */
+/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */
+int
+Indexdb_count_left_subst_1 (T this, Oligospace_T oligo) {
+  int nentries = 0;
+  Oligospace_T base;
+  int i;
+
+  debug(printf("count_left_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+
+#ifdef ALLOW_DUPLICATES
+  /* Zero shift. */
+  base = (oligo >> 2);
+  for (i = 0; i < 4; i++, base += top_subst) {
+    nentries += count_one_shift(this,base);
+  }
 #else
-  this_position = heap[1]->position + diagterm;
+  /* Zero shift. */
+  base = (oligo >> 2);
+  for (i = 0; i < 4; i++, base += top_subst) {
+#if 0
+    nentries += count_one_shift(this,base,/*nadjacent*/1);
+#else
+    nentries += Indexdb_count_no_subst(this,base);
 #endif
-  if (this_position != last_position) {
-    *ptr++ = this_position;
   }
+#endif
+      
+  return nentries;
+}
 
-  *nmerged = (ptr - positions);
 
-#if 0
-  position = positions[0];
-  for (i = 1; i < nentries_save; i++) {
-    if (positions[i] <= position) {
-      abort();
-    }
-    position = positions[i];
-  }
+int
+Indexdb_count_right_subst_2 (T this, Oligospace_T oligo) {
+  int nentries;
+  Oligospace_T base;
+#ifdef ALLOW_DUPLICATES
+  int i;
+#endif
+#ifdef DEBUG
+  int i;
 #endif
 
-  debug0(
-	 for (i = 0; i < nentries_save; i++) {
-	   printf("%u\n",positions[i]);
-	 }
-	 printf("\n");
-	 )
+  debug(printf("count_right_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
 
+#ifdef ALLOW_DUPLICATES
+  /* Left shift */
+  base = (oligo << 4) & kmer_mask;
+  nentries = 0;
+  for (i = 0; i < 16; i++, base += right_subst) {
+    nentries += count_one_shift(this,base);
+  }
+#else
+  /* Left shift */
+  base = (oligo << 4) & kmer_mask;
+  nentries = count_one_shift(this,base,/*nadjacent*/16);
 
-  return positions;
+  debug(
+	printf("Details\n");
+	nentries = 0;
+	for (i = 0; i < 16; i++, base += right_subst) {
+	  nentries += count_one_shift(this,base,/*nadjacent*/1);
+	}
+	);
+#endif
+      
+  return nentries;
 }
 
 
-/************************************************************************
- *  The following positions functions are taken from indexdb.c
- ************************************************************************/
+int
+Indexdb_count_right_subst_1 (T this, Oligospace_T oligo) {
+  int nentries;
+  Oligospace_T base;
+#ifdef ALLOW_DUPLICATES
+  int i;
+#endif
+#ifdef DEBUG
+  int i;
+#endif
 
-#ifndef LARGE_GENOMES
-static void
-positions_move_absolute (int positions_fd, Positionsptr_T ptr) {
-  off_t offset = ptr*((off_t) sizeof(Univcoord_T));
+  debug(printf("count_right_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
 
-  if (lseek(positions_fd,offset,SEEK_SET) < 0) {
-    fprintf(stderr,"Attempted to do lseek on offset %u*%d=%zd\n",
-	    ptr,(int) sizeof(Univcoord_T),offset);
-    perror("Error in indexdb.c, positions_move_absolute_4");
-    exit(9);
+#ifdef ALLOW_DUPLICATES
+  /* Zero shift */
+  base = (oligo << 2) & kmer_mask;
+  nentries = 0;
+  for (i = 0; i < 4; i++, base += right_subst) {
+    nentries += count_one_shift(this,base);
   }
-  return;
-}
+#else
+  /* Zero shift */
+  base = (oligo << 2) & kmer_mask;
+  nentries = count_one_shift(this,base,/*nadjacent*/4);
 
-static void
-positions_read_multiple (int positions_fd, Univcoord_T *values, int n) {
-  int i;
-  Univcoord_T value;
-  unsigned char buffer[4];
+  debug(
+	printf("Details\n");
+	nentries = 0;
+	for (i = 0; i < 4; i++, base += right_subst) {
+	  nentries += count_one_shift(this,base,/*nadjacent*/1);
+	}
+	);
+#endif
+      
+  return nentries;
+}
 
-#ifdef WORDS_BIGENDIAN
-  /* Need to keep in bigendian format */
-  for (i = 0; i < n; i++) {
-    read(positions_fd,buffer,4);
 
-    value = (buffer[0] & 0xff);
-    value <<= 8;
-    value |= (buffer[1] & 0xff);
-    value <<= 8;
-    value |= (buffer[2] & 0xff);
-    value <<= 8;
-    value |= (buffer[3] & 0xff);
+/************************************************************************/
 
-    values[i] = value;
-  }
-#else
-  for (i = 0; i < n; i++) {
-    read(positions_fd,buffer,4);
 
-    value = (buffer[3] & 0xff);
-    value <<= 8;
-    value |= (buffer[2] & 0xff);
-    value <<= 8;
-    value |= (buffer[1] & 0xff);
-    value <<= 8;
-    value |= (buffer[0] & 0xff);
+static bool free_positions_p;	/* Needs to be true if Indexdb positions are FILEIO */
 
-    values[i] = value;
+void
+Compoundpos_init_positions_free (bool positions_fileio_p) {
+  if (positions_fileio_p == true) {
+    free_positions_p = true;
+  } else {
+    free_positions_p = false;
   }
-#endif
-
   return;
 }
-#endif
 
 
 
+struct Compoundpos_T {
+  int n;
+
 #ifdef LARGE_GENOMES
-static UINT4 *
-point_one_shift (int *nentries, unsigned char **positions_high, T this, Oligospace_T subst) {
-  UINT4 *positions_low;
-  Positionsptr_T ptr0, end0;
-#ifdef DEBUG
-  int i;
+  unsigned char *positions_high[16];
+  UINT4 *positions_low[16];
+#else
+  Univcoord_T *positions[16];
 #endif
+  int npositions[16];
 
-  if (this->compression_type == NO_COMPRESSION) {
-#ifdef WORDS_BIGENDIAN
-    abort();
+  struct Batch_T batchpool[16];
+  Batch_T heap[17];
+  int heapsize;
+  struct Batch_T sentinel_struct;
+  Batch_T sentinel;
+
+#ifdef LARGE_GENOMES
+  unsigned char *positions_high_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
+  UINT4 *positions_low_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
 #else
-    ptr0 = this->offsetsstrm[subst];
-    end0 = this->offsetsstrm[subst+1];
+  Univcoord_T *positions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
 #endif
-
-  } else if (this->compression_type == BITPACK64_COMPRESSION) {
-    ptr0 = Bitpack64_read_two_huge(&end0,subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm);
-  }
+  int npositions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
+};
 
 
-  debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0));
+void
+Compoundpos_set (Compoundpos_T compoundpos) {
+  int i;
 
-  if ((*nentries = end0 - ptr0) == 0) {
-    *positions_high = (unsigned char *) NULL;
-    return (UINT4 *) NULL;
-  } else {
-    if (this->positions_high_access == FILEIO || this->positions_low_access == FILEIO) {
-      abort();
-
-    } else {
-      /* ALLOCATED or MMAPPED */
-      *positions_high = &(this->positions_high[ptr0]);
-      positions_low = &(this->positions_low[ptr0]);
-    }
+  for (i = 0; i < compoundpos->n; i++) {
+#ifdef LARGE_GENOMES
+    compoundpos->positions_high_reset[i] = compoundpos->positions_high[i];
+    compoundpos->positions_low_reset[i] = compoundpos->positions_low[i];
+#else
+    compoundpos->positions_reset[i] = compoundpos->positions[i];
+#endif
+    compoundpos->npositions_reset[i] = compoundpos->npositions[i];
   }
-      
-  debug(
-	printf("%d entries:",*nentries);
-	for (i = 0; i < *nentries; i++) {
-	  printf(" %u",(Univcoord_T) positions_high[i] << 32 + positions_low[i]);
-	}
-	printf("\n");
-	);
-  
-  return positions_low;
+  return;
 }
 
-#else
-
-static Univcoord_T *
-point_one_shift (int *nentries, T this, Oligospace_T subst) {
-  Univcoord_T *positions;
-  Positionsptr_T ptr0, end0;
-#ifdef DEBUG
+void
+Compoundpos_reset (Compoundpos_T compoundpos) {
   int i;
-#endif
 
-  if (this->compression_type == NO_COMPRESSION) {
-#ifdef WORDS_BIGENDIAN
-#if 0
-    if (this->offsetsstrm_access == ALLOCATED) {
-      ptr0 = this->offsetsstrm[subst];
-      end0 = this->offsetsstrm[subst+1];
-    } else {
-      ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]);
-      end0 = Bigendian_convert_uint(this->offsetsstrm[subst+1]);
-    }
-#else
-    abort();
-#endif
+  for (i = 0; i < compoundpos->n; i++) {
+#ifdef LARGE_GENOMES
+    compoundpos->positions_high[i] = compoundpos->positions_high_reset[i];
+    compoundpos->positions_low[i] = compoundpos->positions_low_reset[i];
 #else
-    ptr0 = this->offsetsstrm[subst];
-    end0 = this->offsetsstrm[subst+1];
+    compoundpos->positions[i] = compoundpos->positions_reset[i];
 #endif
-
-  } else if (this->compression_type == BITPACK64_COMPRESSION) {
-    ptr0 = Bitpack64_read_two(&end0,subst,this->offsetsmeta,this->offsetsstrm);
+    compoundpos->npositions[i] = compoundpos->npositions_reset[i];
   }
+  return;
+}
 
 
-  debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0));
-
-  if ((*nentries = end0 - ptr0) == 0) {
-    return (Univcoord_T *) NULL;
-  } else {
-    if (this->positions_access == FILEIO) {
-      positions = (Univcoord_T *) CALLOC(*nentries,sizeof(Univcoord_T));
-#ifdef HAVE_PTHREAD
-      pthread_mutex_lock(&this->positions_read_mutex);
-#endif
-      positions_move_absolute(this->positions_fd,ptr0);
-      positions_read_multiple(this->positions_fd,positions,*nentries);
-#ifdef HAVE_PTHREAD
-      pthread_mutex_unlock(&this->positions_read_mutex);
-#endif
+void
+Compoundpos_print_sizes (Compoundpos_T compoundpos) {
+  int i;
 
-    } else {
-      /* ALLOCATED or MMAPPED */
-      positions = &(this->positions[ptr0]);
-    }
+  for (i = 0; i < compoundpos->n; i++) {
+    printf(" %d",compoundpos->npositions[i]);
   }
-      
-#ifdef WORDS_BIGENDIAN
-  debug(
-	printf("%d entries:",*nentries);
-	for (i = 0; i < *nentries; i++) {
-	  printf(" %u",Bigendian_convert_univcoord(positions[i]));
-	}
-	printf("\n");
-	);
-#else
-  debug(
-	printf("%d entries:",*nentries);
-	for (i = 0; i < *nentries; i++) {
-	  printf(" %u",positions[i]);
-	}
-	printf("\n");
-	);
-#endif
-  
-  return positions;
-}
-
-#endif
 
+  return;
+}
 
-/*                      87654321 */
-#define LOW_TWO_BITS  0x00000003
 
-#ifdef DEBUG
-static char *
-shortoligo_nt (Oligospace_T oligo, int oligosize) {
-  char *nt;
+void
+Compoundpos_dump (Compoundpos_T compoundpos, int diagterm) {
   int i, j;
-  Oligospace_T lowbits;
 
-  nt = (char *) CALLOC(oligosize+1,sizeof(char));
-  j = oligosize-1;
-  for (i = 0; i < oligosize; i++) {
-    lowbits = oligo & LOW_TWO_BITS;
-    switch (lowbits) {
-    case RIGHT_A: nt[j] = 'A'; break;
-    case RIGHT_C: nt[j] = 'C'; break;
-    case RIGHT_G: nt[j] = 'G'; break;
-    case RIGHT_T: nt[j] = 'T'; break;
-    }
-    oligo >>= 2;
-    j--;
+  printf("%d diagonals: ",compoundpos->n);
+  for (i = 0; i < compoundpos->n; i++) {
+    printf(" %d",compoundpos->npositions[i]);
   }
+  printf("\n");
 
-  return nt;
-}
-#endif
-
-
+  for (i = 0; i < compoundpos->n; i++) {
+    for (j = 0; j < compoundpos->npositions[i]; j++) {
 #ifdef LARGE_GENOMES
-static int
-count_one_shift (T this, Oligospace_T subst, int nadjacent) {
-  Positionsptr_T ptr0, end0;
-
-  if (this->compression_type == NO_COMPRESSION) {
-#ifdef WORDS_BIGENDIAN
-#if 0
-    if (this->offsetsstrm_access == ALLOCATED) {
-      ptr0 = this->offsetsstrm[subst];
-      end0 = this->offsetsstrm[subst+nadjacent];
-    } else {
-      ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]);
-      end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]);
-    }
-#else
-    abort();
-#endif
+      printf(" compound%d.%d:%llu+%d\n",
+	     i,j,((Univcoord_T) compoundpos->positions_high[i][j] << 32) + compoundpos->positions_low[i][j],diagterm);
+#elif defined(WORDS_BIGENDIAN)
+      printf(" compound%d.%d:%u+%d\n",
+	     i,j,Bigendian_convert_univcoord(compoundpos->positions[i][j]),diagterm);
 #else
-    ptr0 = this->offsetsstrm[subst];
-    end0 = this->offsetsstrm[subst+nadjacent];
+      printf(" compound%d.%d:%u+%d\n",i,j,compoundpos->positions[i][j],diagterm);
 #endif
-
-  } else if (this->compression_type == BITPACK64_COMPRESSION) {
-    ptr0 = Bitpack64_read_one_huge(subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm);
-    end0 = Bitpack64_read_one_huge(subst+nadjacent,this->offsetspages,this->offsetsmeta,this->offsetsstrm);
-
-  } else {
-    abort();
+    }
   }
-
-  debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n",
-	       subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0));
-  return (end0 - ptr0);
-
+  return;
 }
 
-#else
-static int
-count_one_shift (T this, Oligospace_T subst, int nadjacent) {
-  Positionsptr_T ptr0, end0;
 
-  if (this->compression_type == NO_COMPRESSION) {
-#ifdef WORDS_BIGENDIAN
-#if 0
-    if (this->offsetsstrm_access == ALLOCATED) {
-      ptr0 = this->offsetsstrm[subst];
-      end0 = this->offsetsstrm[subst+nadjacent];
-    } else {
-      ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]);
-      end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]);
-    }
-#else
-    abort();
-#endif
+void
+Compoundpos_free (Compoundpos_T *old) {
+  int i;
+
+  if (*old) {
+    if (free_positions_p == true) {
+      for (i = 0; i < (*old)->n; i++) {
+#ifdef LARGE_GENOMES
+	FREE((*old)->positions_high[i]);
+	FREE((*old)->positions_low[i]);
 #else
-    ptr0 = this->offsetsstrm[subst];
-    end0 = this->offsetsstrm[subst+nadjacent];
+	FREE((*old)->positions[i]);
 #endif
+      }
+    }
 
-  } else if (this->compression_type == BITPACK64_COMPRESSION) {
-    ptr0 = Bitpack64_read_one(subst,this->offsetsmeta,this->offsetsstrm);
-    end0 = Bitpack64_read_one(subst+nadjacent,this->offsetsmeta,this->offsetsstrm);
-
-  } else {
-    abort();
+    /* No need, since allocated statically.  FREE((*old)->npositions); */
+    /* No need, since allocated statically.  FREE((*old)->positions); */
+  
+    FREE(*old);
   }
-
-  debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n",
-	       subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0));
-  return (end0 - ptr0);
-
+  return;
 }
 
-#endif
-
-
-/************************************************************************
- *   Counting procedures
- ************************************************************************/
 
-/* Don't mask out leftmost nucleotides with LOWXXMER */
-/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */
-int
-Indexdb_count_left_subst_2 (T this, Oligospace_T oligo) {
-  int nentries = 0;
+Compoundpos_T
+Indexdb_compoundpos_left_subst_2 (T this, Oligospace_T oligo) {
+  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
   Oligospace_T base;
   int i;
 
-  debug(printf("count_left_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+  debug(printf("compoundpos_left_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+
+  compoundpos->n = 16;
+  /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */
+  /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */
 
-#ifdef ALLOW_DUPLICATES
   /* Right shift */
   base = (oligo >> 4);
   for (i = 0; i < 16; i++, base += left_subst) {
-    nentries += count_one_shift(this,base);
-  }
-#else
-  /* Right shift */
-  base = (oligo >> 4);
-  debug(printf("shift right => %06X (%s)\n",base,shortoligo_nt(base,index1part)));
-  for (i = 0; i < 16; i++, base += left_subst) {
-#if 0
-    nentries += count_one_shift(this,base,/*nadjacent*/1);
+#ifdef LARGE_GENOMES
+    compoundpos->positions_low[i] =
+      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
 #else
-    nentries += Indexdb_count_no_subst(this,base);
+    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
 #endif
   }
-#endif
-      
-  return nentries;
-}
 
+  return compoundpos;
+}
 
-/* Don't mask out leftmost nucleotides with LOWXXMER */
-/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */
-int
-Indexdb_count_left_subst_1 (T this, Oligospace_T oligo) {
-  int nentries = 0;
+Compoundpos_T
+Indexdb_compoundpos_left_subst_1 (T this, Oligospace_T oligo) {
+  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
   Oligospace_T base;
   int i;
 
-  debug(printf("count_left_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+  debug(printf("compoundpos_left_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
 
-#ifdef ALLOW_DUPLICATES
-  /* Zero shift. */
-  base = (oligo >> 2);
-  for (i = 0; i < 4; i++, base += top_subst) {
-    nentries += count_one_shift(this,base);
-  }
-#else
-  /* Zero shift. */
+  compoundpos->n = 4;
+  /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */
+  /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */
+
+  /* Zero shift */
   base = (oligo >> 2);
   for (i = 0; i < 4; i++, base += top_subst) {
-#if 0
-    nentries += count_one_shift(this,base,/*nadjacent*/1);
+#ifdef LARGE_GENOMES
+    compoundpos->positions_low[i] =
+      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
 #else
-    nentries += Indexdb_count_no_subst(this,base);
+    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
 #endif
   }
-#endif
-      
-  return nentries;
-}
 
+  return compoundpos;
+}
 
-int
-Indexdb_count_right_subst_2 (T this, Oligospace_T oligo) {
-  int nentries;
+Compoundpos_T
+Indexdb_compoundpos_right_subst_2 (T this, Oligospace_T oligo) {
+  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
   Oligospace_T base;
-#ifdef ALLOW_DUPLICATES
-  int i;
-#endif
-#ifdef DEBUG
   int i;
-#endif
 
-  debug(printf("count_right_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+  debug(printf("compoundpos_right_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+
+  compoundpos->n = 16;
+  /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */
+  /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */
 
-#ifdef ALLOW_DUPLICATES
   /* Left shift */
   base = (oligo << 4) & kmer_mask;
-  nentries = 0;
   for (i = 0; i < 16; i++, base += right_subst) {
-    nentries += count_one_shift(this,base);
-  }
+#ifdef LARGE_GENOMES
+    compoundpos->positions_low[i] =
+      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
 #else
-  /* Left shift */
-  base = (oligo << 4) & kmer_mask;
-  nentries = count_one_shift(this,base,/*nadjacent*/16);
-
-  debug(
-	printf("Details\n");
-	nentries = 0;
-	for (i = 0; i < 16; i++, base += right_subst) {
-	  nentries += count_one_shift(this,base,/*nadjacent*/1);
-	}
-	);
+    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
 #endif
-      
-  return nentries;
-}
+  }
 
+  return compoundpos;
+}
 
-int
-Indexdb_count_right_subst_1 (T this, Oligospace_T oligo) {
-  int nentries;
+Compoundpos_T
+Indexdb_compoundpos_right_subst_1 (T this, Oligospace_T oligo) {
+  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
   Oligospace_T base;
-#ifdef ALLOW_DUPLICATES
-  int i;
-#endif
-#ifdef DEBUG
   int i;
-#endif
 
-  debug(printf("count_right_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+  debug(printf("compoundpos_right_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
+
+  compoundpos->n = 4;
+  /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */
+  /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */
 
-#ifdef ALLOW_DUPLICATES
   /* Zero shift */
   base = (oligo << 2) & kmer_mask;
-  nentries = 0;
   for (i = 0; i < 4; i++, base += right_subst) {
-    nentries += count_one_shift(this,base);
-  }
+#ifdef LARGE_GENOMES
+    compoundpos->positions_low[i] =
+      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
 #else
-  /* Zero shift */
-  base = (oligo << 2) & kmer_mask;
-  nentries = count_one_shift(this,base,/*nadjacent*/4);
-
-  debug(
-	printf("Details\n");
-	nentries = 0;
-	for (i = 0; i < 4; i++, base += right_subst) {
-	  nentries += count_one_shift(this,base,/*nadjacent*/1);
-	}
-	);
+    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
 #endif
-      
-  return nentries;
+  }
+
+  return compoundpos;
 }
 
 
+
 /************************************************************************/
 
+#ifdef LARGE_GENOMES
+static int
+binary_search (int lowi, int highi, unsigned char *positions_high, UINT4 *positions_low, Univcoord_T goal) {
+  bool foundp = false;
+  int middlei;
+  Univcoord_T position;
+
+#ifdef NOBINARY
+  return lowi;
+#endif
 
-static bool free_positions_p;	/* Needs to be true if Indexdb positions are FILEIO */
+  if (goal == 0U) {
+    return lowi;
+  }
 
-void
-Compoundpos_init_positions_free (bool positions_fileio_p) {
-  if (positions_fileio_p == true) {
-    free_positions_p = true;
+  while (!foundp && lowi < highi) {
+    middlei = lowi + ((highi - lowi) / 2);
+    position = ((Univcoord_T) positions_high[middlei] << 32) + positions_low[middlei];
+    debug2(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
+		  lowi,(positions_high[lowi] << 32) + positions_low[lowi],
+		  middlei,position,
+		  highi,(positions_high[highi] << 32) + positions_low[highi],goal));
+    if (goal < position) {
+      highi = middlei;
+    } else if (goal > position) {
+      lowi = middlei + 1;
+    } else {
+      foundp = true;
+    }
+  }
+
+  if (foundp == true) {
+    return middlei;
   } else {
-    free_positions_p = false;
+    return highi;
   }
-  return;
 }
 
+#else
 
+static int
+binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
+  bool foundp = false;
+  int middlei;
 
-struct Compoundpos_T {
-  int n;
+#ifdef NOBINARY
+  return lowi;
+#endif
 
-#ifdef LARGE_GENOMES
-  unsigned char *positions_high[16];
-  UINT4 *positions_low[16];
+  if (goal == 0U) {
+    return lowi;
+  }
+
+  while (!foundp && lowi < highi) {
+    middlei = lowi + ((highi - lowi) / 2);
+#ifdef WORDS_BIGENDIAN
+    debug2(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
+		  lowi,Bigendian_convert_univcoord(positions[lowi]),
+		  middlei,Bigendian_convert_univcoord(positions[middlei]),
+		  highi,Bigendian_convert_univcoord(positions[highi]),goal));
+    if (goal < Bigendian_convert_univcoord(positions[middlei])) {
+      highi = middlei;
+    } else if (goal > Bigendian_convert_univcoord(positions[middlei])) {
+      lowi = middlei + 1;
+    } else {
+      foundp = true;
+    }
 #else
-  Univcoord_T *positions[16];
+    debug2(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
+		  lowi,positions[lowi],middlei,positions[middlei],
+		  highi,positions[highi],goal));
+    if (goal < positions[middlei]) {
+      highi = middlei;
+    } else if (goal > positions[middlei]) {
+      lowi = middlei + 1;
+    } else {
+      foundp = true;
+    }
 #endif
-  int npositions[16];
+  }
 
-  struct Batch_T batchpool[16];
-  Batch_T heap[17];
-  int heapsize;
-  struct Batch_T sentinel_struct;
-  Batch_T sentinel;
+  if (foundp == true) {
+    return middlei;
+  } else {
+    return highi;
+  }
+}
 
-#ifdef LARGE_GENOMES
-  unsigned char *positions_high_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
-  UINT4 *positions_low_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
-#else
-  Univcoord_T *positions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
 #endif
-  int npositions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */
-};
 
 
 void
-Compoundpos_set (Compoundpos_T compoundpos) {
-  int i;
+Compoundpos_heap_init (Compoundpos_T compoundpos, int querylength, int diagterm) {
+  Batch_T batch;
+  int startbound, i;
 
+  compoundpos->heapsize = 0;
   for (i = 0; i < compoundpos->n; i++) {
+    batch = &(compoundpos->batchpool[i]);
 #ifdef LARGE_GENOMES
-    compoundpos->positions_high_reset[i] = compoundpos->positions_high[i];
-    compoundpos->positions_low_reset[i] = compoundpos->positions_low[i];
+    batch->positionptr_high = compoundpos->positions_high[i];
+    batch->positionptr_low = compoundpos->positions_low[i];
 #else
-    compoundpos->positions_reset[i] = compoundpos->positions[i];
+    batch->positionptr = compoundpos->positions[i];
 #endif
-    compoundpos->npositions_reset[i] = compoundpos->npositions[i];
-  }
-  return;
-}
-
-void
-Compoundpos_reset (Compoundpos_T compoundpos) {
-  int i;
+    batch->nentries = compoundpos->npositions[i];
+    if (diagterm < querylength) {
+      startbound = querylength - diagterm;
+#ifdef LARGE_GENOMES
+      while (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < (unsigned int) startbound) {
+	debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n",
+		       ((Univcoord_T) *batch->positionptr_high << 32) + *batch->positionptr_low));
+	++batch->positionptr_high;
+	++batch->positionptr_low;
+	--batch->nentries;
+      }
+#elif defined(WORDS_BIGENDIAN)
+      while (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < (unsigned int) startbound) {
+	debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n",
+		       Bigendian_convert_univcoord(*batch->positionptr)));
+	++batch->positionptr;
+	--batch->nentries;
+      }
+#else
+      while (batch->nentries > 0 && *batch->positionptr < (unsigned int) startbound) {
+	debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n",
+		       *batch->positionptr));
+	++batch->positionptr;
+	--batch->nentries;
+      }
+#endif
+    }
+    if (batch->nentries > 0) {
+#ifdef LARGE_GENOMES
+      batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low);
+#elif defined(WORDS_BIGENDIAN)
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr);
+#else
+      batch->position = *batch->positionptr;
+#endif
+      heap_insert_even(compoundpos->heap,&compoundpos->heapsize,batch,batch->position);
+    }
+  }
 
-  for (i = 0; i < compoundpos->n; i++) {
+  compoundpos->sentinel_struct.position = (Univcoord_T) -1; /* infinity */
 #ifdef LARGE_GENOMES
-    compoundpos->positions_high[i] = compoundpos->positions_high_reset[i];
-    compoundpos->positions_low[i] = compoundpos->positions_low_reset[i];
+  compoundpos->sentinel_struct.positionptr_high = &sentinel_position_high;
+  compoundpos->sentinel_struct.positionptr_low = &sentinel_position_low;
 #else
-    compoundpos->positions[i] = compoundpos->positions_reset[i];
+  compoundpos->sentinel_struct.positionptr = &(compoundpos->sentinel_struct.position);
 #endif
-    compoundpos->npositions[i] = compoundpos->npositions_reset[i];
+  compoundpos->sentinel = &compoundpos->sentinel_struct;
+
+  for (i = compoundpos->heapsize+1; i <= compoundpos->n; i++) {
+    compoundpos->heap[i] = compoundpos->sentinel;
   }
+
   return;
 }
 
 
-void
-Compoundpos_print_sizes (Compoundpos_T compoundpos) {
+#if 0
+/* Used by DEBUG3 and DEBUG6 */
+static void
+heap_even_dump (Batch_T *heap, int heapsize) {
   int i;
+  Batch_T batch;
 
-  for (i = 0; i < compoundpos->n; i++) {
-    printf(" %d",compoundpos->npositions[i]);
+  for (i = 1; i <= heapsize; i++) {
+    batch = heap[i];
+    printf("#%d--%d:%llu  ",i,batch->nentries,(unsigned long long) batch->position);
   }
-
-  return;
+  printf("\n");
 }
+#endif
 
 
-void
-Compoundpos_dump (Compoundpos_T compoundpos, int diagterm) {
+
+/* Returns true if found.  emptyp is true only if every batch is
+   empty.  If procedure returns true, empty is guaranteed to be
+   false. */
+bool
+Compoundpos_find (bool *emptyp, Compoundpos_T compoundpos, Univcoord_T local_goal) {
+  Batch_T *heap = compoundpos->heap, batch;
   int i, j;
 
-  printf("%d diagonals: ",compoundpos->n);
-  for (i = 0; i < compoundpos->n; i++) {
-    printf(" %d",compoundpos->npositions[i]);
-  }
-  printf("\n");
+  debug6(printf("\nEntering Compoundpos_find with local_goal %u\n",local_goal));
 
-  for (i = 0; i < compoundpos->n; i++) {
-    for (j = 0; j < compoundpos->npositions[i]; j++) {
+  *emptyp = true;
+  i = 1;
+  while (i <= compoundpos->heapsize) {
+    debug6(printf("Compoundpos_find iteration, heapsize %d:\n",compoundpos->heapsize));
+    debug6(heap_even_dump(heap,compoundpos->heapsize));
+
+    batch = heap[i];
 #ifdef LARGE_GENOMES
-      printf(" compound%d.%d:%llu+%d\n",
-	     i,j,((Univcoord_T) compoundpos->positions_high[i][j] << 32) + compoundpos->positions_low[i][j],diagterm);
+    if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) {
+      j = 1;
+      while (j < batch->nentries &&
+	     ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) {
+	j <<= 1;		/* gallop by 2 */
+      }
+      if (j >= batch->nentries) {
+	j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal);
+      } else {
+	j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal);
+      }
+      batch->positionptr_high += j;
+      batch->positionptr_low += j;
+      batch->nentries -= j;
+      debug6(printf("binary search jump %d positions to %d:%u\n",
+		    j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
+    }
 #elif defined(WORDS_BIGENDIAN)
-      printf(" compound%d.%d:%u+%d\n",
-	     i,j,Bigendian_convert_univcoord(compoundpos->positions[i][j]),diagterm);
+    if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) {
+      j = 1;
+      while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) {
+	j <<= 1;		/* gallop by 2 */
+      }
+      if (j >= batch->nentries) {
+	j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+      } else {
+	j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+      }
+      batch->positionptr += j;
+      batch->nentries -= j;
+      debug6(printf("binary search jump %d positions to %d:%u\n",
+		    j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr)));
+    }
 #else
-      printf(" compound%d.%d:%u+%d\n",i,j,compoundpos->positions[i][j],diagterm);
-#endif
+    if (batch->nentries > 0 && *batch->positionptr < local_goal) {
+      j = 1;
+      while (j < batch->nentries && batch->positionptr[j] < local_goal) {
+	j <<= 1;		/* gallop by 2 */
+      }
+      if (j >= batch->nentries) {
+	j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+      } else {
+	j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+      }
+      batch->positionptr += j;
+      batch->nentries -= j;
+      debug6(printf("binary search jump %d positions to %d:%u\n",
+		    j,batch->nentries,*batch->positionptr));
     }
-  }
-  return;
-}
-
+#endif
 
-void
-Compoundpos_free (Compoundpos_T *old) {
-  int i;
+    if (batch->nentries <= 0) {
+      /* Empty, so continue with loop */
+      /* Move last heap to this one, and reduce heapsize */
+      compoundpos->heap[i] = compoundpos->heap[compoundpos->heapsize];
+      --compoundpos->heapsize;
 
-  if (*old) {
-    if (free_positions_p == true) {
-      for (i = 0; i < (*old)->n; i++) {
 #ifdef LARGE_GENOMES
-	FREE((*old)->positions_high[i]);
-	FREE((*old)->positions_low[i]);
+    } else if (((Univcoord_T) *batch->positionptr_high << 32) + (*batch->positionptr_low) > local_goal) {
+      /* Already advanced past goal, so continue with loop */
+      debug6(printf("Setting emptyp to be false\n"));
+      *emptyp = false;
+      i++;
+#elif defined(WORDS_BIGENDIAN)
+    } else if (Bigendian_convert_univcoord(*batch->positionptr) > local_goal) {
+      /* Already advanced past goal, so continue with loop */
+      debug6(printf("Setting emptyp to be false\n"));
+      *emptyp = false;
+      i++;
 #else
-	FREE((*old)->positions[i]);
+    } else if (*batch->positionptr > local_goal) {
+      /* Already advanced past goal, so continue with loop */
+      debug6(printf("Setting emptyp to be false\n"));
+      *emptyp = false;
+      i++;
 #endif
-      }
-    }
-
-    /* No need, since allocated statically.  FREE((*old)->npositions); */
-    /* No need, since allocated statically.  FREE((*old)->positions); */
-  
-    FREE(*old);
-  }
-  return;
-}
-
-
-Compoundpos_T
-Indexdb_compoundpos_left_subst_2 (T this, Oligospace_T oligo) {
-  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
-  Oligospace_T base;
-  int i;
-
-  debug(printf("compoundpos_left_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
-
-  compoundpos->n = 16;
-  /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */
-  /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */
-
-  /* Right shift */
-  base = (oligo >> 4);
-  for (i = 0; i < 16; i++, base += left_subst) {
+    } else {
+      /* Found goal, so return */
+      debug6(printf("Setting emptyp to be false\n"));
+      *emptyp = false;
 #ifdef LARGE_GENOMES
-    compoundpos->positions_low[i] =
-      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
+      debug6(printf("Found! Returning position %llu\n",(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
+#elif defined(WORDS_BIGENDIAN)
+      debug6(printf("Found! Returning position %u\n",Bigendian_convert_univcoord(*batch->positionptr)));
 #else
-    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
+      debug6(printf("Found! Returning position %u\n",*batch->positionptr));
 #endif
-  }
-
-  return compoundpos;
-}
-
-Compoundpos_T
-Indexdb_compoundpos_left_subst_1 (T this, Oligospace_T oligo) {
-  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
-  Oligospace_T base;
-  int i;
-
-  debug(printf("compoundpos_left_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
-
-  compoundpos->n = 4;
-  /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */
-  /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */
-
-  /* Zero shift */
-  base = (oligo >> 2);
-  for (i = 0; i < 4; i++, base += top_subst) {
 #ifdef LARGE_GENOMES
-    compoundpos->positions_low[i] =
-      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
+      ++batch->positionptr_high;
+      ++batch->positionptr_low;
 #else
-    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
+      ++batch->positionptr;
 #endif
+      --batch->nentries;
+      return true;
+    }
   }
 
-  return compoundpos;
+  /* Done with loop: Fail. */
+  debug6(printf("Returning emptyp %d\n",*emptyp));
+  return false;
 }
 
-Compoundpos_T
-Indexdb_compoundpos_right_subst_2 (T this, Oligospace_T oligo) {
-  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
-  Oligospace_T base;
-  int i;
 
-  debug(printf("compoundpos_right_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
 
-  compoundpos->n = 16;
-  /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */
-  /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */
+/* Returns 0 if heapsize is 0, else 1, and returns smallest value >= local_goal */
+int
+Compoundpos_search (Univcoord_T *value, Compoundpos_T compoundpos, Univcoord_T local_goal) {
+  int parenti, smallesti, j;
+  Batch_T batch, *heap = compoundpos->heap;
+  Univcoord_T position;
 
-  /* Left shift */
-  base = (oligo << 4) & kmer_mask;
-  for (i = 0; i < 16; i++, base += right_subst) {
-#ifdef LARGE_GENOMES
-    compoundpos->positions_low[i] =
-      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
-#else
-    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
-#endif
+  debug3(printf("\nEntering Compoundpos_search with local_goal %u\n",local_goal));
+  if (compoundpos->heapsize <= 0) {
+    debug3(printf("Returning because heapsize is %d\n",compoundpos->heapsize));
+    return 0;
   }
 
-  return compoundpos;
-}
-
-Compoundpos_T
-Indexdb_compoundpos_right_subst_1 (T this, Oligospace_T oligo) {
-  Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos));
-  Oligospace_T base;
-  int i;
-
-  debug(printf("compoundpos_right_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part)));
-
-  compoundpos->n = 4;
-  /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */
-  /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */
-
-  /* Zero shift */
-  base = (oligo << 2) & kmer_mask;
-  for (i = 0; i < 4; i++, base += right_subst) {
+  if (compoundpos->n == 4) {
+    while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) {
+      debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize));
+      debug3(heap_even_dump(heap,compoundpos->heapsize));
 #ifdef LARGE_GENOMES
-    compoundpos->positions_low[i] =
-      point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base);
+      if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) {
+	j = 1;
+	while (j < batch->nentries &&
+	       ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= batch->nentries) {
+	  j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal);
+	} else {
+	  j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal);
+	}
+	batch->positionptr_high += j;
+	batch->positionptr_low += j;
+	batch->nentries -= j;
+	debug3(printf("binary search jump %d positions to %d:%u\n",
+		      j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
+      }
+      batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low);
+#elif defined(WORDS_BIGENDIAN)
+      if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) {
+	j = 1;
+	while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= batch->nentries) {
+	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+	} else {
+	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+	}
+	batch->positionptr += j;
+	batch->nentries -= j;
+	debug3(printf("binary search jump %d positions to %d:%u\n",
+		      j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr)));
+      }
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr);
 #else
-    compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base);
+      if (batch->nentries > 0 && *batch->positionptr < local_goal) {
+	j = 1;
+	while (j < batch->nentries && batch->positionptr[j] < local_goal) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= batch->nentries) {
+	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+	} else {
+	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+	}
+	batch->positionptr += j;
+	batch->nentries -= j;
+	debug3(printf("binary search jump %d positions to %d:%u\n",
+		      j,batch->nentries,*batch->positionptr));
+      }
+      batch->position = *batch->positionptr;
 #endif
-  }
 
-  return compoundpos;
-}
-
-
-
-/************************************************************************/
+      if (batch->nentries <= 0) {
+	debug3(printf("top of heap found to be empty\n"));
+	heap[1] = batch = (compoundpos->heapsize == 1) ? 
+	  compoundpos->sentinel : heap[compoundpos->heapsize];
+	heap[compoundpos->heapsize--] = compoundpos->sentinel;
+      }
+      
+      position = batch->position;
+      debug3(printf("heapify downward on %u\n",position));
+      debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
+      if (position <= heap[2]->position) {
+	debug3(printf("Inserting at 1\n"));
+	/* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+      } else {
+	heap[1] = heap[2];
+	debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		      3,4,heap[3]->position,heap[4]->position));
+	smallesti = 4 - (heap[3]->position < heap[4]->position);
+	if (position <= heap[smallesti]->position) {
+	  debug3(printf("Inserting at 2\n"));
+	  heap[2] = batch;
+	} else {
+	  debug3(printf("Inserting at %d\n",smallesti));
+	  heap[2] = heap[smallesti];
+	  heap[smallesti] = batch;
+	}
+      }
+    }
+    if (batch->position == local_goal) {
+      *value = batch->position;
+      debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value));
+      return 1;
+    }
 
+  } else {
+    /* 16 batches */
+    while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) {
+      debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize));
+      debug3(heap_even_dump(heap,compoundpos->heapsize));
 #ifdef LARGE_GENOMES
-static int
-binary_search (int lowi, int highi, unsigned char *positions_high, UINT4 *positions_low, Univcoord_T goal) {
-  bool foundp = false;
-  int middlei;
-  Univcoord_T position;
-
-#ifdef NOBINARY
-  return lowi;
+      if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) {
+	j = 1;
+	while (j < batch->nentries &&
+	       ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= batch->nentries) {
+	  j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal);
+	} else {
+	  j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal);
+	}
+	batch->positionptr_high += j;
+	batch->positionptr_low += j;
+	batch->nentries -= j;
+	debug3(printf("binary search jump %d positions to %d:%u\n",
+		      j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32 + (*batch->positionptr_low))));
+      }
+      batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low);
+#elif defined(WORDS_BIGENDIAN)
+      if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) {
+	j = 1;
+	while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= batch->nentries) {
+	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+	} else {
+	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+	}
+	batch->positionptr += j;
+	batch->nentries -= j;
+	debug3(printf("binary search jump %d positions to %d:%u\n",
+		      j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr)));
+      }
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr);
+#else
+      if (batch->nentries > 0 && *batch->positionptr < local_goal) {
+	j = 1;
+	while (j < batch->nentries && batch->positionptr[j] < local_goal) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= batch->nentries) {
+	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+	} else {
+	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+	}
+	batch->positionptr += j;
+	batch->nentries -= j;
+	debug3(printf("binary search jump %d positions to %d:%u\n",
+		      j,batch->nentries,*batch->positionptr));
+      }
+      batch->position = *batch->positionptr;
 #endif
 
-  if (goal == 0U) {
-    return lowi;
-  }
-
-  while (!foundp && lowi < highi) {
-    middlei = lowi + ((highi - lowi) / 2);
-    position = ((Univcoord_T) positions_high[middlei] << 32) + positions_low[middlei];
-    debug2(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
-		  lowi,(positions_high[lowi] << 32) + positions_low[lowi],
-		  middlei,position,
-		  highi,(positions_high[highi] << 32) + positions_low[highi],goal));
-    if (goal < position) {
-      highi = middlei;
-    } else if (goal > position) {
-      lowi = middlei + 1;
-    } else {
-      foundp = true;
+      if (batch->nentries <= 0) {
+	debug3(printf("top of heap found to be empty\n"));
+	heap[1] = batch = (compoundpos->heapsize == 1) ? 
+	  compoundpos->sentinel : heap[compoundpos->heapsize];
+	heap[compoundpos->heapsize--] = compoundpos->sentinel;
+      }
+      
+      position = batch->position;
+      debug3(printf("heapify downward on %u\n",position));
+      /* Comparison 0/3 */
+      debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
+      if (position <= heap[2]->position) {
+	debug3(printf("Inserting at 1\n"));
+	/* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+      } else {
+	heap[1] = heap[2];
+	/* Comparison 1/3 */
+	debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		      3,4,heap[3]->position,heap[4]->position));
+	smallesti = 4 - (heap[3]->position < heap[4]->position);
+	if (position <= heap[smallesti]->position) {
+	  debug3(printf("Inserting at 2\n"));
+	  heap[2] = batch;
+	} else {
+	  heap[2] = heap[smallesti];
+	  parenti = smallesti;
+	  smallesti <<= 1;
+	  /* Comparison 2/3 */
+	  debug3(printf("Comparing left %d/right %d: %u and %u\n",
+			smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
+	  smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
+	  if (position <= heap[smallesti]->position) {
+	    debug3(printf("Inserting at %d\n",parenti));
+	    heap[parenti] = batch;
+	  } else {
+	    heap[parenti] = heap[smallesti];
+	    parenti = smallesti;
+	    smallesti <<= 1;
+	    /* Comparison 3/3 */
+	    debug3(printf("Comparing left %d/right %d: %u and %u\n",
+			  smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
+	    smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
+	    if (position <= heap[smallesti]->position) {
+	      debug3(printf("Inserting at %d\n",parenti));
+	      heap[parenti] = batch;
+	    } else {
+	      heap[parenti] = heap[smallesti];
+	      debug3(printf("Inserting at %d\n",smallesti));
+	      heap[smallesti] = batch;
+	    }
+	  }
+	}
+      }
+    }
+    if (batch->position == local_goal) {
+      *value = batch->position;
+      debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value));
+      return 1;
     }
   }
 
-  if (foundp == true) {
-    return middlei;
-  } else {
-    return highi;
-  }
+  *value = batch->position;
+  debug3(printf("Returning position %llu\n",(unsigned long long) *value));
+  return 1;
 }
 
-#else
 
-static int
-binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
-  bool foundp = false;
-  int middlei;
 
-#ifdef NOBINARY
-  return lowi;
-#endif
+#if defined(LARGE_GENOMES) || !defined(HAVE_SSE4_1)
 
-  if (goal == 0U) {
-    return lowi;
-  }
+#define READ_THEN_WRITE 1
 
-  while (!foundp && lowi < highi) {
-    middlei = lowi + ((highi - lowi) / 2);
-#ifdef WORDS_BIGENDIAN
-    debug2(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
-		  lowi,Bigendian_convert_univcoord(positions[lowi]),
-		  middlei,Bigendian_convert_univcoord(positions[middlei]),
-		  highi,Bigendian_convert_univcoord(positions[highi]),goal));
-    if (goal < Bigendian_convert_univcoord(positions[middlei])) {
-      highi = middlei;
-    } else if (goal > Bigendian_convert_univcoord(positions[middlei])) {
-      lowi = middlei + 1;
-    } else {
-      foundp = true;
-    }
+static Univcoord_T *
+merge_batches_one_heap_16_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) {
+  Univcoord_T *positions, *ptr, position, last_position, this_position;
+  struct Batch_T sentinel_struct;
+  Batch_T batch, sentinel, heap[17];
+  int heapsize;
+  unsigned int i;
+#ifdef READ_THEN_WRITE
+  unsigned int smallesti_1, smallesti_2, smallesti;
 #else
-    debug2(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
-		  lowi,positions[lowi],middlei,positions[middlei],
-		  highi,positions[highi],goal));
-    if (goal < positions[middlei]) {
-      highi = middlei;
-    } else if (goal > positions[middlei]) {
-      lowi = middlei + 1;
-    } else {
-      foundp = true;
-    }
+  unsigned int parenti, smallesti;
 #endif
-  }
 
-  if (foundp == true) {
-    return middlei;
-  } else {
-    return highi;
-  }
-}
-
-#endif
+  debug3(printf("starting merge_batches_one_heap_16_existing\n"));
 
+  debug0(int nentries_save = nentries);
 
-void
-Compoundpos_heap_init (Compoundpos_T compoundpos, int querylength, int diagterm) {
-  Batch_T batch;
-  int startbound, i;
+  ptr = positions = (Univcoord_T *) MALLOC_ALIGN(nentries * sizeof(Univcoord_T));
 
-  compoundpos->heapsize = 0;
-  for (i = 0; i < compoundpos->n; i++) {
-    batch = &(compoundpos->batchpool[i]);
-#ifdef LARGE_GENOMES
-    batch->positionptr_high = compoundpos->positions_high[i];
-    batch->positionptr_low = compoundpos->positions_low[i];
-#else
-    batch->positionptr = compoundpos->positions[i];
-#endif
-    batch->nentries = compoundpos->npositions[i];
-    if (diagterm < querylength) {
-      startbound = querylength - diagterm;
-#ifdef LARGE_GENOMES
-      while (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < (unsigned int) startbound) {
-	debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n",
-		       ((Univcoord_T) *batch->positionptr_high << 32) + *batch->positionptr_low));
-	++batch->positionptr_high;
-	++batch->positionptr_low;
-	--batch->nentries;
-      }
-#elif defined(WORDS_BIGENDIAN)
-      while (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < (unsigned int) startbound) {
-	debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n",
-		       Bigendian_convert_univcoord(*batch->positionptr)));
-	++batch->positionptr;
-	--batch->nentries;
-      }
-#else
-      while (batch->nentries > 0 && *batch->positionptr < (unsigned int) startbound) {
-	debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n",
-		       *batch->positionptr));
-	++batch->positionptr;
-	--batch->nentries;
-      }
-#endif
-    }
+  /* Set up heap */
+  heapsize = 0;
+  for (i = 0; i < 16; i++) {
+    batch = &(batchpool[i]);
     if (batch->nentries > 0) {
 #ifdef LARGE_GENOMES
-      batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low);
+      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
 #elif defined(WORDS_BIGENDIAN)
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr);
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
 #else
-      batch->position = *batch->positionptr;
+      batch->position = *batch->positionptr++;
 #endif
-      heap_insert_even(compoundpos->heap,&compoundpos->heapsize,batch,batch->position);
+      heap_insert_even(heap,&heapsize,batch,batch->position);
     }
   }
 
-  compoundpos->sentinel_struct.position = (Univcoord_T) -1; /* infinity */
+  sentinel_struct.position = (Univcoord_T) -1; /* infinity */
 #ifdef LARGE_GENOMES
-  compoundpos->sentinel_struct.positionptr_high = &sentinel_position_high;
-  compoundpos->sentinel_struct.positionptr_low = &sentinel_position_low;
+  sentinel_struct.positionptr_high = &sentinel_position_high;
+  sentinel_struct.positionptr_low = &sentinel_position_low;
 #else
-  compoundpos->sentinel_struct.positionptr = &(compoundpos->sentinel_struct.position);
+  sentinel_struct.positionptr = &(sentinel_struct.position);
 #endif
-  compoundpos->sentinel = &compoundpos->sentinel_struct;
+  sentinel = &sentinel_struct;
 
-  for (i = compoundpos->heapsize+1; i <= compoundpos->n; i++) {
-    compoundpos->heap[i] = compoundpos->sentinel;
+  for (i = heapsize+1; i <= 16; i++) {
+    heap[i] = sentinel;
   }
 
-  return;
-}
-
-
-#if 0
-/* Used by DEBUG3 and DEBUG6 */
-static void
-heap_even_dump (Batch_T *heap, int heapsize) {
-  int i;
-  Batch_T batch;
+  last_position = 0U;
+  while (--nentries >= 1) {
+    debug3(printf("nentries = %d, top of heap is %u (%d)\n",
+		  nentries+1,heap[1]->position,heapsize));
 
-  for (i = 1; i <= heapsize; i++) {
-    batch = heap[i];
-    printf("#%d--%d:%llu  ",i,batch->nentries,(unsigned long long) batch->position);
-  }
-  printf("\n");
-}
+    /* Get minimum */
+    batch = heap[1];
+#ifdef CONVERT_TO_LITTLEENDIAN
+    this_position = Bigendian_convert_univcoord(batch->position) + diagterm;
+#else
+    this_position = batch->position + diagterm;
 #endif
+    if (this_position != last_position) {
+      *ptr++ = this_position;
+    }
+    last_position = this_position;
 
+    if (--batch->nentries <= 0) {
+      /* Use last batch (or sentinel) in heap for insertion */
+      heap[1] = batch = (heapsize == 1) ? sentinel : heap[heapsize];
+      heap[heapsize--] = sentinel;
 
-
-/* Returns true if found.  emptyp is true only if every batch is
-   empty.  If procedure returns true, empty is guaranteed to be
-   false. */
-bool
-Compoundpos_find (bool *emptyp, Compoundpos_T compoundpos, Univcoord_T local_goal) {
-  Batch_T *heap = compoundpos->heap, batch;
-  int i, j;
-
-  debug6(printf("\nEntering Compoundpos_find with local_goal %u\n",local_goal));
-
-  *emptyp = true;
-  i = 1;
-  while (i <= compoundpos->heapsize) {
-    debug6(printf("Compoundpos_find iteration, heapsize %d:\n",compoundpos->heapsize));
-    debug6(heap_even_dump(heap,compoundpos->heapsize));
-
-    batch = heap[i];
+    } else {
+      /* Advance heap, and use this batch for insertion */
 #ifdef LARGE_GENOMES
-    if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) {
-      j = 1;
-      while (j < batch->nentries &&
-	     ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) {
-	j <<= 1;		/* gallop by 2 */
-      }
-      if (j >= batch->nentries) {
-	j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal);
-      } else {
-	j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal);
-      }
-      batch->positionptr_high += j;
-      batch->positionptr_low += j;
-      batch->nentries -= j;
-      debug6(printf("binary search jump %d positions to %d:%u\n",
-		    j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
-    }
+      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
 #elif defined(WORDS_BIGENDIAN)
-    if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) {
-      j = 1;
-      while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) {
-	j <<= 1;		/* gallop by 2 */
-      }
-      if (j >= batch->nentries) {
-	j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
+#else
+      batch->position = *batch->positionptr++;
+#endif
+    }
+
+    position = batch->position;
+    debug3(printf("starting heapify with %u\n",position));
+
+#ifdef READ_THEN_WRITE
+    /* Comparison 0/3 */
+    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
+    if (position <= heap[2]->position) {
+      debug3(printf("Inserting at 1\n"));
+      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+    } else {
+      /* Comparison 1/3 */
+      debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		    3,4,heap[3]->position,heap[4]->position));
+      smallesti = 4 - (heap[3]->position < heap[4]->position);
+      if (position <= heap[smallesti]->position) {
+	debug3(printf("Inserting at 2\n"));
+	heap[1] = heap[2];
+	heap[2] = batch;
       } else {
-	j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+	smallesti_1 = smallesti;
+	smallesti <<= 1;
+	/* Comparison 2/3 */
+	debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		      smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
+	smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
+	if (position <= heap[smallesti]->position) {
+	  debug3(printf("Inserting at %d\n",smallesti_1));
+	  heap[1] = heap[2];
+	  heap[2] = heap[smallesti_1];
+	  heap[smallesti_1] = batch;
+	} else {
+	  smallesti_2 = smallesti;
+	  smallesti <<= 1;
+	  /* Comparison 3/3 */
+	  debug3(printf("Comparing left %d/right %d: %u and %u\n",
+			smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
+	  smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
+	  if (position <= heap[smallesti]->position) {
+	    debug3(printf("Inserting at %d\n",smallesti_2));
+	    heap[1] = heap[2];
+	    heap[2] = heap[smallesti_1];
+	    heap[smallesti_1] = heap[smallesti_2];
+	    heap[smallesti_2] = batch;
+	  } else {
+	    debug3(printf("Inserting at %d\n",smallesti));
+	    heap[1] = heap[2];
+	    heap[2] = heap[smallesti_1];
+	    heap[smallesti_1] = heap[smallesti_2];
+	    heap[smallesti_2] = heap[smallesti];
+	    heap[smallesti] = batch;
+	  }
+	}
       }
-      batch->positionptr += j;
-      batch->nentries -= j;
-      debug6(printf("binary search jump %d positions to %d:%u\n",
-		    j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr)));
     }
 #else
-    if (batch->nentries > 0 && *batch->positionptr < local_goal) {
-      j = 1;
-      while (j < batch->nentries && batch->positionptr[j] < local_goal) {
-	j <<= 1;		/* gallop by 2 */
-      }
-      if (j >= batch->nentries) {
-	j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
+    /* Comparison 0/3 */
+    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
+    if (position <= heap[2]->position) {
+      debug3(printf("Inserting at 1\n"));
+      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+    } else {
+      heap[1] = heap[2];
+      /* Comparison 1/3 */
+      debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		    3,4,heap[3]->position,heap[4]->position));
+      smallesti = 4 - (heap[3]->position < heap[4]->position);
+      if (position <= heap[smallesti]->position) {
+	debug3(printf("Inserting at 2\n"));
+	heap[2] = batch;
       } else {
-	j = binary_search(j >> 1,j,batch->positionptr,local_goal);
+	heap[2] = heap[smallesti];
+	parenti = smallesti;
+	smallesti <<= 1;
+	/* Comparison 2/3 */
+	debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		      smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
+	smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
+	if (position <= heap[smallesti]->position) {
+	  debug3(printf("Inserting at %d\n",parenti));
+	  heap[parenti] = batch;
+	} else {
+	  heap[parenti] = heap[smallesti];
+	  parenti = smallesti;
+	  smallesti <<= 1;
+	  /* Comparison 3/3 */
+	  debug3(printf("Comparing left %d/right %d: %u and %u\n",
+			smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
+	  smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
+	  if (position <= heap[smallesti]->position) {
+	    debug3(printf("Inserting at %d\n",parenti));
+	    heap[parenti] = batch;
+	  } else {
+	    heap[parenti] = heap[smallesti];
+	    debug3(printf("Inserting at %d\n",smallesti));
+	    heap[smallesti] = batch;
+	  }
+	}
       }
-      batch->positionptr += j;
-      batch->nentries -= j;
-      debug6(printf("binary search jump %d positions to %d:%u\n",
-		    j,batch->nentries,*batch->positionptr));
     }
 #endif
-
-    if (batch->nentries <= 0) {
-      /* Empty, so continue with loop */
-      /* Move last heap to this one, and reduce heapsize */
-      compoundpos->heap[i] = compoundpos->heap[compoundpos->heapsize];
-      --compoundpos->heapsize;
-
-#ifdef LARGE_GENOMES
-    } else if (((Univcoord_T) *batch->positionptr_high << 32) + (*batch->positionptr_low) > local_goal) {
-      /* Already advanced past goal, so continue with loop */
-      debug6(printf("Setting emptyp to be false\n"));
-      *emptyp = false;
-      i++;
-#elif defined(WORDS_BIGENDIAN)
-    } else if (Bigendian_convert_univcoord(*batch->positionptr) > local_goal) {
-      /* Already advanced past goal, so continue with loop */
-      debug6(printf("Setting emptyp to be false\n"));
-      *emptyp = false;
-      i++;
-#else
-    } else if (*batch->positionptr > local_goal) {
-      /* Already advanced past goal, so continue with loop */
-      debug6(printf("Setting emptyp to be false\n"));
-      *emptyp = false;
-      i++;
-#endif
-    } else {
-      /* Found goal, so return */
-      debug6(printf("Setting emptyp to be false\n"));
-      *emptyp = false;
-#ifdef LARGE_GENOMES
-      debug6(printf("Found! Returning position %llu\n",(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
-#elif defined(WORDS_BIGENDIAN)
-      debug6(printf("Found! Returning position %u\n",Bigendian_convert_univcoord(*batch->positionptr)));
-#else
-      debug6(printf("Found! Returning position %u\n",*batch->positionptr));
-#endif
-#ifdef LARGE_GENOMES
-      ++batch->positionptr_high;
-      ++batch->positionptr_low;
+  }
+
+#ifdef CONVERT_TO_LITTLEENDIAN
+  this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm;
 #else
-      ++batch->positionptr;
+  this_position = heap[1]->position + diagterm;
 #endif
-      --batch->nentries;
-      return true;
+  if (this_position != last_position) {
+    *ptr++ = this_position;
+  }
+
+  *nmerged = (ptr - positions);
+
+#if 0
+  position = positions[0];
+  for (i = 1; i < nentries_save; i++) {
+    if (positions[i] <= position) {
+      abort();
     }
+    position = positions[i];
   }
+#endif
 
-  /* Done with loop: Fail. */
-  debug6(printf("Returning emptyp %d\n",*emptyp));
-  return false;
+  debug0(
+	 for (i = 0; i < nentries_save; i++) {
+	   printf("%u\n",positions[i]);
+	 }
+	 printf("\n");
+	 )
+
+  return positions;
 }
 
 
+static Univcoord_T *
+merge_batches_one_heap_4_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) {
+  Univcoord_T *positions, *ptr, position, last_position, this_position;
+  struct Batch_T sentinel_struct;
+  Batch_T batch, sentinel, heap[5];
+  int heapsize;
+  unsigned int i;
+#ifdef READ_THEN_WRITE
+  unsigned int smallesti;
+#else
+  unsigned int parenti, smallesti;
+#endif
 
-/* Returns 0 if heapsize is 0, else 1, and returns smallest value >= local_goal */
-int
-Compoundpos_search (Univcoord_T *value, Compoundpos_T compoundpos, Univcoord_T local_goal) {
-  int parenti, smallesti, j;
-  Batch_T batch, *heap = compoundpos->heap;
-  Univcoord_T position;
+  debug3(printf("starting merge_batches_one_heap_4_existing\n"));
 
-  debug3(printf("\nEntering Compoundpos_search with local_goal %u\n",local_goal));
-  if (compoundpos->heapsize <= 0) {
-    debug3(printf("Returning because heapsize is %d\n",compoundpos->heapsize));
-    return 0;
-  }
+  debug0(int nentries_save = nentries);
 
-  if (compoundpos->n == 4) {
-    while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) {
-      debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize));
-      debug3(heap_even_dump(heap,compoundpos->heapsize));
+  ptr = positions = (Univcoord_T *) MALLOC_ALIGN(nentries * sizeof(Univcoord_T));
+
+  /* Set up heap */
+  heapsize = 0;
+  for (i = 0; i < 4; i++) {
+    batch = &(batchpool[i]);
+    if (batch->nentries > 0) {
 #ifdef LARGE_GENOMES
-      if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) {
-	j = 1;
-	while (j < batch->nentries &&
-	       ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) {
-	  j <<= 1;		/* gallop by 2 */
-	}
-	if (j >= batch->nentries) {
-	  j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal);
-	} else {
-	  j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal);
-	}
-	batch->positionptr_high += j;
-	batch->positionptr_low += j;
-	batch->nentries -= j;
-	debug3(printf("binary search jump %d positions to %d:%u\n",
-		      j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low)));
-      }
-      batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low);
+      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
 #elif defined(WORDS_BIGENDIAN)
-      if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) {
-	j = 1;
-	while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) {
-	  j <<= 1;		/* gallop by 2 */
-	}
-	if (j >= batch->nentries) {
-	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
-	} else {
-	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
-	}
-	batch->positionptr += j;
-	batch->nentries -= j;
-	debug3(printf("binary search jump %d positions to %d:%u\n",
-		      j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr)));
-      }
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr);
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
 #else
-      if (batch->nentries > 0 && *batch->positionptr < local_goal) {
-	j = 1;
-	while (j < batch->nentries && batch->positionptr[j] < local_goal) {
-	  j <<= 1;		/* gallop by 2 */
-	}
-	if (j >= batch->nentries) {
-	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
-	} else {
-	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
-	}
-	batch->positionptr += j;
-	batch->nentries -= j;
-	debug3(printf("binary search jump %d positions to %d:%u\n",
-		      j,batch->nentries,*batch->positionptr));
-      }
-      batch->position = *batch->positionptr;
+      batch->position = *batch->positionptr++;
 #endif
-
-      if (batch->nentries <= 0) {
-	debug3(printf("top of heap found to be empty\n"));
-	heap[1] = batch = (compoundpos->heapsize == 1) ? 
-	  compoundpos->sentinel : heap[compoundpos->heapsize];
-	heap[compoundpos->heapsize--] = compoundpos->sentinel;
-      }
-      
-      position = batch->position;
-      debug3(printf("heapify downward on %u\n",position));
-      debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
-      if (position <= heap[2]->position) {
-	debug3(printf("Inserting at 1\n"));
-	/* heap[1] = batch; -- not necessary because batch is already at heap[1] */
-      } else {
-	heap[1] = heap[2];
-	debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		      3,4,heap[3]->position,heap[4]->position));
-	smallesti = 4 - (heap[3]->position < heap[4]->position);
-	if (position <= heap[smallesti]->position) {
-	  debug3(printf("Inserting at 2\n"));
-	  heap[2] = batch;
-	} else {
-	  debug3(printf("Inserting at %d\n",smallesti));
-	  heap[2] = heap[smallesti];
-	  heap[smallesti] = batch;
-	}
-      }
+      heap_insert_even(heap,&heapsize,batch,batch->position);
     }
-    if (batch->position == local_goal) {
-      *value = batch->position;
-      debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value));
-      return 1;
+  }
+
+  sentinel_struct.position = (Univcoord_T) -1; /* infinity */
+#ifdef LARGE_GENOMES
+  sentinel_struct.positionptr_high = &sentinel_position_high;
+  sentinel_struct.positionptr_low = &sentinel_position_low;
+#else
+  sentinel_struct.positionptr = &(sentinel_struct.position);
+#endif
+  sentinel = &sentinel_struct;
+
+  for (i = heapsize+1; i <= 4; i++) {
+    heap[i] = sentinel;
+  }
+
+  last_position = 0U;
+  while (--nentries >= 1) {
+    debug3(printf("nentries = %d, top of heap is %u (%d)\n",
+		  nentries+1,heap[1]->position,heapsize));
+
+    /* Get minimum */
+    batch = heap[1];
+#ifdef CONVERT_TO_LITTLEENDIAN
+    this_position = Bigendian_convert_univcoord(batch->position) + diagterm;
+#else
+    this_position = batch->position + diagterm;
+#endif
+    if (this_position != last_position) {
+      *ptr++ = this_position;
     }
+    last_position = this_position;
 
-  } else {
-    /* 16 batches */
-    while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) {
-      debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize));
-      debug3(heap_even_dump(heap,compoundpos->heapsize));
+
+    if (--batch->nentries <= 0) {
+      /* Use last batch (or sentinel) in heap for insertion */
+      heap[1] = batch = (heapsize == 1) ? sentinel : heap[heapsize];
+      heap[heapsize--] = sentinel;
+
+    } else {
+      /* Advance heap, and use this batch for insertion */
 #ifdef LARGE_GENOMES
-      if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) {
-	j = 1;
-	while (j < batch->nentries &&
-	       ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) {
-	  j <<= 1;		/* gallop by 2 */
-	}
-	if (j >= batch->nentries) {
-	  j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal);
-	} else {
-	  j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal);
-	}
-	batch->positionptr_high += j;
-	batch->positionptr_low += j;
-	batch->nentries -= j;
-	debug3(printf("binary search jump %d positions to %d:%u\n",
-		      j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32 + (*batch->positionptr_low))));
-      }
-      batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low);
+      batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++);
 #elif defined(WORDS_BIGENDIAN)
-      if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) {
-	j = 1;
-	while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) {
-	  j <<= 1;		/* gallop by 2 */
-	}
-	if (j >= batch->nentries) {
-	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
-	} else {
-	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
-	}
-	batch->positionptr += j;
-	batch->nentries -= j;
-	debug3(printf("binary search jump %d positions to %d:%u\n",
-		      j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr)));
-      }
-      batch->position = Bigendian_convert_univcoord(*batch->positionptr);
+      batch->position = Bigendian_convert_univcoord(*batch->positionptr++);
 #else
-      if (batch->nentries > 0 && *batch->positionptr < local_goal) {
-	j = 1;
-	while (j < batch->nentries && batch->positionptr[j] < local_goal) {
-	  j <<= 1;		/* gallop by 2 */
-	}
-	if (j >= batch->nentries) {
-	  j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal);
-	} else {
-	  j = binary_search(j >> 1,j,batch->positionptr,local_goal);
-	}
-	batch->positionptr += j;
-	batch->nentries -= j;
-	debug3(printf("binary search jump %d positions to %d:%u\n",
-		      j,batch->nentries,*batch->positionptr));
-      }
-      batch->position = *batch->positionptr;
+      batch->position = *batch->positionptr++;
 #endif
+    }
+
+    position = batch->position;
+    debug3(printf("starting heapify with %u\n",position));
 
-      if (batch->nentries <= 0) {
-	debug3(printf("top of heap found to be empty\n"));
-	heap[1] = batch = (compoundpos->heapsize == 1) ? 
-	  compoundpos->sentinel : heap[compoundpos->heapsize];
-	heap[compoundpos->heapsize--] = compoundpos->sentinel;
-      }
-      
-      position = batch->position;
-      debug3(printf("heapify downward on %u\n",position));
-      /* Comparison 0/3 */
-      debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
-      if (position <= heap[2]->position) {
-	debug3(printf("Inserting at 1\n"));
-	/* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+#ifdef READ_THEN_WRITE
+    /* Comparison 0/3 */
+    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
+    if (position <= heap[2]->position) {
+      debug3(printf("Inserting at 1\n"));
+      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+    } else {
+      /* Comparison 1/3 */
+      debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		    3,4,heap[3]->position,heap[4]->position));
+      smallesti = 4 - (heap[3]->position < heap[4]->position);
+      if (position <= heap[smallesti]->position) {
+	debug3(printf("Inserting at 2\n"));
+	heap[1] = heap[2];
+	heap[2] = batch;
       } else {
+	debug3(printf("Inserting at %d\n",smallesti));
 	heap[1] = heap[2];
-	/* Comparison 1/3 */
-	debug3(printf("Comparing left %d/right %d: %u and %u\n",
-		      3,4,heap[3]->position,heap[4]->position));
-	smallesti = 4 - (heap[3]->position < heap[4]->position);
-	if (position <= heap[smallesti]->position) {
-	  debug3(printf("Inserting at 2\n"));
-	  heap[2] = batch;
-	} else {
-	  heap[2] = heap[smallesti];
-	  parenti = smallesti;
-	  smallesti <<= 1;
-	  /* Comparison 2/3 */
-	  debug3(printf("Comparing left %d/right %d: %u and %u\n",
-			smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
-	  smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
-	  if (position <= heap[smallesti]->position) {
-	    debug3(printf("Inserting at %d\n",parenti));
-	    heap[parenti] = batch;
-	  } else {
-	    heap[parenti] = heap[smallesti];
-	    parenti = smallesti;
-	    smallesti <<= 1;
-	    /* Comparison 3/3 */
-	    debug3(printf("Comparing left %d/right %d: %u and %u\n",
-			  smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position));
-	    smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position);
-	    if (position <= heap[smallesti]->position) {
-	      debug3(printf("Inserting at %d\n",parenti));
-	      heap[parenti] = batch;
-	    } else {
-	      heap[parenti] = heap[smallesti];
-	      debug3(printf("Inserting at %d\n",smallesti));
-	      heap[smallesti] = batch;
-	    }
-	  }
-	}
+	heap[2] = heap[smallesti];
+	heap[smallesti] = batch;
       }
     }
-    if (batch->position == local_goal) {
-      *value = batch->position;
-      debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value));
-      return 1;
+
+#else
+    /* Comparison 0/3 */
+    debug3(printf("Comparing right %d: %u\n",2,heap[2]->position));
+    if (position <= heap[2]->position) {
+      debug3(printf("Inserting at 1\n"));
+      /* heap[1] = batch; -- not necessary because batch is already at heap[1] */
+    } else {
+      heap[1] = heap[2];
+      /* Comparison 1/3 */
+      debug3(printf("Comparing left %d/right %d: %u and %u\n",
+		    3,4,heap[3]->position,heap[4]->position));
+      smallesti = 4 - (heap[3]->position < heap[4]->position);
+      if (position <= heap[smallesti]->position) {
+	debug3(printf("Inserting at 2\n"));
+	heap[2] = batch;
+      } else {
+	heap[2] = heap[smallesti];
+	heap[smallesti] = batch;
+      }
     }
+
+#endif
   }
 
-  *value = batch->position;
-  debug3(printf("Returning position %llu\n",(unsigned long long) *value));
-  return 1;
-}
+#ifdef CONVERT_TO_LITTLEENDIAN
+  this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm;
+#else
+  this_position = heap[1]->position + diagterm;
+#endif
+  if (this_position != last_position) {
+    *ptr++ = this_position;
+  }
+
+  *nmerged = (ptr - positions);
+
+#if 0
+  position = positions[0];
+  for (i = 1; i < nentries_save; i++) {
+    if (positions[i] <= position) {
+      abort();
+    }
+    position = positions[i];
+  }
+#endif
+
+  debug0(
+	 for (i = 0; i < nentries_save; i++) {
+	   printf("%u\n",positions[i]);
+	 }
+	 printf("\n");
+	 )
+
 
+  return positions;
+}
 
+/* Called only by Spanningelt_diagonals */
+/* Note: Result has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */
 Univcoord_T *
 Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm) {
   int i;
@@ -1848,6 +1859,415 @@ Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm
   }
 }
 
+#elif defined(USE_REGISTER)
+
+#define KEY_MASK (~0U << 2)
+
+/* Without diagterm.
+   Merges the four position streams positions[0..3], whose lengths are
+   npositions[0..3], into one newly allocated array and stores the total
+   number of entries in *nmerged.  Returns NULL when that total is zero;
+   otherwise the array comes from MALLOC_ALIGN.  Streams are ordered by a
+   coded key: the position with its low two bits replaced by the stream
+   number (see KEY_MASK).  Positions from different streams that differ
+   only in those two bits are therefore emitted in stream order rather
+   than in value order. */
+static Univcoord_T *
+merge_via_register (int *nmerged, unsigned int **positions, int *npositions) {
+  Univcoord_T *results, *ptr;
+  int ptrs[4];
+  unsigned int diagonal;
+  __m128i queue, next, max, cmp;
+  int cmpflags;
+  unsigned int streami;
+  int j, i;
+
+  __m128i shuffle_control[4];
+  unsigned int sorter[4], curr;
+
+  /* Initialize shuffle_control.  shuffle_control[k] is a byte shuffle that
+     moves 32-bit lane 0 up to lane k and shifts lanes 1..k down by one,
+     leaving the lanes above k untouched; shuffle_control[0] is the
+     identity. */
+  shuffle_control[0] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4, 0x3,0x2,0x1,0x0);
+  shuffle_control[1] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x3,0x2,0x1,0x0, 0x7,0x6,0x5,0x4);
+  shuffle_control[2] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0x3,0x2,0x1,0x0, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4);
+  shuffle_control[3] = _mm_set_epi8(0x3,0x2,0x1,0x0, 0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4);
+
+
+  debug(printf("merge_compoundpos, sizes:"));
+
+  *nmerged = 0;
+  *nmerged += npositions[0];
+  *nmerged += npositions[1];
+  *nmerged += npositions[2];
+  *nmerged += npositions[3];
+
+  if (*nmerged == 0) {
+    return (unsigned int *) NULL;
+  } else {
+    ptr = results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int));
+  }
+    
+  /* Initialize queue with top of each stream (plus streami).  Use an insertion sort.
+     An empty stream contributes the sentinel -1U, which sorts last and
+     also serves as the termination test of the main loop below. */
+  memset(ptrs,0,4*sizeof(int));
+  if (ptrs[0] >= npositions[0]) {
+    sorter[0] = -1U;
+  } else {
+    sorter[0] = (positions[0][0] & KEY_MASK) + 0;
+  }
+
+  if (ptrs[1] >= npositions[1]) {
+    sorter[1] = -1U;
+  } else {
+    sorter[1] = (positions[1][0] & KEY_MASK) + 1;
+  }
+
+  if (ptrs[2] >= npositions[2]) {
+    sorter[2] = -1U;
+  } else {
+    sorter[2] = (positions[2][0] & KEY_MASK) + 2;
+  }
+
+  if (ptrs[3] >= npositions[3]) {
+    sorter[3] = -1U;
+  } else {
+    sorter[3] = (positions[3][0] & KEY_MASK) + 3;
+  }
+
+
+  for (j = 1; j < 4; j++) {
+    curr = sorter[j];
+    i = j - 1;
+    while (i >= 0 && sorter[i] > curr) {
+      sorter[i+1] = sorter[i];
+      i--;
+    }
+    sorter[i+1] = curr;
+  }
+
+  queue = _mm_setr_epi32(sorter[0],sorter[1],sorter[2],sorter[3]);
+
+  while ((diagonal = _mm_extract_epi32(queue,0)) < -1U) {
+    /* Get the stream from the coded diagonal (its low two bits) */
+    streami = diagonal & ~KEY_MASK;
+    
+    /* Write the true diagonal from that stream, i.e. the untruncated value
+       read back from the stream rather than the coded key */
+    *ptr++ = positions[streami][ptrs[streami]++] /*+ diagterm*/;
+    
+    /* Obtain next value from that stream and encode; an exhausted stream
+       yields the sentinel -1U */
+    if (ptrs[streami] >= npositions[streami]) {
+      diagonal = -1U;
+    } else {
+      diagonal = (positions[streami][ptrs[streami]] & KEY_MASK) + streami;
+    }
+
+    /* Determine where to insert into queue.  cmp is all-ones in every lane
+       whose queued key is <= the new key (unsigned comparison done via
+       max), and movemask yields four flag bits per 32-bit lane. */
+    next = _mm_set1_epi32(diagonal);
+    max = _mm_max_epu32(next,queue);
+    cmp = _mm_cmpeq_epi32(max,next);
+    cmpflags = _mm_movemask_epi8(cmp);
+
+    /* Update queue: overwrite the consumed head in lane 0 with the new key,
+       then rotate it up to the highest lane whose key compared <= it.
+       NOTE(review): __builtin_clz is undefined for 0; this presumably
+       relies on each stream being sorted ascending, so that the old head
+       in lane 0 always compares <= the new key -- confirm. */
+    queue = _mm_insert_epi32(queue,diagonal,0);
+    queue = _mm_shuffle_epi8(queue,shuffle_control[7 - __builtin_clz(cmpflags)/4]);
+  }
+
+  return results;
+}
+
+/* Four-way register merge that adds diagterm to every value written.
+   Merges the streams positions[0..3], whose lengths are npositions[0..3],
+   into a newly allocated MALLOC_ALIGN array and stores the total number
+   of entries in *nmerged.  Unlike merge_via_register, an empty input
+   still yields a non-NULL one-element allocation.  Streams are ordered
+   by a coded key (the position with its low two bits replaced by the
+   stream number, see KEY_MASK), so the output is ordered only up to
+   those two truncated bits. */
+static Univcoord_T *
+merge_via_register_diagterm (int *nmerged, unsigned int **positions, int *npositions, int diagterm) {
+  Univcoord_T *results, *ptr;
+  int ptrs[4];
+  unsigned int diagonal;
+  __m128i queue, next, max, cmp;
+  int cmpflags;
+  unsigned int streami;
+  int j, i;
+
+  __m128i shuffle_control[4];
+  unsigned int sorter[4], curr;
+
+  /* Initialize shuffle_control.  shuffle_control[k] moves 32-bit lane 0 up
+     to lane k and shifts lanes 1..k down by one; entry 0 is the identity. */
+  shuffle_control[0] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4, 0x3,0x2,0x1,0x0);
+  shuffle_control[1] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x3,0x2,0x1,0x0, 0x7,0x6,0x5,0x4);
+  shuffle_control[2] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0x3,0x2,0x1,0x0, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4);
+  shuffle_control[3] = _mm_set_epi8(0x3,0x2,0x1,0x0, 0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4);
+
+
+  debug(printf("merge_compoundpos, sizes:"));
+
+  *nmerged = 0;
+  *nmerged += npositions[0];
+  *nmerged += npositions[1];
+  *nmerged += npositions[2];
+  *nmerged += npositions[3];
+
+  /* Set ptr on both paths so that it is never left indeterminate */
+  if (*nmerged == 0) {
+    ptr = results = (unsigned int *) MALLOC_ALIGN(sizeof(unsigned int));
+  } else {
+    ptr = results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int));
+  }
+
+  /* Initialize queue with top of each stream (plus streami).  Use an insertion sort.
+     An empty stream contributes the sentinel -1U, which sorts last. */
+  memset(ptrs,0,4*sizeof(int));
+  if (ptrs[0] >= npositions[0]) {
+    sorter[0] = -1U;
+  } else {
+    sorter[0] = (positions[0][0] & KEY_MASK) + 0;
+  }
+
+  if (ptrs[1] >= npositions[1]) {
+    sorter[1] = -1U;
+  } else {
+    sorter[1] = (positions[1][0] & KEY_MASK) + 1;
+  }
+
+  if (ptrs[2] >= npositions[2]) {
+    sorter[2] = -1U;
+  } else {
+    sorter[2] = (positions[2][0] & KEY_MASK) + 2;
+  }
+
+  if (ptrs[3] >= npositions[3]) {
+    sorter[3] = -1U;
+  } else {
+    sorter[3] = (positions[3][0] & KEY_MASK) + 3;
+  }
+
+
+  for (j = 1; j < 4; j++) {
+    curr = sorter[j];
+    i = j - 1;
+    while (i >= 0 && sorter[i] > curr) {
+      sorter[i+1] = sorter[i];
+      i--;
+    }
+    sorter[i+1] = curr;
+  }
+
+  queue = _mm_setr_epi32(sorter[0],sorter[1],sorter[2],sorter[3]);
+
+  /* The head of the queue is the sentinel only when all streams are exhausted */
+  while ((diagonal = _mm_extract_epi32(queue,0)) < -1U) {
+    /* Get the stream from the coded diagonal (its low two bits) */
+    streami = diagonal & ~KEY_MASK;
+
+    /* Write the true (untruncated) diagonal from that stream */
+    *ptr++ = positions[streami][ptrs[streami]++] + diagterm;
+
+    /* Obtain next value from that stream and encode */
+    if (ptrs[streami] >= npositions[streami]) {
+      diagonal = -1U;
+    } else {
+      diagonal = (positions[streami][ptrs[streami]] & KEY_MASK) + streami;
+    }
+
+    /* Determine where to insert into queue: cmp is all-ones in every lane
+       whose queued key is <= the new key, four flag bits per lane */
+    next = _mm_set1_epi32(diagonal);
+    max = _mm_max_epu32(next,queue);
+    cmp = _mm_cmpeq_epi32(max,next);
+    cmpflags = _mm_movemask_epi8(cmp);
+
+    /* Update queue: replace the consumed head, then rotate it up to the
+       highest lane whose key compared <= it.
+       NOTE(review): __builtin_clz(0) is undefined; this presumably relies
+       on each stream being sorted ascending so lane 0 always matches. */
+    queue = _mm_insert_epi32(queue,diagonal,0);
+    queue = _mm_shuffle_epi8(queue,shuffle_control[7 - __builtin_clz(cmpflags)/4]);
+  }
+
+  return results;
+}
+
+
+/* Called only by Spanningelt_diagonals */
+/* SIMD register version (eventually need to pad just 1) */
+/* compoundpos->positions set by Indexdb_read_inplace, so we have to allocate */
+/* Merges the position lists of compoundpos into one ascending array,
+   adding diagterm to every value, and stores the number of entries in
+   *nmerged.  With four lists a single four-way merge is done; otherwise
+   the lists are merged in four groups of four (indices 0, 4, 8 and 12)
+   and the four partial results are merged again.
+   NOTE(review): the second path reads 16 lists regardless of
+   compoundpos->n -- presumably n is always 4 or 16; confirm. */
+Univcoord_T *
+Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm) {
+  Univcoord_T *results, curr;
+  Univcoord_T *part[4];
+  int npart[4];
+  int j, i;
+
+  debug(printf("merge_compoundpos, sizes:"));
+
+  if (compoundpos->n == 4) {
+    results = merge_via_register_diagterm(nmerged,&(compoundpos->positions[0]),&(compoundpos->npositions[0]),diagterm);
+
+  } else {
+    part[0] = merge_via_register(&(npart[0]),&(compoundpos->positions[0]),&(compoundpos->npositions[0]));
+    part[1] = merge_via_register(&(npart[1]),&(compoundpos->positions[4]),&(compoundpos->npositions[4]));
+    part[2] = merge_via_register(&(npart[2]),&(compoundpos->positions[8]),&(compoundpos->npositions[8]));
+    part[3] = merge_via_register(&(npart[3]),&(compoundpos->positions[12]),&(compoundpos->npositions[12]));
+
+    results = merge_via_register_diagterm(nmerged,&(part[0]),&(npart[0]),diagterm);
+
+    /* The partial results come from MALLOC_ALIGN, so they must be released
+       with the matching FREE_ALIGN rather than FREE */
+    FREE_ALIGN(part[3]);
+    FREE_ALIGN(part[2]);
+    FREE_ALIGN(part[1]);
+    FREE_ALIGN(part[0]);
+  }
+
+  /* Final insertion sort to correct for truncation of keys: the register
+     merge orders values only by their bits above the stream-number field */
+  for (j = 1; j < *nmerged; j++) {
+    curr = results[j];
+    i = j - 1;
+    /* For a stable merge sort, is the second condition possible? */
+    while (i >= 0 && results[i] > curr) {
+      results[i+1] = results[i];
+      i--;
+    }
+    results[i+1] = curr;
+  }
+
+  return results;
+}
+
+#else
+/* SIMD merge version.
+   Indexdb_merge_compoundpos merges the position lists of a compoundpos
+   with pairwise calls to Merge_uint4, arranged as a binary tree that is
+   indexed like a heap: node i is the merge of nodes LEFT(i) and RIGHT(i),
+   the input lists are the leaves, and node 1 is the complete merge.
+   diagterm is added to every merged value, *nmerged receives the total
+   number of entries, and the merged array is returned. */
+
+#define LEFT(i) (i << 1)
+#define RIGHT(i) ((i << 1) | 1)
+
+Univcoord_T *
+Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm) {
+  Univcoord_T *results;
+  int i, heapi, lefti, righti, k;
+  unsigned int *heap[32];
+  int nelts[32];
+  int nalloc, npadded;
+  UINT4 *prev_storage, *curr_storage;
+
+  debug(printf("merge_compoundpos, sizes:"));
+
+  if (compoundpos->n == 4) {
+    nelts[7] = compoundpos->npositions[3];
+    nelts[6] = compoundpos->npositions[2];
+    nelts[5] = compoundpos->npositions[1];
+    nelts[4] = compoundpos->npositions[0];
+    debug(printf(" %d %d %d %d\n",nelts[4],nelts[5],nelts[6],nelts[7]));
+
+    nelts[3] = nelts[6] + nelts[7];
+    nelts[2] = nelts[4] + nelts[5];
+
+    npadded = PAD_UINT4(nelts[2]) + PAD_UINT4(nelts[3]) + PAD_UINT4(nelts[4]) + PAD_UINT4(nelts[5]) + PAD_UINT4(nelts[6]) + PAD_UINT4(nelts[7]);
+    prev_storage = (UINT4 *) MALLOC_ALIGN(npadded * sizeof(UINT4));
+    nalloc = 0;
+    heap[2] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[2]);
+    heap[3] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[3]);
+    heap[4] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[4]);
+    heap[5] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[5]);
+    heap[6] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[6]);
+    heap[7] = &(prev_storage[nalloc]);
+
+    /* Merge_uint4 is destructive, but we are copying compoundpos->positions.
+       Nodes 4..7 hold the copies of the four input lists; nodes 2 and 3
+       receive the pairwise merges, all inside prev_storage. */
+    memcpy(heap[4],compoundpos->positions[0],nelts[4]*sizeof(UINT4));
+    memcpy(heap[5],compoundpos->positions[1],nelts[5]*sizeof(UINT4));
+    Merge_uint4(/*dest*/heap[2],heap[4],heap[5],nelts[4],nelts[5]);
+
+    memcpy(heap[6],compoundpos->positions[2],nelts[6]*sizeof(UINT4));
+    memcpy(heap[7],compoundpos->positions[3],nelts[7]*sizeof(UINT4));
+    Merge_uint4(/*dest*/heap[3],heap[6],heap[7],nelts[6],nelts[7]);
+
+    heap[1] = Merge_uint4(/*dest*/NULL,heap[2],heap[3],nelts[2],nelts[3]); /* presumably a NULL dest makes Merge_uint4 allocate the result -- confirm */
+    *nmerged = nelts[2] + nelts[3];
+
+#if defined(HAVE_SSE4_1)
+    /* Spanningelt procedure is not prepared for memory from _mm_malloc.
+       NOTE(review): MALLOC_ALIGN itself expands to _mm_malloc whenever
+       HAVE_SSE4_1 is defined (see mem.h), so the copy below still hands
+       back _mm_malloc memory -- confirm the intent of this comment. */
+    if (*nmerged == 0) {
+      results = (unsigned int *) NULL;
+    } else {
+      results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int));
+      memcpy(results,heap[1],(*nmerged) * sizeof(unsigned int));
+    }
+
+    _mm_free(heap[1]);
+    /* _mm_free(prev_storage); */
+#else
+    results = heap[1];
+    /* FREE(prev_storage); */
+#endif
+    FREE_ALIGN(prev_storage);
+
+
+    for (i = 0; i < *nmerged; i++) {
+      results[i] += diagterm;
+    }
+
+    CHECK_ALIGN(results);
+    return results;
+
+  } else {
+    npadded = 0;
+    for (heapi = 16; heapi < 32; heapi++) {
+      nelts[heapi] = compoundpos->npositions[heapi-16];
+      npadded += PAD_UINT4(nelts[heapi]);
+    }
+
+    /* Merge_uint4 is destructive, but we are copying compoundpos->positions.
+       Nodes 16..31 are the leaves, one per list positions[0..15].
+       NOTE(review): this branch reads 16 lists regardless of
+       compoundpos->n -- presumably n is always 4 or 16; confirm. */
+    prev_storage = (UINT4 *) MALLOC_ALIGN(npadded * sizeof(UINT4));
+    nalloc = 0;
+    for (heapi = 16; heapi < 32; heapi++) {
+      heap[heapi] = &(prev_storage[nalloc]);
+      memcpy(heap[heapi],compoundpos->positions[heapi-16],nelts[heapi]*sizeof(UINT4));
+      nalloc += PAD_UINT4(nelts[heapi]);
+    }
+
+    debug(printf(" %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n",
+		 nelts[16],nelts[17],nelts[18],nelts[19],
+		 nelts[20],nelts[21],nelts[22],nelts[23],
+		 nelts[24],nelts[25],nelts[26],nelts[27],
+		 nelts[28],nelts[29],nelts[30],nelts[31]));
+    debug(printf("npadded = %d\n",npadded));
+
+
+    curr_storage = (UINT4 *) MALLOC_ALIGN(npadded * sizeof(UINT4)); /* second buffer: tree levels alternate between the two */
+    nalloc = 0;
+    for (heapi = 8; heapi < 16; heapi++) { /* nodes 8..15 are written into curr_storage */
+      heap[heapi] = &(curr_storage[nalloc]);
+      lefti = LEFT(heapi);
+      righti = RIGHT(heapi);
+      Merge_uint4(/*dest*/heap[heapi],heap[lefti],heap[righti],nelts[lefti],nelts[righti]);
+      nelts[heapi] = nelts[lefti] + nelts[righti];
+      nalloc += PAD_UINT4(nelts[heapi]);
+    }
+
+    nalloc = 0;
+    for (heapi = 4; heapi < 8; heapi++) { /* nodes 4..7 reuse prev_storage, whose leaves are already merged */
+      heap[heapi] = &(prev_storage[nalloc]);
+      lefti = LEFT(heapi);
+      righti = RIGHT(heapi);
+      Merge_uint4(/*dest*/heap[heapi],heap[lefti],heap[righti],nelts[lefti],nelts[righti]);
+      nelts[heapi] = nelts[lefti] + nelts[righti];
+      nalloc += PAD_UINT4(nelts[heapi]);
+    }
+
+    heap[2] = &(curr_storage[0]); /* nodes 2 and 3 go back into curr_storage */
+    Merge_uint4(/*dest*/heap[2],heap[4],heap[5],nelts[4],nelts[5]);
+    nelts[2] = nelts[4] + nelts[5];
+    heap[3] = &(curr_storage[PAD_UINT4(nelts[2])]);
+    Merge_uint4(/*dest*/heap[3],heap[6],heap[7],nelts[6],nelts[7]);
+    nelts[3] = nelts[6] + nelts[7];
+
+    heap[1] = &(prev_storage[0]); /* the root is written at the start of prev_storage */
+    Merge_uint4(/*dest*/heap[1],heap[2],heap[3],nelts[2],nelts[3]);
+    *nmerged = nelts[2] + nelts[3];
+
+#if defined(HAVE_SSE4_1)
+    /* Spanningelt procedure is not prepared for memory from _mm_malloc.
+       The root lives inside prev_storage, so it is copied out before
+       prev_storage is released; an empty result is returned as NULL. */
+    if (*nmerged == 0) {
+      results = (unsigned int *) NULL;
+    } else {
+      results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int));
+      memcpy(results,heap[1],(*nmerged) * sizeof(unsigned int));
+    }
+
+    _mm_free(prev_storage);
+    /* _mm_free(curr_storage); */
+#else
+    results = heap[1];
+    /* FREE(curr_storage); */
+#endif
+    FREE_ALIGN(curr_storage);
+
+    for (k = 0; k < *nmerged; k++) {
+      results[k] += diagterm;
+    }
+
+    CHECK_ALIGN(results);
+    return results;
+  }
+}
+
+#endif
 
 
 /* Should be the same as count_one_shift(this,oligo,1) */
diff --git a/src/interval.c b/src/interval.c
index 741aacd..9af14fc 100644
--- a/src/interval.c
+++ b/src/interval.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: interval.c 135351 2014-05-07 15:56:14Z twu $";
+static char rcsid[] = "$Id: interval.c 207855 2017-06-29 20:34:17Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -143,7 +143,35 @@ Interval_overlap_p (Chrpos_T x, Chrpos_T y, struct T *intervals, int index) {
   }
 }
 
+#if 0
+/* Was previously called Interval_contained_p */
+/* Reports whether the interval numbered index covers the whole region
+   [x,y].  index is one-based while the intervals array is zero-based,
+   hence the subtraction. */
+bool
+Interval_contains_region_p (unsigned int x, unsigned int y, struct T *intervals, int index) {
+  const unsigned int start = intervals[index-1].low;
+  const unsigned int end = intervals[index-1].high;
+
+  /* interval contains region */
+  return (start <= x && y <= end) ? true : false;
+}
+
+/* Reports whether the interval numbered index lies entirely inside the
+   region [x,y].  index is one-based while the intervals array is
+   zero-based, hence the subtraction. */
+bool
+Interval_contained_by_region_p (unsigned int x, unsigned int y, struct T *intervals, int index) {
+  const unsigned int start = intervals[index-1].low;
+  const unsigned int end = intervals[index-1].high;
+
+  /* region contains interval */
+  return (x <= start && end <= y) ? true : false;
+}
+#endif
 
 
 /************************************************************************/
@@ -193,14 +221,46 @@ omega_compare (const void *i, const void *j) {
 /* These routines sort table[i..j] in place.  Assume that
    current_intervals has been set. */
 void
-Interval_qsort_by_sigma (int *table, int i, int j, struct T *intervals) {
+Interval_qsort_by_sigma (int *table, int i, int j, struct T *intervals,
+			 bool presortedp) {
+  int k;
+  bool sortedp = true;
+
+  if (presortedp == true) {
+    for (k = i + 1; sortedp == true && k < j; k++) {
+      if (intervals[k-1].low > intervals[k].low) {
+	fprintf(stderr,"Intervals are not sorted by sigma\n");
+	sortedp = false;
+      }
+    }
+    if (sortedp == true) {
+      return;
+    }
+  }
+
   current_intervals = intervals;
   qsort(&(table[i]), j - i + 1, sizeof(int), sigma_compare);
   return;
 }
 
 void
-Interval_qsort_by_omega (int *table, int i, int j, struct T *intervals) {
+Interval_qsort_by_omega (int *table, int i, int j, struct T *intervals,
+			 bool presortedp) {
+  int k;
+  bool sortedp = true;
+
+  if (presortedp == true) {
+    for (k = i + 1; sortedp == true && k < j; k++) {
+      if (intervals[k-1].high > intervals[k].high) {
+	fprintf(stderr,"Intervals are not sorted by omega\n");
+	sortedp = false;
+      }
+    }
+    if (sortedp == true) {
+      return;
+    }
+  }
+
   current_intervals = intervals;
   qsort(&(table[i]), j - i + 1, sizeof(int), omega_compare);
   return;
diff --git a/src/interval.h b/src/interval.h
index 8e0bede..b6a1fe9 100644
--- a/src/interval.h
+++ b/src/interval.h
@@ -1,4 +1,4 @@
-/* $Id: interval.h 157221 2015-01-22 18:38:57Z twu $ */
+/* $Id: interval.h 207855 2017-06-29 20:34:17Z twu $ */
 #ifndef INTERVAL_INCLUDED
 #define INTERVAL_INCLUDED
 
@@ -50,9 +50,11 @@ extern bool
 Interval_overlap_p (Chrpos_T x, Chrpos_T y, struct T *intervals, int index);
 
 extern void
-Interval_qsort_by_sigma (int *table, int i, int j, struct T *intervals);
+Interval_qsort_by_sigma (int *table, int i, int j, struct T *intervals,
+			 bool presortedp);
 extern void
-Interval_qsort_by_omega (int *table, int i, int j, struct T *intervals);
+Interval_qsort_by_omega (int *table, int i, int j, struct T *intervals,
+			 bool presortedp);
 
 extern int
 Interval_cmp (const void *a, const void *b);
diff --git a/src/intlist.c b/src/intlist.c
index e3c040c..605d455 100644
--- a/src/intlist.c
+++ b/src/intlist.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: intlist.c 193875 2016-07-12 02:43:38Z twu $";
+static char rcsid[] = "$Id: intlist.c 207385 2017-06-15 20:59:42Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -281,7 +281,26 @@ Intlist_to_char_array (int *n, T list) {
   if (*n == 0) {
     return NULL;
   } else {
-    array = (char *) CALLOC(*n + 1,sizeof(char));
+    array = (char *) MALLOC((*n + 1)*sizeof(char));
+    for (i = 0; i < *n; i++) {
+      array[i] = (char) list->first;
+      list = list->rest;
+    }
+    array[*n] = '\0';
+    return array;
+  }
+}
+
+char *
+Intlist_to_char_array_in (int *n, T list) {
+  char *array;
+  int i;
+
+  *n = Intlist_length(list);
+  if (*n == 0) {
+    return NULL;
+  } else {
+    array = (char *) MALLOC_IN((*n + 1)*sizeof(char));
     for (i = 0; i < *n; i++) {
       array[i] = (char) list->first;
       list = list->rest;
diff --git a/src/intlist.h b/src/intlist.h
index 537c32e..2edc5f7 100644
--- a/src/intlist.h
+++ b/src/intlist.h
@@ -1,4 +1,4 @@
-/* $Id: intlist.h 193875 2016-07-12 02:43:38Z twu $ */
+/* $Id: intlist.h 207385 2017-06-15 20:59:42Z twu $ */
 #ifndef INTLIST_INCLUDED
 #define INTLIST_INCLUDED
 
@@ -51,6 +51,8 @@ extern int *
 Intlist_to_array_out (int *n, T list);
 extern char *
 Intlist_to_char_array (int *n, T list);
+extern char *
+Intlist_to_char_array_in (int *n, T list);
 extern T
 Intlist_from_array (int *array, int n);
 extern T 
diff --git a/src/intron.c b/src/intron.c
index 4fbbf95..4c0433d 100644
--- a/src/intron.c
+++ b/src/intron.c
@@ -1,9 +1,10 @@
-static char rcsid[] = "$Id: intron.c 99737 2013-06-27 19:33:03Z twu $";
+static char rcsid[] = "$Id: intron.c 204388 2017-03-18 00:03:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include "intron.h"
+#include "sense.h"
 #include <stdlib.h>		/* For abort() */
 
 
@@ -167,13 +168,51 @@ Intron_type (char left1, char left2, char right2, char right1,
       return introntype;
     }
   } else {
-    /* Should happen only from Stage3_merge_local_splice */
+    /* Should happen only from Stage3_merge_local_splice and Indel_resolve_middle_deletion */
     /* return NONINTRON; */
     return introntype;		/* Needed for guess */
   }
 }
 
 
+/* Maps an intron type to a sense direction: the forward GT-AG, GC-AG and
+   AT-AC types give SENSE_FORWARD, their reverse counterparts (not
+   compiled under PMAP) give SENSE_ANTI, and anything else SENSE_NULL. */
+int
+Intron_sensedir (int introntype) {
+  if (introntype == GTAG_FWD || introntype == GCAG_FWD || introntype == ATAC_FWD) {
+    return SENSE_FORWARD;
+  }
+#ifndef PMAP
+  if (introntype == GTAG_REV || introntype == GCAG_REV || introntype == ATAC_REV) {
+    return SENSE_ANTI;
+  }
+#endif
+  return SENSE_NULL;
+}
+
+/* Like Intron_sensedir, but only the GT-AG and GC-AG types determine a
+   direction; AT-AC and every other type give SENSE_NULL. */
+int
+Intron_canonical_sensedir (int introntype) {
+  if (introntype == GTAG_FWD || introntype == GCAG_FWD) {
+    return SENSE_FORWARD;
+  }
+#ifndef PMAP
+  if (introntype == GTAG_REV || introntype == GCAG_REV) {
+    return SENSE_ANTI;
+  }
+#endif
+  return SENSE_NULL;
+}
+
+
+/* Ranks an intron type: 3 for GT-AG, 2 for GC-AG, 1 for AT-AC (either
+   direction; reverse types are not compiled under PMAP), 0 otherwise. */
+int
+Intron_level (int introntype) {
+  int level = 0;
+
+  if (introntype == GTAG_FWD) {
+    level = 3;
+  } else if (introntype == GCAG_FWD) {
+    level = 2;
+  } else if (introntype == ATAC_FWD) {
+    level = 1;
+#ifndef PMAP
+  } else if (introntype == GTAG_REV) {
+    level = 3;
+  } else if (introntype == GCAG_REV) {
+    level = 2;
+  } else if (introntype == ATAC_REV) {
+    level = 1;
+#endif
+  }
+
+  return level;
+}
+
 char *
 Intron_type_string (int introntype) {
   switch (introntype) {
diff --git a/src/intron.h b/src/intron.h
index 24b4541..4ee3817 100644
--- a/src/intron.h
+++ b/src/intron.h
@@ -1,4 +1,4 @@
-/* $Id: intron.h 157221 2015-01-22 18:38:57Z twu $ */
+/* $Id: intron.h 204388 2017-03-18 00:03:34Z twu $ */
 #ifndef INTRON_INCLUDED
 #define INTRON_INCLUDED
 
@@ -46,6 +46,14 @@ Intron_type (char left1, char left2, char right2, char right1,
 	     Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
 #endif
 );
+
+extern int
+Intron_sensedir (int introntype);
+extern int
+Intron_canonical_sensedir (int introntype);
+extern int
+Intron_level (int introntype);
+
 extern char *
 Intron_type_string (int introntype);
 
diff --git a/src/junction.c b/src/junction.c
index 5c1e3c1..042cc11 100644
--- a/src/junction.c
+++ b/src/junction.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: junction.c 183888 2016-02-05 20:33:29Z twu $";
+static char rcsid[] = "$Id: junction.c 204389 2017-03-18 00:04:11Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -212,13 +212,16 @@ char *
 Junction_deletion_string (T this, Genome_T genome, bool plusp) {
   char *deletion_string;
   
-  deletion_string = (char *) MALLOC((this->nindels+1)*sizeof(char));
+  /* printf("Entered Junction_deletion_string with plusp %d\n",plusp); */
   /* printf("deletionpos = %u\n",this->deletionpos); */
+
+  deletion_string = (char *) MALLOC((this->nindels+1)*sizeof(char));
   Genome_fill_buffer_simple(genome,this->deletionpos,this->nindels,deletion_string);
   if (plusp == false) {
     make_complement_inplace(deletion_string,this->nindels);
   }
 
+  /* printf("string = %s\n",deletion_string); */
   return deletion_string;
 }
 
diff --git a/src/list.c b/src/list.c
index 02e7eb7..0f97f17 100644
--- a/src/list.c
+++ b/src/list.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: list.c 166641 2015-05-29 21:13:04Z twu $";
+static char rcsid[] = "$Id: list.c 207386 2017-06-15 21:00:14Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -170,7 +170,7 @@ List_to_array (T list, void *end) {
     return (void *) NULL;
   } else {
 #endif
-    array = (void **) CALLOC((n+1),sizeof(*array));
+    array = (void **) MALLOC((n+1)*sizeof(*array));
     for (i = 0; i < n; i++) {
       array[i] = list->first;
       list = list->rest;
@@ -236,7 +236,7 @@ List_to_array_out (T list, void *end) {
     return (void *) NULL;
   } else {
 #endif
-    array = (void **) CALLOC_OUT((n+1),sizeof(*array));
+    array = (void **) MALLOC_OUT((n+1)*sizeof(*array));
     for (i = 0; i < n; i++) {
       array[i] = list->first;
       list = list->rest;
@@ -257,7 +257,25 @@ List_to_array_n (int *n, T list) {
   if (*n == 0) {
     return NULL;
   } else {
-    array = (void **) CALLOC(*n,sizeof(*array));
+    array = (void **) MALLOC((*n)*sizeof(*array));
+    for (i = 0; i < *n; i++) {
+      array[i] = list->first;
+      list = list->rest;
+    }
+    return array;
+  }
+}
+
+void **
+List_to_array_out_n (int *n, T list) {
+  void **array;
+  int i;
+
+  *n = List_length(list);
+  if (*n == 0) {
+    return NULL;
+  } else {
+    array = (void **) MALLOC((*n)*sizeof(*array));
     for (i = 0; i < *n; i++) {
       array[i] = list->first;
       list = list->rest;
@@ -386,8 +404,9 @@ List_from_string (char *string) {
   while (*++scout != '\0') {
     if (*scout == ',') {
       substringlen = (scout-p)/sizeof(char);
-      substring = (char *) CALLOC(substringlen+1,sizeof(char));
+      substring = (char *) MALLOC((substringlen+1)*sizeof(char));
       strncpy(substring,p,substringlen);
+      substring[substringlen] = '\0';
       this = List_push(this,substring);
       scout++;
       p = scout;
@@ -395,8 +414,9 @@ List_from_string (char *string) {
   }
 
   substringlen = (scout-p)/sizeof(char);
-  substring = (char *) CALLOC(substringlen+1,sizeof(char));
+  substring = (char *) MALLOC((substringlen+1)*sizeof(char));
   strncpy(substring,p,substringlen);
+  substring[substringlen] = '\0';
   this = List_push(this,substring);
 
   return List_reverse(this);
diff --git a/src/list.h b/src/list.h
index 7bd93ca..b0f645c 100644
--- a/src/list.h
+++ b/src/list.h
@@ -1,4 +1,4 @@
-/* $Id: list.h 166641 2015-05-29 21:13:04Z twu $ */
+/* $Id: list.h 207386 2017-06-15 21:00:14Z twu $ */
 #ifndef LIST_INCLUDED
 #define LIST_INCLUDED
 
@@ -27,6 +27,7 @@ extern void List_fill_array_and_free (void **array, T *list);
 extern T List_fill_array_with_handle (struct T *new, void **array, int nelts);
 extern void **List_to_array_out (T list, void *end);
 extern void **List_to_array_n (int *n, T list);
+extern void **List_to_array_out_n (int *n, T list);
 extern T List_copy (T list);
 extern void
 List_dump (T list);
diff --git a/src/littleendian.h b/src/littleendian.h
index b28d150..7067645 100644
--- a/src/littleendian.h
+++ b/src/littleendian.h
@@ -1,4 +1,4 @@
-/* $Id: littleendian.h 157223 2015-01-22 18:43:01Z twu $ */
+/* $Id: littleendian.h 207318 2017-06-14 19:32:55Z twu $ */
 #ifndef LITTLEENDIAN_INCLUDED
 #define LITTLEENDIAN_INCLUDED
 #ifdef HAVE_CONFIG_H
@@ -21,6 +21,7 @@ Littleendian_write_uint8 (UINT8 value, int fd);
 #define FREAD_UINT(p,fp) fread(p,sizeof(UINT4),1,fp)
 #define FREAD_INTS(a,n,fp) fread(a,sizeof(int),n,fp)
 #define FREAD_UINTS(a,n,fp) fread(a,sizeof(UINT4),n,fp)
+#define FREAD_FLOATS(a,n,fp) fread(a,sizeof(float),n,fp)
 #ifdef HAVE_64_BIT
 #define FREAD_UINT8(p,fp) fread(p,sizeof(UINT8),1,fp)
 #define FREAD_UINT8S(a,n,fp) fread(a,sizeof(UINT8),n,fp)
@@ -36,6 +37,8 @@ Littleendian_write_uint8 (UINT8 value, int fd);
 #define WRITE_UINT8_AS_UINT(x,fd) Littleendian_write_uint8_as_uint(x,fd)
 #define FWRITE_INTS(a,n,fp) fwrite(a,sizeof(int),n,fp)
 #define FWRITE_UINTS(a,n,fp) fwrite(a,sizeof(UINT4),n,fp)
+#define FWRITE_FLOATS(a,n,fp) fwrite(a,sizeof(float),n,fp)
+
 #ifdef HAVE_64_BIT
 #define FWRITE_UINT8(x,fp) fwrite(&(x),sizeof(UINT8),1,fp)
 #define FWRITE_UINT8S(a,n,fp) fwrite(a,sizeof(UINT8),n,fp)
diff --git a/src/maxent_hr.c b/src/maxent_hr.c
index ca0a1e6..e540073 100644
--- a/src/maxent_hr.c
+++ b/src/maxent_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: maxent_hr.c 99737 2013-06-27 19:33:03Z twu $";
+static char rcsid[] = "$Id: maxent_hr.c 204392 2017-03-18 00:06:18Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -27230,6 +27230,8 @@ Maxent_hr_donor_prob (Univcoord_T splice_pos, Univcoord_T chroffset) {
 #endif
   
 
+  debug(printf("Maxent_hr_donor_prob called with %lu, chroffet %lu\n",splice_pos,chroffset));
+
   if (splice_pos < chroffset + DONOR_MODEL_LEFT_MARGIN) {
     return 0.0;
   } else {
@@ -27297,6 +27299,8 @@ Maxent_hr_acceptor_prob (Univcoord_T splice_pos, Univcoord_T chroffset) {
   char g_alt;
 #endif
 
+  debug(printf("Maxent_hr_acceptor_prob called with %lu, chroffet %lu\n",splice_pos,chroffset));
+
   if (splice_pos < chroffset + ACCEPTOR_MODEL_LEFT_MARGIN) {
     return 0.0;
   } else {
@@ -27368,6 +27372,8 @@ Maxent_hr_antidonor_prob (Univcoord_T splice_pos, Univcoord_T chroffset) {
   char g_alt;
 #endif
 
+  debug(printf("Maxent_hr_antidonor_prob called with %lu, chroffet %lu\n",splice_pos,chroffset));
+
   if (splice_pos < chroffset + DONOR_MODEL_RIGHT_MARGIN) {
     return 0.0;
   } else {
@@ -27434,6 +27440,8 @@ Maxent_hr_antiacceptor_prob (Univcoord_T splice_pos, Univcoord_T chroffset) {
   char g_alt;
 #endif
 
+  debug(printf("Maxent_hr_antiacceptor_prob called with %lu, chroffet %lu\n",splice_pos,chroffset));
+
   if (splice_pos < chroffset + ACCEPTOR_MODEL_RIGHT_MARGIN) {
     return 0.0;
   } else {
diff --git a/src/mem.h b/src/mem.h
index e607a5b..055d012 100644
--- a/src/mem.h
+++ b/src/mem.h
@@ -1,4 +1,4 @@
-/* $Id: mem.h 157223 2015-01-22 18:43:01Z twu $ */
+/* $Id: mem.h 202588 2017-01-13 23:28:23Z twu $ */
 #ifndef MEM_INCLUDED
 #define MEM_INCLUDED
 #ifdef HAVE_CONFIG_H
@@ -16,6 +16,11 @@
 #include <alloca.h>
 #endif
 
+#ifdef HAVE_SSE4_1
+/* For SIMD merge, which requires SSE4.1 as a minimum */
+#include "xmmintrin.h"		/* For MALLOC_ALIGN */
+#endif
+
 
 #define MAX_QUERYLENGTH_STACK 10000
 
@@ -86,6 +91,7 @@ extern void *Mem_resize (void *ptr, size_t nbytes,
 #define MTRAP(location) Mem_trap_start((location), __FILE__, __LINE__)
 #define MCHECK() Mem_trap_check(__FILE__, __LINE__)
 
+
 #define MALLOC(nbytes) Mem_alloc((nbytes), __FILE__, __LINE__)
 #define CALLOC(count, nbytes) Mem_calloc((count), (nbytes), __FILE__, __LINE__)
 #define FREE(ptr) ((void)(Mem_free((ptr),__FILE__, __LINE__), (ptr) = 0))
@@ -111,6 +117,24 @@ extern void *Mem_resize (void *ptr, size_t nbytes,
 #endif
 
 
+/* Aligned allocation for the SIMD merge routines.  The alignment follows
+   the widest instruction set enabled at build time (64 bytes for AVX512,
+   32 for AVX2, 16 for SSE4.1); without SIMD the macros fall back to the
+   ordinary MALLOC/FREE and CHECK_ALIGN expands to nothing.  Memory from
+   MALLOC_ALIGN must be released with FREE_ALIGN.  Macro arguments are
+   parenthesized so that an expression argument such as p + n is cast and
+   tested as a whole. */
+#ifdef HAVE_AVX512
+#define MALLOC_ALIGN(x) _mm_malloc((x),64)
+#define FREE_ALIGN(x) _mm_free(x)
+#define CHECK_ALIGN(x) assert(((unsigned long) (x)) % 64 == 0)
+#elif defined(HAVE_AVX2)
+#define MALLOC_ALIGN(x) _mm_malloc((x),32)
+#define FREE_ALIGN(x) _mm_free(x)
+#define CHECK_ALIGN(x) assert(((unsigned long) (x)) % 32 == 0)
+#elif defined(HAVE_SSE4_1)
+#define MALLOC_ALIGN(x) _mm_malloc((x),16)
+#define FREE_ALIGN(x) _mm_free(x)
+#define CHECK_ALIGN(x) assert(((unsigned long) (x)) % 16 == 0)
+#else
+#define MALLOC_ALIGN(x) MALLOC(x)
+#define FREE_ALIGN(x) FREE(x)
+#define CHECK_ALIGN(x)
+#endif
+
+
 
 #ifdef MEMUSAGE
 #define MALLOC_KEEP(nbytes) Mem_alloc_keep((nbytes), __FILE__, __LINE__)
diff --git a/src/merge-heap.c b/src/merge-heap.c
new file mode 100644
index 0000000..d8dabfa
--- /dev/null
+++ b/src/merge-heap.c
@@ -0,0 +1,400 @@
+static char rcsid[] = "$Id: merge-heap.c 201745 2016-12-16 16:51:24Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "merge-heap.h"
+#include "assert.h"
+#include "mem.h"
+#include "popcount.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		/* For memset */
+#define PYRAMID_SIZE 32		/* Maximum number of streams merged in one pass */
+#define KEY_MASK (~0U << 5)	/* Clears the low 5 bits of a key, which carry the stream index (0..31) */
+
+
+#ifdef DEBUG
+#define debug(x) x
+#else
+#define debug(x)
+#endif
+
+#ifdef DEBUG0
+#define debug0(x) x
+#else
+#define debug0(x)
+#endif
+
+#ifdef DEBUG6
+#define debug6(x) x
+#else
+#define debug6(x)
+#endif
+
+/* Index arithmetic for a 1-based binary heap.  Arguments are not parenthesized, so pass simple expressions only */
+#define PARENT(i) (i >> 1)
+#define LEFT(i) (i << 1)
+#define RIGHT(i) ((i << 1) | 1)
+/* Inserts diagonal into the 1-based binary min-heap heap[1..*heapsize], incrementing *heapsize */
+static void
+min_heap_insert (UINT4 *heap, int *heapsize, UINT4 diagonal) {
+  int i;
+
+  i = ++(*heapsize);		/* New slot at the end of the heap */
+  while (i > 1 && (heap[PARENT(i)] > diagonal)) {
+    heap[i] = heap[PARENT(i)];	/* Pull the larger parent down into the hole */
+    i = PARENT(i);
+  }
+  heap[i] = diagonal;
+
+  return;
+}
+
+
+/* Replaces the root (index 1) of the 1-based min-heap with diagonal and sifts it down.  Children are read without bounds checks, so the caller must have stored -1U sentinels below the live entries */
+static void
+heapify (unsigned int *heap, unsigned int diagonal, int merge_heap_size) {
+  int inserti, smallesti, righti;
+  int i;			/* Used only by the DEBUG6 dump below */
+
+  debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal));
+#ifdef DEBUG6
+  for (i = 1; i <= 2*merge_heap_size + 1; i++) {
+    printf("%d %u\n",i,heap[i]);
+  }
+  printf("\n");
+#endif
+
+  inserti = 1;			/* Hole starts at the root */
+  smallesti = (heap[3] < heap[2]) ? 3 : 2;
+  debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+		2,3,(unsigned long long) heap[2],(unsigned long long)heap[3]));
+  while (diagonal > heap[smallesti]) {
+    heap[inserti] = heap[smallesti];	/* Move the smaller child up into the hole */
+    inserti = smallesti;
+    smallesti = LEFT(inserti);
+    righti = smallesti+1;
+    debug6(printf("Comparing left %d/right %d: %llu and %llu\n",
+		  smallesti,righti,(unsigned long long) heap[smallesti],
+		  (unsigned long long) heap[righti]));
+    if (heap[righti] < heap[smallesti]) {
+      smallesti = righti;
+    }
+  }
+  heap[inserti] = diagonal;
+  debug6(printf("Inserting at %d\n\n",inserti));
+  return;
+}
+
+/* Drains merge_heap, writing records from nodes node_start.. into record_heap[ancestori] in key order.  Returns the number of records written */
+static int
+pyramid_merge_full (Record_T **record_heap, unsigned int **key_streams, unsigned int *merge_heap,
+		    int node_start, int ancestori, int merge_heap_size) {
+  int nelts = 0;
+  unsigned int diagonal;
+  int streami, k;
+  int ptrs[PYRAMID_SIZE];	/* Current read position within each stream */
+
+  k = 0;
+  memset(ptrs,0,PYRAMID_SIZE*sizeof(int));
+  while ((diagonal = merge_heap[1]) < -1U) {	/* -1U at the root means all streams are exhausted */
+    /* Convert integer to structure */
+    streami = diagonal & ~KEY_MASK;	/* Low bits of the key identify the source stream */
+    record_heap[ancestori][k++] = record_heap[node_start + streami][ptrs[streami]];
+    debug(printf("Writing %u (stream %d): %u\n",diagonal,streami,record_heap[ancestori][k-1]->diagonal));
+
+    /* Advance pointer and get next value */
+    diagonal = key_streams[streami][++ptrs[streami]];
+    heapify(merge_heap,diagonal,merge_heap_size);
+    nelts += 1;
+  }
+
+  return nelts;
+}
+
+/* Pops every stream off the four lists and builds, per heap node, an array of pointers into all_records.  Returns the node array; *nelts receives the per-node counts */
+static Record_T **
+make_record_heap (int **nelts, List_T stream_list, Intlist_T streamsize_list, 
+		  Intlist_T querypos_list, Intlist_T diagterm_list, int nstreams,
+		  int base, struct Record_T *all_records) {
+  Record_T **record_heap;
+  UINT4 *diagonals;
+  int heapsize, null_pyramid_start, heapi, basei;
+  int querypos, diagterm;
+  int i, k;			/* k is the next free slot in all_records */
+
+  heapsize = 2*nstreams - 1;
+  null_pyramid_start = (heapsize + PYRAMID_SIZE - 1)/PYRAMID_SIZE * PYRAMID_SIZE; /* full or partial pyramid for entries below this */
+
+  /* Add PYRAMID_SIZE to handle partial pyramid */
+  record_heap = (Record_T **) CALLOC(heapsize + PYRAMID_SIZE,sizeof(Record_T *));
+  *nelts = (int *) CALLOC(heapsize + PYRAMID_SIZE,sizeof(int));
+
+  /* Process as (base - 1) downto nstreams, then heapsize downto base,
+     because stream_list is in reverse order of elts */
+  k = 0;
+  for (heapi = base - 1; heapi >= PARENT(null_pyramid_start); heapi--) {
+    /* Put all information into penultimate row */
+    stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */
+    streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi]));
+    querypos_list = Intlist_pop(querypos_list,&querypos);
+    diagterm_list = Intlist_pop(diagterm_list,&diagterm);
+    record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T));
+    debug(printf("NULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi]));
+
+    for (i = 0; i < (*nelts)[heapi]; i++) {
+      /* Process in forward order to keep records in order */
+      all_records[k].diagonal = diagonals[i] + diagterm;
+      all_records[k].querypos = querypos;
+      record_heap[heapi][i] = &(all_records[k]);
+      debug(printf(" %u+%d",diagonals[i],querypos));
+      k++;
+    }
+    debug(printf("\n"));
+  }
+    
+  for ( ; heapi >= nstreams; heapi--) {
+    /* Move all information down to left child */
+    basei = LEFT(heapi);
+    stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */
+    streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[basei]));
+    querypos_list = Intlist_pop(querypos_list,&querypos);
+    diagterm_list = Intlist_pop(diagterm_list,&diagterm);
+    record_heap[basei] = (Record_T *) MALLOC(((*nelts)[basei]) * sizeof(Record_T));
+    debug(printf("PART: Assigning node %d => %d with %d elts (%p)",heapi,basei,(*nelts)[basei],record_heap[basei]));
+
+    for (i = 0; i < (*nelts)[basei]; i++) {
+      /* Process in forward order to keep records in order */
+      all_records[k].diagonal = diagonals[i] + diagterm;
+      all_records[k].querypos = querypos;
+      record_heap[basei][i] = &(all_records[k]);
+      debug(printf(" %u+%d",diagonals[i],querypos));
+      k++;
+    }
+    debug(printf("\n"));
+  }
+
+  for (heapi = heapsize; heapi >= base; heapi--) {
+    /* Put all information into base row */
+    stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */
+    streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi]));
+    querypos_list = Intlist_pop(querypos_list,&querypos);
+    diagterm_list = Intlist_pop(diagterm_list,&diagterm);
+    record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T));
+    debug(printf("FULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi]));
+
+    for (i = 0; i < (*nelts)[heapi]; i++) {
+      /* Process in forward order to keep records in order */
+      all_records[k].diagonal = diagonals[i] + diagterm;
+      all_records[k].querypos = querypos;
+      record_heap[heapi][i] = &(all_records[k]);
+      debug(printf(" %u+%d",diagonals[i],querypos));
+      k++;
+    }
+    debug(printf("\n"));
+  }
+
+  return record_heap;
+}
+
+/* Merges all streams into one array of Record_T pointers ordered by diagonal.  Stores the count in *nelts1; returns NULL for an empty stream_list.  The result is not freed here */
+/* For initializing heap, there are three categories:
+   base..(heapsize % PYRAMID_SIZE) + PYRAMID_SIZE: Fill bottom row
+   straddling heapsize: Pull down some nodes to bottom row
+   heapsize..(2*base - 1): Fill penultimate row */
+Record_T *
+Merge_records_heap (int *nelts1, List_T stream_list, Intlist_T streamsize_list,
+		    Intlist_T querypos_list, Intlist_T diagterm_list, 
+		    struct Record_T *all_records) {
+  Record_T *result, **record_heap, curr;
+  UINT4 *key_streams[PYRAMID_SIZE];
+  UINT4 merge_heap[2*PYRAMID_SIZE+1+1]; /* Add second 1 because top node is at 1 */
+  UINT4 *storage;
+  int *nelts, nalloc;
+  int nstreams, heapsize, base, ancestori, pyramid_start, pyramid_end,
+    node_start, node_end, start, end;
+  int merge_heap_size;
+  int bits;
+  int heapi, streami, i, j;
+
+  debug(printf("Entered Merge_records\n"));
+
+  if ((nstreams = List_length(stream_list)) == 0) {
+    *nelts1 = 0;
+    return (Record_T *) NULL;
+
+  } else {
+    heapsize = 2*nstreams - 1;	/* also index of last node */
+#ifdef HAVE_BUILTIN_CLZ
+    bits = 31 - __builtin_clz(heapsize);	/* Index of the highest set bit */
+#elif defined(HAVE_ASM_BSR)
+    asm("bsr %1,%0" : "=r"(bits) : "r"(heapsize));
+#else
+    bits = 31 - ((heapsize >> 16) ? clz_table[heapsize >> 16] : 16 + clz_table[heapsize]); 
+#endif
+    base = (1 << bits);		/* Largest power of 2 not exceeding heapsize */
+    debug(printf("nstreams %d, heapsize %d, base %d\n",nstreams,heapsize,base));
+    record_heap = make_record_heap(&nelts,stream_list,streamsize_list,querypos_list,diagterm_list,
+				   nstreams,base,all_records);
+  }
+
+
+  while (base > 1) {		/* Each pass merges the row starting at base into ancestor nodes */
+    if (base < PYRAMID_SIZE) {	/* Row base..2*base-1 fits in a single pyramid: merge it into node 1 */
+      pyramid_start = base;
+      pyramid_end = 2*base - 1;
+
+      ancestori = 1;
+      debug(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams));
+
+      /* Allocate memory for the pyramid key_streams */
+      nalloc = 0;
+      for (heapi = pyramid_start; heapi <= pyramid_end; heapi++) {
+	nalloc += (nelts[heapi] + 1);	/* +1 for the -1U terminator of each stream */
+      }
+      storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4));
+
+      /* Convert structures to integers (key_streams) */
+      nalloc = 0;
+      merge_heap_size = 0;
+      for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) {
+	key_streams[streami] = &(storage[nalloc]);
+	for (i = 0; i < nelts[heapi]; i++) {
+	  key_streams[streami][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami;
+	}
+	key_streams[streami][i] = -1U;	/* Sentinel terminating the stream */
+	nalloc += (i + 1);	/* nelts[heapi] + 1 */
+
+	min_heap_insert(merge_heap,&merge_heap_size,key_streams[streami][0]);
+      }
+
+      /* Set up bounds of heap (sentinels) */
+      assert(merge_heap_size <= PYRAMID_SIZE);
+      debug(printf("merge_heap_size is %d\n",merge_heap_size));
+      for (i = merge_heap_size+1; i <= 2*merge_heap_size+1; i++) {
+	merge_heap[i] = -1U;
+      }
+
+      /* Merge and convert integers to structures */
+      record_heap[1] = (Record_T *) MALLOC(nalloc * sizeof(Record_T));
+      nelts[1] = pyramid_merge_full(record_heap,key_streams,merge_heap,pyramid_start,ancestori,merge_heap_size);
+
+      /* Free base heaps */
+      for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) {
+	FREE(record_heap[pyramid_start + streami]);
+      }
+
+      /* Free key_streams storage */
+      FREE(storage);
+
+    } else {
+      for (pyramid_start = 2*base - PYRAMID_SIZE, pyramid_end = 2*base - 1; pyramid_start >= base;
+	   pyramid_start -= PYRAMID_SIZE, pyramid_end -= PYRAMID_SIZE) {
+	debug(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d",pyramid_start,pyramid_end,nstreams));
+
+	if (pyramid_start > heapsize) {	/* Pyramid lies wholly past the last node: use the parents' row instead */
+	  node_start = PARENT(pyramid_start);
+	  node_end = PARENT(pyramid_end);
+	  debug(printf(" => node_start %d, node_end %d\n",node_start,node_end));
+	} else {
+	  node_start = pyramid_start;
+	  node_end = pyramid_end;
+	}
+	debug(printf("\n"));
+
+	/* Determine ancestori */
+	start = node_start;
+	end = node_end;
+	while ((start = PARENT(start)) < (end = PARENT(end))) ;	/* Climb until both ends reach the same node */
+	ancestori = start;
+
+	/* Allocate memory for the pyramid key_streams */
+	nalloc = 0;
+	for (heapi = node_start; heapi <= node_end; heapi++) {
+	  nalloc += (nelts[heapi] + 1);
+	}
+	storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4));
+
+	/* Convert structures to integers (key_streams) */
+	nalloc = 0;
+	merge_heap_size = 0;
+	for (heapi = node_start, streami = 0; heapi <= node_end; heapi++, streami++) {
+	  key_streams[streami] = &(storage[nalloc]);
+	  for (i = 0; i < nelts[heapi]; i++) {
+	    key_streams[streami][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami;
+	  }
+	  key_streams[streami][i] = -1U;
+	  nalloc += (i + 1);	/* nelts[heapi] + 1 */
+
+	  min_heap_insert(merge_heap,&merge_heap_size,key_streams[streami][0]);
+	}
+
+#ifdef DEBUG
+	for (heapi = node_start, streami = 0; heapi <= node_end; heapi++, streami++) {
+	  printf("key_stream %d:",streami);
+	  for (i = 0; i <= nelts[heapi]; i++) {
+	    printf(" %u",key_streams[streami][i]);
+	  }
+	  printf("\n");
+	}
+#endif
+
+	/* Set up bounds of heap (sentinels) */
+	assert(merge_heap_size <= PYRAMID_SIZE);
+	debug(printf("merge_heap_size is %d\n",merge_heap_size));
+	for (i = merge_heap_size+1; i <= 2*merge_heap_size+1; i++) {
+	  merge_heap[i] = -1U;
+	}
+
+	/* Merge and convert integers to structures */
+	record_heap[ancestori] = (Record_T *) MALLOC(nalloc * sizeof(Record_T));
+	nelts[ancestori] = pyramid_merge_full(record_heap,key_streams,merge_heap,node_start,ancestori,merge_heap_size);
+
+	/* Free base heaps */
+	for (heapi = node_start; heapi <= node_end; heapi++) {
+	  FREE(record_heap[heapi]);
+	}
+
+	/* Free key_streams storage */
+	FREE(storage);
+      }
+    }
+
+    base = ancestori;		/* Next pass works on the row holding the newly merged nodes */
+  }
+
+  *nelts1 = nelts[1];
+  result = record_heap[1];
+
+  /* Final insertion sort to correct for truncation of keys */
+  for (j = 1; j < *nelts1; j++) {
+    curr = result[j];
+    i = j - 1;
+    /* For a stable merge sort, is the second condition possible? */
+    while (i >= 0 && (result[i]->diagonal > curr->diagonal ||
+		     (result[i]->diagonal == curr->diagonal &&
+		      result[i]->querypos > curr->querypos))) {
+      assert(result[i]->diagonal > curr->diagonal);	/* NOTE(review): fails if the equal-diagonal branch above is ever taken */
+      result[i+1] = result[i];
+      i--;
+    }
+    result[i+1] = curr;
+  }
+
+
+  FREE(nelts);
+  FREE(record_heap);		/* Frees only the node table; result (node 1) is returned */
+
+#ifdef DEBUG0
+  printf("Merge_records returning result of length %d\n",*nelts1);
+  for (i = 0; i < *nelts1; i++) {
+    printf("%u %d\n",result[i]->diagonal,result[i]->querypos);
+  }
+#endif
+
+  return result;
+}
+
+
diff --git a/src/merge-heap.h b/src/merge-heap.h
new file mode 100644
index 0000000..5e79dc9
--- /dev/null
+++ b/src/merge-heap.h
@@ -0,0 +1,19 @@
+#ifndef MERGE_HEAP_INCLUDED
+#define MERGE_HEAP_INCLUDED
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include "list.h"
+#include "intlist.h"
+#include "merge.h"		/* For Record_T */
+
+/* Merges the given streams into one array of Record_T pointers; *nelts1 receives its length (NULL and 0 when stream_list is empty) */
+extern Record_T *
+Merge_records_heap (int *nelts1, List_T stream_list, Intlist_T streamsize_list,
+		    Intlist_T querypos_list, Intlist_T diagterm_list, 
+		    struct Record_T *all_records);
+
+#endif
+
diff --git a/src/merge.c b/src/merge.c
new file mode 100644
index 0000000..9e8a758
--- /dev/null
+++ b/src/merge.c
@@ -0,0 +1,1110 @@
+static char rcsid[] = "$Id: merge.c 205967 2017-05-04 00:49:41Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "merge.h"
+#include "assert.h"
+#include "mem.h"
+#include "popcount.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>		/* For memcpy */
+
+
+#if defined(HAVE_SSE4_1)
+#include <smmintrin.h>
+#endif
+#if defined(HAVE_AVX2)
+#include <immintrin.h>
+#endif
+#if defined(HAVE_AVX512)
+#include <immintrin.h>
+#endif
+
+
+/* #define PYRAMID_SIZE 4 */
+/* #define KEY_MASK (~0U << 2) */
+/* KEY_MASK clears the low log2(PYRAMID_SIZE) bits: 5 bits for 32, 2 bits for the alternative above */
+#define PYRAMID_SIZE 32
+#define KEY_MASK (~0U << 5)
+
+#ifdef DEBUG0
+#define debug0(x) x
+#else
+#define debug0(x)		/* Expands to nothing unless DEBUG0 is defined */
+#endif
+
+#ifdef DEBUG
+#define debug(x) x
+#else
+#define debug(x)		/* Expands to nothing unless DEBUG is defined */
+#endif
+
+#ifdef DEBUG2
+#define debug2(x) x
+#else
+#define debug2(x)		/* Expands to nothing unless DEBUG2 is defined */
+#endif
+
+
+#ifdef DEBUG
+#ifdef HAVE_SSE4_1
+static void
+print_vector (__m128i x, char *label) {
+  unsigned int *s = (unsigned int *) &x;	/* View the register as four 32-bit lanes */
+  /* Debug helper: prints label followed by the four lanes as unsigned ints */
+  printf("%s: %u %u %u %u\n",label,s[0],s[1],s[2],s[3]);
+  return;
+}
+#endif
+
+#ifdef HAVE_AVX2
+static void
+print_vector_256 (__m256i x, char *label) {
+  unsigned int *s = (unsigned int *) &x;	/* View the register as eight 32-bit lanes */
+  /* Debug helper: prints label followed by the eight lanes as unsigned ints */
+  printf("%s: %u %u %u %u %u %u %u %u\n",label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7]);
+  return;
+}
+#endif
+
+#ifdef HAVE_AVX512
+static void
+print_vector_512 (__m512i x, char *label) {
+  unsigned int *s = (unsigned int *) &x;	/* View the register as sixteen 32-bit lanes */
+  /* Debug helper: prints label followed by the sixteen lanes as unsigned ints */
+  printf("%s: %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+	 label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],
+	 s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]);
+  return;
+}
+#endif
+#endif
+
+
+
+#ifdef HAVE_SSE4_1
+/* Merges 4-lane vectors vA and vB (each expected in ascending order): *vMergedA gets the low half, *vMergedB the high half.  The min and max procedures require SSE4.1, which makes SSE4.1 the minimum requirement for SIMD-based merge */
+static void
+merge_4x4 (__m128i *__restrict__ vMergedA, __m128i *__restrict__ vMergedB, __m128i vA, __m128i vB) {
+  __m128i vTmp, vMin, vMax;
+
+  vMin = _mm_min_epu32(vA, vB);	/* Lane-wise unsigned min/max */
+  vMax = _mm_max_epu32(vA, vB);
+  /* print_vector(vMin,"Min 1"); */
+  /* print_vector(vMax,"Max 1"); */
+
+  vTmp = _mm_alignr_epi8(vMin, vMin, 4); /* Rotate Min by 4 */
+  vMin = _mm_min_epu32(vTmp, vMax);
+  vMax = _mm_max_epu32(vTmp, vMax);
+  /* print_vector(vTmp,"Tmp 2"); */
+  /* print_vector(vMin,"Min 2"); */
+  /* print_vector(vMax,"Max 2"); */
+
+  vTmp = _mm_alignr_epi8(vMin, vMin, 4);
+  vMin = _mm_min_epu32(vTmp, vMax);
+  vMax = _mm_max_epu32(vTmp, vMax);
+  /* print_vector(vTmp,"Tmp 3"); */
+  /* print_vector(vMin,"Min 3"); */
+  /* print_vector(vMax,"Max 3"); */
+
+  vTmp = _mm_alignr_epi8(vMin, vMin, 4);
+  vMin = _mm_min_epu32(vTmp, vMax);
+  /* print_vector(vTmp,"Tmp 4"); */
+  /* print_vector(vMin,"Min 4"); */
+
+  *vMergedB = _mm_max_epu32(vTmp, vMax);
+  *vMergedA = _mm_alignr_epi8(vMin, vMin, 4);	/* Fourth rotation restores lane order */
+
+  return;
+}
+
+/* Merges vector pairs (vA0,vA1) and (vB0,vB1) into vMergedA..vMergedD with three merge_4x4 calls.  Used only when AVX2 is unavailable */
+#ifndef HAVE_AVX2
+static void
+merge_8x8_network (__m128i *__restrict__ vMergedA, __m128i *__restrict__ vMergedB,
+		   __m128i *__restrict__ vMergedC, __m128i *__restrict__ vMergedD,
+		   __m128i vA0, __m128i vA1, __m128i vB0, __m128i vB1) {
+  merge_4x4(&(*vMergedA),&(*vMergedB),vA0,vB0);	/* Low blocks -> A,B */
+  merge_4x4(&(*vMergedC),&(*vMergedD),vA1,vB1);	/* High blocks -> C,D */
+
+  merge_4x4(&(*vMergedB),&(*vMergedC),*vMergedC,*vMergedB);	/* Re-merge the two middle vectors in place */
+  return;
+}
+#endif
+#endif
+
+
+#ifdef HAVE_AVX2
+/* Eight-lane counterpart of merge_4x4: *vMergedA gets the low half and *vMergedB the high half.  _mm256_alignr_epi8 rotates only within 128-bit lanes, */
+/* so use _mm256_permutevar8x32_epi32, which shuffles across lanes */
+static void
+merge_8x8 (__m256i *__restrict__ vMergedA, __m256i *__restrict__ vMergedB, __m256i vA, __m256i vB) {
+  __m256i vTmp, vMin, vMax;
+  __m256i vRot;
+
+  vRot = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);	/* Lane i takes lane i+1: rotate by one int */
+
+  /* print_vector_256(vA,"vA"); */
+  /* print_vector_256(vB,"vB"); */
+
+  /* 1 */
+  vMin = _mm256_min_epu32(vA, vB);
+  vMax = _mm256_max_epu32(vA, vB);
+  /* print_vector_256(vMin,"Min 1"); */
+  /* print_vector_256(vMax,"Max 1"); */
+
+  /* 2 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  vMax = _mm256_max_epu32(vTmp, vMax);
+
+  /* 3 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  vMax = _mm256_max_epu32(vTmp, vMax);
+
+  /* 4 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  vMax = _mm256_max_epu32(vTmp, vMax);
+
+  /* 5 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  vMax = _mm256_max_epu32(vTmp, vMax);
+
+  /* 6 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  vMax = _mm256_max_epu32(vTmp, vMax);
+
+  /* 7 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  vMax = _mm256_max_epu32(vTmp, vMax);
+
+  /* 8 */
+  vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  vMin = _mm256_min_epu32(vTmp, vMax);
+  /* print_vector_256(vTmp,"Tmp 8"); */
+  /* print_vector_256(vMin,"Min 8"); */
+
+  *vMergedB = _mm256_max_epu32(vTmp, vMax);
+  *vMergedA = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */
+  /* print_vector_256(*vMergedA,"vMergedA"); */
+  /* print_vector_256(*vMergedB,"vMergedB"); */
+  /* printf("\n"); */
+
+  return;
+}
+/* Merges vector pairs (vA0,vA1) and (vB0,vB1) into vMergedA..vMergedD with three merge_8x8 calls.  Used only when AVX-512 is unavailable */
+#ifndef HAVE_AVX512
+static void
+merge_16x16_network (__m256i *__restrict__ vMergedA, __m256i *__restrict__ vMergedB,
+		     __m256i *__restrict__ vMergedC, __m256i *__restrict__ vMergedD,
+		     __m256i vA0, __m256i vA1, __m256i vB0, __m256i vB1) {
+  merge_8x8(&(*vMergedA),&(*vMergedB),vA0,vB0);	/* Low blocks -> A,B */
+  merge_8x8(&(*vMergedC),&(*vMergedD),vA1,vB1);	/* High blocks -> C,D */
+
+  merge_8x8(&(*vMergedB),&(*vMergedC),*vMergedC,*vMergedB);	/* Re-merge the two middle vectors in place */
+  return;
+}
+#endif
+#endif
+
+/* Sixteen-lane counterpart of merge_4x4 for AVX-512: *vMergedA gets the low half, *vMergedB the high half */
+#ifdef HAVE_AVX512
+static void
+merge_16x16 (__m512i *__restrict__ vMergedA, __m512i *__restrict__ vMergedB, __m512i vA, __m512i vB) {
+  __m512i vTmp, vMin, vMax;
+  __m512i vRot;
+  int i;
+
+  vRot = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0);	/* Rotate by one int */
+
+  /* print_vector_512(vA,"vA"); */
+  /* print_vector_512(vB,"vB"); */
+
+  /* 1 */
+  vMin = _mm512_min_epu32(vA, vB);
+  vMax = _mm512_max_epu32(vA, vB);
+  /* print_vector_512(vMin,"Min 1"); */
+  /* print_vector_512(vMax,"Max 1"); */
+
+  /* 2..15 */
+  for (i = 0; i < 14; i++) {
+    vTmp = _mm512_permutexvar_epi32(vRot, vMin); /* Rotate Min by ints */
+    vMin = _mm512_min_epu32(vTmp, vMax);
+    vMax = _mm512_max_epu32(vTmp, vMax);
+    /* print_vector_512(vTmp,"Tmp 2"); */
+    /* print_vector_512(vMin,"Min 2"); */
+    /* print_vector_512(vMax,"Max 2"); */
+  }
+
+  /* 16 */
+  vTmp = _mm512_permutexvar_epi32(vRot, vMin); /* Rotate Min by ints */
+  vMin = _mm512_min_epu32(vTmp, vMax);
+  /* print_vector_512(vTmp,"Tmp 16"); */
+  /* print_vector_512(vMin,"Min 16"); */
+
+  *vMergedB = _mm512_max_epu32(vTmp, vMax);
+  *vMergedA = _mm512_permutexvar_epi32(vRot, vMin); /* Rotate Min by ints */
+  /* print_vector_512(*vMergedA,"vMergedA"); */
+  /* print_vector_512(*vMergedB,"vMergedB"); */
+  /* printf("\n"); */
+
+  return;
+}
+/* Merges vector pairs (vA0,vA1) and (vB0,vB1) into vMergedA..vMergedD with three merge_16x16 calls */
+static void
+merge_32x32_network (__m512i *__restrict__ vMergedA, __m512i *__restrict__ vMergedB,
+		     __m512i *__restrict__ vMergedC, __m512i *__restrict__ vMergedD,
+		     __m512i vA0, __m512i vA1, __m512i vB0, __m512i vB1) {
+  merge_16x16(&(*vMergedA),&(*vMergedB),vA0,vB0);	/* Low blocks -> A,B */
+  merge_16x16(&(*vMergedC),&(*vMergedD),vA1,vB1);	/* High blocks -> C,D */
+
+  merge_16x16(&(*vMergedB),&(*vMergedC),*vMergedC,*vMergedB);	/* Re-merge the two middle vectors in place */
+  return;
+}
+#endif
+
+
+
+/* Merges A (nA elts) and B (nB elts), each expected in ascending order, into one array.  Assumes padding to nearest 4 uints, and alignment to nearest 16 bytes */
+/* If dest is NULL, then allocates and returns memory.  Otherwise, fills in at dest.  Returns NULL when nA + nB == 0.  SIMD paths store a carried block back into A or B, so the inputs may be modified */
+unsigned int *
+Merge_uint4 (unsigned int *__restrict__ dest, unsigned int *__restrict__ A,
+	     unsigned int *__restrict__ B, int nA, int nB) {
+  unsigned int *C0, *C, *Aend, *Bend;
+  unsigned int nextA, nextB;
+  int nC;
+#ifdef HAVE_AVX512
+  __m512i vMerged512, vMerged512_0, vMerged512_1,
+    vOld512, vNew512, vOld512_0, vOld512_1, vNew512_0, vNew512_1;
+#endif
+#ifdef HAVE_AVX2
+  __m256i vMerged256, vMerged256_0, vMerged256_1,
+    vOld256, vNew256, vOld256_0, vOld256_1, vNew256_0, vNew256_1;
+#endif
+#ifdef HAVE_SSE4_1
+  __m128i vMerged128, vMerged128_0, vMerged128_1,
+    vOld128, vNew128, vOld128_0, vOld128_1, vNew128_0, vNew128_1;
+#endif
+
+
+  if ((nC = nA + nB) == 0) {
+    return (unsigned int *) NULL;
+  } else if (dest) {
+    C0 = C = dest;
+  } else {
+#if defined(HAVE_SSE4_1)
+    C0 = C = (unsigned int *) MALLOC_ALIGN(nC * sizeof(unsigned int));
+#else
+    C0 = C = (unsigned int *) MALLOC(nC * sizeof(unsigned int));
+#endif
+  }
+
+  Aend = &(A[nA]);
+  Bend = &(B[nB]);
+
+#ifdef HAVE_AVX512
+  if (A < Aend - 32 && B < Bend - 32) {	/* NOTE(review): Aend - 32 points before A when nA < 32 */
+    /* 32 ints = 1024 bits */
+    if ((nextA = A[32]) < (nextB = B[32])) {
+      vOld512_0 = _mm512_load_si512((__m512i *) B); B += 16;
+      vOld512_1 = _mm512_load_si512((__m512i *) B); B += 16;
+      vNew512_0 = _mm512_load_si512((__m512i *) A); A += 16;
+      vNew512_1 = _mm512_load_si512((__m512i *) A); A += 16;
+    } else {
+      vOld512_0 = _mm512_load_si512((__m512i *) A); A += 16;
+      vOld512_1 = _mm512_load_si512((__m512i *) A); A += 16;
+      vNew512_0 = _mm512_load_si512((__m512i *) B); B += 16;
+      vNew512_1 = _mm512_load_si512((__m512i *) B); B += 16;
+    }
+    merge_32x32_network(&vMerged512_0,&vMerged512_1,&vOld512_0,&vOld512_1,
+			vOld512_0,vOld512_1,vNew512_0,vNew512_1);
+    _mm512_stream_si512((__m512i *) C,vMerged512_0); C += 16;
+    _mm512_stream_si512((__m512i *) C,vMerged512_1); C += 16;
+
+    while (A < Aend - 32 && B < Bend - 32) {
+      if (nextA < nextB) {
+	vNew512_0 = _mm512_load_si512((__m512i *) A); A += 16;
+	vNew512_1 = _mm512_load_si512((__m512i *) A); A += 16;
+	nextA = *A;
+      } else {
+	vNew512_0 = _mm512_load_si512((__m512i *) B); B += 16;
+	vNew512_1 = _mm512_load_si512((__m512i *) B); B += 16;
+	nextB = *B;
+      }
+      merge_32x32_network(&vMerged512_0,&vMerged512_1,&vOld512_0,&vOld512_1,
+			  vOld512_0,vOld512_1,vNew512_0,vNew512_1);
+      _mm512_stream_si512((__m512i *) C,vMerged512_0); C += 16;
+      _mm512_stream_si512((__m512i *) C,vMerged512_1); C += 16;
+    }
+
+    /* Re-insert before largest element */
+    if (nextA < nextB) {
+      B -= 16; _mm512_store_si512((__m512i *) B,vOld512_1);
+      B -= 16; _mm512_store_si512((__m512i *) B,vOld512_0);
+    } else {
+      A -= 16; _mm512_store_si512((__m512i *) A,vOld512_1);
+      A -= 16; _mm512_store_si512((__m512i *) A,vOld512_0);
+    }
+  }
+#endif
+
+
+#ifdef HAVE_AVX512
+  if (A < Aend - 16 && B < Bend - 16) {
+    /* 512 bits */
+    if ((nextA = A[16]) < (nextB = B[16])) {
+      vOld512 = _mm512_load_si512((__m512i *) B); B += 16;
+      vNew512 = _mm512_load_si512((__m512i *) A); A += 16;
+    } else {
+      vOld512 = _mm512_load_si512((__m512i *) A); A += 16;
+      vNew512 = _mm512_load_si512((__m512i *) B); B += 16;
+    }
+    merge_16x16(&vMerged512,&vOld512,vOld512,vNew512);
+    _mm512_stream_si512((__m512i *) C,vMerged512); C += 16;
+
+    while (A < Aend - 16 && B < Bend - 16) {
+      if (nextA < nextB) {
+	vNew512 = _mm512_load_si512((__m512i *) A); A += 16; nextA = *A;
+      } else {
+	vNew512 = _mm512_load_si512((__m512i *) B); B += 16; nextB = *B;
+      }
+      merge_16x16(&vMerged512,&vOld512,vOld512,vNew512);
+      _mm512_stream_si512((__m512i *) C,vMerged512); C += 16;
+    }
+
+    /* Re-insert before largest element */
+    if (nextA < nextB) {
+      B -= 16; _mm512_store_si512((__m512i *) B,vOld512);
+    } else {
+      A -= 16; _mm512_store_si512((__m512i *) A,vOld512);
+    }
+  }
+
+#elif defined(HAVE_AVX2)
+  if (A < Aend - 16 && B < Bend - 16) {
+    if ((nextA = A[16]) < (nextB = B[16])) {
+      vOld256_0 = _mm256_load_si256((__m256i *) B); B += 8;
+      vOld256_1 = _mm256_load_si256((__m256i *) B); B += 8;
+      vNew256_0 = _mm256_load_si256((__m256i *) A); A += 8;
+      vNew256_1 = _mm256_load_si256((__m256i *) A); A += 8;
+    } else {
+      vOld256_0 = _mm256_load_si256((__m256i *) A); A += 8;
+      vOld256_1 = _mm256_load_si256((__m256i *) A); A += 8;
+      vNew256_0 = _mm256_load_si256((__m256i *) B); B += 8;
+      vNew256_1 = _mm256_load_si256((__m256i *) B); B += 8;
+    }
+    merge_16x16_network(&vMerged256_0,&vMerged256_1,&vOld256_0,&vOld256_1,
+			vOld256_0,vOld256_1,vNew256_0,vNew256_1);
+    _mm256_stream_si256((__m256i *) C,vMerged256_0); C += 8;
+    _mm256_stream_si256((__m256i *) C,vMerged256_1); C += 8;
+
+    while (A < Aend - 16 && B < Bend - 16) {
+      if (nextA < nextB) {
+	vNew256_0 = _mm256_load_si256((__m256i *) A); A += 8;
+	vNew256_1 = _mm256_load_si256((__m256i *) A); A += 8;
+	nextA = *A;
+      } else {
+	vNew256_0 = _mm256_load_si256((__m256i *) B); B += 8;
+	vNew256_1 = _mm256_load_si256((__m256i *) B); B += 8;
+	nextB = *B;
+      }
+      merge_16x16_network(&vMerged256_0,&vMerged256_1,&vOld256_0,&vOld256_1,
+			  vOld256_0,vOld256_1,vNew256_0,vNew256_1);
+      _mm256_stream_si256((__m256i *) C,vMerged256_0); C += 8;
+      _mm256_stream_si256((__m256i *) C,vMerged256_1); C += 8;
+    }
+
+    /* Re-insert before largest element */
+    if (nextA < nextB) {
+      B -= 8; _mm256_store_si256((__m256i *) B,vOld256_1);
+      B -= 8; _mm256_store_si256((__m256i *) B,vOld256_0);
+    } else {
+      A -= 8; _mm256_store_si256((__m256i *) A,vOld256_1);
+      A -= 8; _mm256_store_si256((__m256i *) A,vOld256_0);
+    }
+  }
+#endif
+
+
+#ifdef HAVE_AVX2
+  if (A < Aend - 8 && B < Bend - 8) {
+    /* 256 bits */
+    if ((nextA = A[8]) < (nextB = B[8])) {
+      vOld256 = _mm256_load_si256((__m256i *) B); B += 8;
+      vNew256 = _mm256_load_si256((__m256i *) A); A += 8;
+    } else {
+      vOld256 = _mm256_load_si256((__m256i *) A); A += 8;
+      vNew256 = _mm256_load_si256((__m256i *) B); B += 8;
+    }
+    merge_8x8(&vMerged256,&vOld256,vOld256,vNew256);
+    _mm256_stream_si256((__m256i *) C,vMerged256); C += 8;
+
+    while (A < Aend - 8 && B < Bend - 8) {
+      if (nextA < nextB) {
+	vNew256 = _mm256_load_si256((__m256i *) A); A += 8; nextA = *A;
+      } else {
+	vNew256 = _mm256_load_si256((__m256i *) B); B += 8; nextB = *B;
+      }
+      merge_8x8(&vMerged256,&vOld256,vOld256,vNew256);
+      _mm256_stream_si256((__m256i *) C,vMerged256); C += 8;
+    }
+
+    /* Re-insert before largest element */
+    if (nextA < nextB) {
+      B -= 8; _mm256_store_si256((__m256i *) B,vOld256);
+    } else {
+      A -= 8; _mm256_store_si256((__m256i *) A,vOld256);
+    }
+  }
+
+#elif defined(HAVE_SSE4_1)
+  if (A < Aend - 8 && B < Bend - 8) {
+    if ((nextA = A[8]) < (nextB = B[8])) {
+      vOld128_0 = _mm_load_si128((__m128i *) B); B += 4;
+      vOld128_1 = _mm_load_si128((__m128i *) B); B += 4;
+      vNew128_0 = _mm_load_si128((__m128i *) A); A += 4;
+      vNew128_1 = _mm_load_si128((__m128i *) A); A += 4;
+    } else {
+      vOld128_0 = _mm_load_si128((__m128i *) A); A += 4;
+      vOld128_1 = _mm_load_si128((__m128i *) A); A += 4;
+      vNew128_0 = _mm_load_si128((__m128i *) B); B += 4;
+      vNew128_1 = _mm_load_si128((__m128i *) B); B += 4;
+    }
+    merge_8x8_network(&vMerged128_0,&vMerged128_1,&vOld128_0,&vOld128_1,
+		      vOld128_0,vOld128_1,vNew128_0,vNew128_1);
+    _mm_stream_si128((__m128i *) C,vMerged128_0); C += 4;
+    _mm_stream_si128((__m128i *) C,vMerged128_1); C += 4;
+
+    while (A < Aend - 8 && B < Bend - 8) {
+      if (nextA < nextB) {
+	vNew128_0 = _mm_load_si128((__m128i *) A); A += 4;
+	vNew128_1 = _mm_load_si128((__m128i *) A); A += 4;
+	nextA = *A;
+      } else {
+	vNew128_0 = _mm_load_si128((__m128i *) B); B += 4;
+	vNew128_1 = _mm_load_si128((__m128i *) B); B += 4;
+	nextB = *B;
+      }
+      merge_8x8_network(&vMerged128_0,&vMerged128_1,&vOld128_0,&vOld128_1,
+			vOld128_0,vOld128_1,vNew128_0,vNew128_1);
+      _mm_stream_si128((__m128i *) C,vMerged128_0); C += 4;
+      _mm_stream_si128((__m128i *) C,vMerged128_1); C += 4;
+    }
+
+    /* Re-insert before largest element */
+    if (nextA < nextB) {
+      B -= 4; _mm_store_si128((__m128i *) B,vOld128_1);
+      B -= 4; _mm_store_si128((__m128i *) B,vOld128_0);
+    } else {
+      A -= 4; _mm_store_si128((__m128i *) A,vOld128_1);
+      A -= 4; _mm_store_si128((__m128i *) A,vOld128_0);
+    }
+  }
+#endif
+
+
+#ifdef HAVE_SSE4_1
+  if (A < Aend - 4 && B < Bend - 4) {	/* NOTE(review): Aend - 4 points before A when nA < 4 */
+    /* 128 bits */
+    if ((nextA = A[4]) < (nextB = B[4])) {
+      vOld128 = _mm_load_si128((__m128i *) B); B += 4;
+      vNew128 = _mm_load_si128((__m128i *) A); A += 4;
+    } else {
+      vOld128 = _mm_load_si128((__m128i *) A); A += 4;
+      vNew128 = _mm_load_si128((__m128i *) B); B += 4;
+    }
+    merge_4x4(&vMerged128,&vOld128,vOld128,vNew128);	/* Low half is emitted; high half is carried in vOld128 */
+    _mm_stream_si128((__m128i *) C,vMerged128); C += 4;
+
+    while (A < Aend - 4 && B < Bend - 4) {
+      if (nextA < nextB) {
+	vNew128 = _mm_load_si128((__m128i *) A); A += 4; nextA = *A;
+      } else {
+	vNew128 = _mm_load_si128((__m128i *) B); B += 4; nextB = *B;
+      }
+      merge_4x4(&vMerged128,&vOld128,vOld128,vNew128);
+      _mm_stream_si128((__m128i *) C,vMerged128); C += 4;
+    }
+
+    /* Re-insert before largest element */
+    if (nextA < nextB) {
+      B -= 4; _mm_store_si128((__m128i *) B,vOld128);	/* Overwrites consumed input so the serial loop picks up the carried block */
+    } else {
+      A -= 4; _mm_store_si128((__m128i *) A,vOld128);
+    }
+  }
+#endif
+
+  /* Serial */
+  while (A < Aend && B < Bend) {
+    if (*A < *B) {
+      *C++ = *A++;
+    } else {
+      *C++ = *B++;
+    }
+  }
+
+  memcpy(C,A,(Aend - A) * sizeof(unsigned int));	/* The loop above exhausted A or B, so at most one of these two copies moves data */
+  memcpy(C,B,(Bend - B) * sizeof(unsigned int));
+
+  return C0;
+}
+
+
+
+#define PARENT(i) (i >> 1)	/* 1-based binary heap indexing; arguments are not parenthesized, so pass simple expressions only */
+#define LEFT(i) (i << 1)
+#define RIGHT(i) ((i << 1) | 1)
+/* Merges sibling nodes pairwise, level by level, from pyramid_start..pyramid_end up to a single node, and returns that node's index */
+static int
+pyramid_merge (unsigned int **heap, int nstreams, int heapsize, int *nelts,
+	       int pyramid_start, int pyramid_end) {
+  int nodei;
+#ifdef DEBUG
+  int i;
+#endif
+
+  while (pyramid_end > pyramid_start) {
+    debug(printf("Merging level: %d..%d for heapsize %d\n",pyramid_start,pyramid_end,heapsize));
+
+    if (pyramid_end > heapsize) {
+      nodei = heapsize;		/* Level extends past the last node: start from the last node */
+    } else {
+      nodei = pyramid_end;
+    }
+
+    while (nodei >= pyramid_start) {
+      debug2(printf("Merging nodes %d (%d elts) and %d (%d elts) => %d\n",
+		    nodei-1,nelts[nodei-1],nodei,nelts[nodei],PARENT(nodei)));
+      heap[PARENT(nodei)] = Merge_uint4(/*dest*/NULL,heap[nodei-1],heap[nodei],nelts[nodei-1],nelts[nodei]);
+      CHECK_ALIGN(heap[PARENT(nodei)]);
+      nelts[PARENT(nodei)] = nelts[nodei-1] + nelts[nodei];
+      debug2(printf("Created list %p of length %d at node %d\n",
+		    heap[PARENT(nodei)],nelts[PARENT(nodei)],PARENT(nodei)));
+
+#ifdef DEBUG
+      for (i = 0; i < nelts[PARENT(nodei)]; i++) {
+	printf("%u\n",heap[PARENT(nodei)][i]);
+      }
+#endif
+
+      /* Don't free original lists (when nodei >= nstreams) */
+      debug(printf("Freeing nodes %d and %d\n",nodei-1,nodei));
+      if (nodei < nstreams) {
+	FREE_ALIGN(heap[nodei]);
+      }
+      if (nodei-1 < nstreams) {
+	FREE_ALIGN(heap[nodei-1]);
+      }
+      nodei -= 2;		/* Step to the next sibling pair on this level */
+    }
+
+    pyramid_end = PARENT(pyramid_end);
+    pyramid_start = PARENT(pyramid_start);
+  }
+
+  debug(printf("Returning ancestor %d\n\n",pyramid_start));
+  return pyramid_start;
+}
+
+
+/* Merges the lists in heap[pyramid_start..pyramid_end] pairwise, level by level, into preallocated
+   storage and returns the index of the final merged node.  Assumes heapi < base put into LEFT(heapi) */
+static int
+pyramid_merge_prealloc (unsigned int **heap, unsigned int *curr_storage, unsigned int *prev_storage,
+			int *nelts, int pyramid_start, int pyramid_end) {
+  unsigned int *temp;
+  int nodei;
+  int nalloc;
+#ifdef HAVE_SSE4_1
+  int n;
+#endif
+#ifdef DEBUG2
+  int i;			/* Index for the debugging dump below; it was undeclared before */
+#endif
+
+  while (pyramid_end > pyramid_start) {
+    debug2(printf("Merging level: %d..%d\n",pyramid_start,pyramid_end));
+    nalloc = 0;			/* Each level writes into curr_storage from its beginning */
+    nodei = pyramid_end;
+    while (nodei >= pyramid_start) {
+      debug2(printf("Merging nodes %d (%d elts) and %d (%d elts) => %d\n",
+		   nodei-1,nelts[nodei-1],nodei,nelts[nodei],PARENT(nodei)));
+      heap[PARENT(nodei)] = Merge_uint4(/*dest*/&(curr_storage[nalloc]),heap[nodei-1],heap[nodei],nelts[nodei-1],nelts[nodei]);
+      CHECK_ALIGN(heap[PARENT(nodei)]);
+      /* Have to align start of each entry curr_storage[nalloc], regardless of end padding */
+#ifdef HAVE_SSE4_1
+      n = nelts[PARENT(nodei)] = nelts[nodei-1] + nelts[nodei];
+      nalloc += PAD_UINT4(n);
+#else
+      nalloc += (nelts[PARENT(nodei)] = nelts[nodei-1] + nelts[nodei]);
+#endif
+      debug2(printf("Created list %p of length %d at node %d\n",
+		    heap[PARENT(nodei)],nelts[PARENT(nodei)],PARENT(nodei)));
+
+#ifdef DEBUG2
+      for (i = 0; i < nelts[PARENT(nodei)]; i++) {
+	printf("%u\n",heap[PARENT(nodei)][i]);
+      }
+#endif
+      /* Freeing memory one row at a time, so don't do it here */
+      nodei -= 2;
+    }
+    /* Swap memory spaces, so the next level reads what this level wrote */
+    debug(printf("Swapping storage spaces\n"));
+    temp = prev_storage;
+    prev_storage = curr_storage;
+    curr_storage = temp;
+    /* Go up a level */
+    pyramid_end = PARENT(pyramid_end);
+    pyramid_start = PARENT(pyramid_start);
+  }
+
+  debug(printf("Returning ancestor %d\n\n",pyramid_start));
+  return pyramid_start;
+}
+
+
+static UINT4 **
+make_diagonals_heap (int *ncopied, int **nelts, List_T stream_list, Intlist_T streamsize_list, int nstreams) {
+  /* Allocates a heap of 2*nstreams slots and fills it, from the last slot backward, with
+     aligned copies of the streams popped from stream_list.  *nelts receives the per-slot
+     lengths (popped from streamsize_list) and *ncopied the number of copies made.
+     Returns the heap. */
+  UINT4 **heap, *stream;
+  int *lengths;
+  int heapsize, heapi, n;
+#ifdef DEBUG
+  int i;
+#endif
+
+  /* Nodes are indexed up to heapsize, so allocate heapsize + 1 slots */
+  heapsize = 2*nstreams - 1;
+  heap = (UINT4 **) CALLOC((heapsize + 1),sizeof(UINT4 *));
+  lengths = (int *) CALLOC((heapsize + 1),sizeof(int));
+  *nelts = lengths;
+  *ncopied = 0;
+
+  /* Process in reverse order, because stream_list is in reverse order of elts */
+  for (heapi = heapsize; stream_list != NULL; heapi--) {
+    streamsize_list = Intlist_pop(streamsize_list,&n);
+    stream_list = List_pop(stream_list,(void *) &stream);
+    lengths[heapi] = n;
+
+    /* Copy to make the merging process non-destructive */
+    heap[heapi] = MALLOC_ALIGN(n*sizeof(UINT4));
+    memcpy(heap[heapi],stream,n*sizeof(UINT4));
+    (*ncopied)++;
+    CHECK_ALIGN(heap[heapi]);
+
+    /* Debugging output only */
+    debug(printf("Assigning node %d with %d elts",heapi,lengths[heapi]));
+#ifdef DEBUG
+    for (i = 0; i < lengths[heapi]; i++) {
+      printf(" %u",heap[heapi][i]);
+    }
+#endif
+    debug(printf("\n"));
+  }
+
+  return heap;
+}
+
+
+UINT4 *
+Merge_diagonals (int *nelts1, List_T stream_list, Intlist_T streamsize_list) {
+  UINT4 *result, **heap, *stream;
+  int *nelts;
+  int nstreams, ncopied, heapi, heapsize, base, ancestori, pyramid_start, pyramid_end;
+  int bits;
+#ifdef DEBUG
+  int i;
+#endif
+  /* Merges all streams in stream_list (lengths in streamsize_list) into a single list */
+  /* Stores the merged length in *nelts1 and returns the list, or NULL if there are no streams */
+  if ((nstreams = List_length(stream_list)) == 0) {
+    *nelts1 = 0;
+    return (UINT4 *) NULL;
+
+  } else if (nstreams == 1) {
+    streamsize_list = Intlist_pop(streamsize_list,&(*nelts1));
+    stream_list = List_pop(stream_list,(void *) &stream);
+    result = MALLOC_ALIGN((*nelts1)*sizeof(UINT4)); /* single stream: return an aligned copy */
+    memcpy(result,stream,(*nelts1)*sizeof(UINT4));
+    return result;
+
+  } else {
+    heapsize = 2*nstreams - 1;	/* also index of last node */
+#ifdef HAVE_BUILTIN_CLZ
+    bits = 31 - __builtin_clz((unsigned int) heapsize);
+#elif defined(HAVE_ASM_BSR)
+    asm("bsr %1,%0" : "=r"(bits) : "r"(heapsize));
+#else
+    bits = 31 - ((heapsize >> 16) ? clz_table[heapsize >> 16] : 16 + clz_table[heapsize]); 
+#endif
+    /* bits is the position of the highest set bit of heapsize, so base is the largest power of 2 <= heapsize */
+    base = (1 << bits);
+    heap = make_diagonals_heap(&ncopied,&nelts,stream_list,streamsize_list,nstreams);
+    debug(printf("nstreams %d, heapsize %d, clz %d, bits %d, base %d\n",nstreams,heapsize,__builtin_clz(heapsize),bits,base));
+  }
+
+  /* Middle pyramids */
+  while (base > PYRAMID_SIZE) {
+    for (pyramid_start = 2*base - PYRAMID_SIZE, pyramid_end = 2*base - 1; pyramid_start >= base;
+	 pyramid_start -= PYRAMID_SIZE, pyramid_end -= PYRAMID_SIZE) {
+      debug(printf("diagonals: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams));
+      ancestori = pyramid_merge(heap,nstreams,heapsize,nelts,pyramid_start,pyramid_end);
+    }
+    base = ancestori;		/* the last pyramid processed starts at base, so this is the new bottom row */
+  }
+
+  /* Last pyramid */
+  pyramid_start = base;
+  pyramid_end = 2*base - 1;
+  debug(printf("diagonals: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams));
+  /* base = */ pyramid_merge(heap,nstreams,heapsize,nelts,pyramid_start,pyramid_end);
+  /* The final merge leaves the complete list and its length at node 1 */
+  *nelts1 = nelts[1];
+  result = heap[1];
+  /* Free the copies made by make_diagonals_heap, which occupy the last ncopied nodes */
+  for (heapi = heapsize; heapi > heapsize - ncopied; heapi--) {
+    FREE_ALIGN(heap[heapi]);
+  }
+
+  FREE(heap);
+  FREE(nelts);
+
+#ifdef DEBUG
+  printf("Merge_diagonals returning result of length %d\n",*nelts1);
+  for (i = 0; i < *nelts1; i++) {
+    printf("%u\n",result[i]);
+  }
+#endif
+
+  return result;
+}
+
+
+static Record_T **
+make_record_heap (int **nelts, List_T stream_list, Intlist_T streamsize_list, 
+		  Intlist_T querypos_list, Intlist_T diagterm_list, int nstreams,
+		  int base, struct Record_T *all_records) {
+  Record_T **record_heap;
+  UINT4 *diagonals;
+  int heapsize, null_pyramid_start, heapi, basei;
+  int querypos, diagterm;
+  int i, k;
+  /* Fills all_records from the streams and returns a heap of per-stream arrays of pointers into it; *nelts gets the lengths */
+  heapsize = 2*nstreams - 1;
+  null_pyramid_start = (heapsize + PYRAMID_SIZE - 1)/PYRAMID_SIZE * PYRAMID_SIZE; /* full or partial pyramid for entries below this */
+
+  /* Add PYRAMID_SIZE to handle partial pyramid */
+  record_heap = (Record_T **) CALLOC(heapsize + PYRAMID_SIZE,sizeof(Record_T *));
+  *nelts = (int *) CALLOC(heapsize + PYRAMID_SIZE,sizeof(int));
+
+  /* Process as (base - 1) downto nstreams, then heapsize downto base,
+     because stream_list is in reverse order of elts */
+  k = 0;			/* next unused entry in all_records */
+  for (heapi = base - 1; heapi >= PARENT(null_pyramid_start); heapi--) {
+    /* Put all information into penultimate row */
+    stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */
+    streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi]));
+    querypos_list = Intlist_pop(querypos_list,&querypos);
+    diagterm_list = Intlist_pop(diagterm_list,&diagterm);
+    record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T));
+    debug2(printf("NULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi]));
+
+    for (i = 0; i < (*nelts)[heapi]; i++) {
+      /* Process in forward order to keep records in order */
+      all_records[k].diagonal = diagonals[i] + diagterm;
+      all_records[k].querypos = querypos;
+      record_heap[heapi][i] = &(all_records[k]);
+      debug2(printf(" %u+%d",diagonals[i],querypos));
+      k++;
+    }
+    debug2(printf("\n"));
+  }
+    
+  for ( ; heapi >= nstreams; heapi--) {
+    /* Move all information down to left child */
+    basei = LEFT(heapi);
+    stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */
+    streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[basei]));
+    querypos_list = Intlist_pop(querypos_list,&querypos);
+    diagterm_list = Intlist_pop(diagterm_list,&diagterm);
+    record_heap[basei] = (Record_T *) MALLOC(((*nelts)[basei]) * sizeof(Record_T));
+    debug2(printf("PART: Assigning node %d => %d with %d elts (%p)",heapi,basei,(*nelts)[basei],record_heap[basei]));
+
+    for (i = 0; i < (*nelts)[basei]; i++) {
+      /* Process in forward order to keep records in order */
+      all_records[k].diagonal = diagonals[i] + diagterm;
+      all_records[k].querypos = querypos;
+      record_heap[basei][i] = &(all_records[k]);
+      debug2(printf(" %u+%d",diagonals[i],querypos));
+      k++;
+    }
+    debug2(printf("\n"));
+  }
+  /* The remaining streams belong to the nodes that already lie on the base row */
+  for (heapi = heapsize; heapi >= base; heapi--) {
+    /* Put all information into base row */
+    stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */
+    streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi]));
+    querypos_list = Intlist_pop(querypos_list,&querypos);
+    diagterm_list = Intlist_pop(diagterm_list,&diagterm);
+    record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T));
+    debug2(printf("FULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi]));
+
+    for (i = 0; i < (*nelts)[heapi]; i++) {
+      /* Process in forward order to keep records in order */
+      all_records[k].diagonal = diagonals[i] + diagterm;
+      all_records[k].querypos = querypos;
+      record_heap[heapi][i] = &(all_records[k]);
+      debug2(printf(" %u+%d",diagonals[i],querypos));
+      k++;
+    }
+    debug2(printf("\n"));
+  }
+
+  return record_heap;
+}
+
+
+
+/* For initializing heap, there are three categories:
+   base..(heapsize % PYRAMID_SIZE) + PYRAMID_SIZE: Fill bottom row
+   straddling heapsize: Pull down some nodes to bottom row
+   heapsize..(2*base - 1): Fill penultimate row */
+Record_T *
+Merge_records (int *nelts1, List_T stream_list, Intlist_T streamsize_list,
+	       Intlist_T querypos_list, Intlist_T diagterm_list, 
+	       struct Record_T *all_records) {
+  Record_T *result, **record_heap, curr;
+  UINT4 **key_heap, *prev_storage, *curr_storage;
+  int ptrs[PYRAMID_SIZE];
+  int *nelts, nalloc;
+  int nstreams, heapsize, base, ancestori, pyramid_start, pyramid_end,
+    node_start, node_end;
+  int bits;
+  int heapi, streami, i, j, k;
+  /* Merges the streams into one array of pointers into all_records, ordered by diagonal and then querypos */
+  debug2(printf("Entered Merge_records\n"));
+  /* Stores the result length in *nelts1; returns NULL when there are no streams */
+  if ((nstreams = List_length(stream_list)) == 0) {
+    *nelts1 = 0;
+    return (Record_T *) NULL;
+
+  } else {
+    heapsize = 2*nstreams - 1;	/* also index of last node */
+#ifdef HAVE_BUILTIN_CLZ
+    bits = 31 - __builtin_clz(heapsize);
+#elif defined(HAVE_ASM_BSR)
+    asm("bsr %1,%0" : "=r"(bits) : "r"(heapsize));
+#else
+    bits = 31 - ((heapsize >> 16) ? clz_table[heapsize >> 16] : 16 + clz_table[heapsize]); 
+#endif
+    base = (1 << bits);
+    debug2(printf("nstreams %d, heapsize %d, base %d\n",nstreams,heapsize,base));
+    record_heap = make_record_heap(&nelts,stream_list,streamsize_list,querypos_list,diagterm_list,
+				   nstreams,base,all_records);
+  }
+  /* With a single stream, make_record_heap has already put its records at node 1, so no merging is needed */
+  if (nstreams == 1) {
+    *nelts1 = nelts[1];
+    result = record_heap[1];
+
+    FREE(nelts);
+    FREE(record_heap);
+
+#ifdef DEBUG2
+    printf("Merge_records returning result of length %d\n",*nelts1);
+    for (i = 0; i < *nelts1; i++) {
+      printf("%u %d\n",result[i]->diagonal,result[i]->querypos);
+    }
+#endif
+
+    return result;
+  }
+  /* key_heap[node] holds UINT4 sort keys: the diagonal masked by KEY_MASK, plus the stream's index */
+  /* within its pyramid (streami), which is recovered after merging with & ~KEY_MASK */
+  key_heap = (UINT4 **) CALLOC(heapsize + PYRAMID_SIZE,sizeof(UINT4 *));
+
+  /* Middle pyramids */
+  while (base > PYRAMID_SIZE) {
+    for (pyramid_start = 2*base - PYRAMID_SIZE, pyramid_end = 2*base - 1; pyramid_start >= base;
+	 pyramid_start -= PYRAMID_SIZE, pyramid_end -= PYRAMID_SIZE) {
+      debug2(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d",pyramid_start,pyramid_end,nstreams));
+
+      if (pyramid_start > heapsize) { /* pyramid lies past the last node: use the row above it */
+	node_start = PARENT(pyramid_start);
+	node_end = PARENT(pyramid_end);
+	debug2(printf(" => node_start %d, node_end %d\n",node_start,node_end));
+      } else {
+	node_start = pyramid_start;
+	node_end = pyramid_end;
+      }
+      debug2(printf("\n"));
+
+      /* Allocate memory for the pyramid */
+      nalloc = 0;
+      /* Have to align start of each entry prev_storage[nalloc] and curr_storage[nalloc], regardless of end padding */
+#ifdef HAVE_SSE4_1
+      for (heapi = node_start; heapi <= node_end; heapi++) {
+	nalloc += PAD_UINT4(nelts[heapi]);
+      }
+      prev_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4));
+      curr_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4));
+#else
+      for (heapi = node_start; heapi <= node_end; heapi++) {
+        nalloc += nelts[heapi];
+      }
+      prev_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4));
+      curr_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4));
+#endif
+      
+      /* Convert structures to integers (key_heap) */
+      nalloc = 0;
+      for (heapi = node_start, streami = 0; heapi <= node_end; heapi++, streami++) {
+	debug2(printf("Creating key node %d from %p\n",heapi,record_heap[heapi]));
+	/* key_heap[heapi] = (UINT4 *) MALLOC((npadded + 1) * sizeof(UINT4)); */
+	key_heap[heapi] = &(prev_storage[nalloc]);
+	for (i = 0; i < nelts[heapi]; i++) {
+	  key_heap[heapi][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami;
+	}
+        /* Had to align start of each entry prev_storage[nalloc], regardless of end padding */
+#ifdef HAVE_SSE4_1
+	nalloc += PAD_UINT4(nelts[heapi]);
+#else
+	nalloc += nelts[heapi];
+#endif
+      }
+
+      ancestori = pyramid_merge_prealloc(key_heap,curr_storage,prev_storage,nelts,
+                                	 node_start,node_end);
+
+      /* Convert integers to structures */
+      record_heap[ancestori] = (Record_T *) MALLOC(nelts[ancestori] * sizeof(Record_T));
+      memset(ptrs,0,PYRAMID_SIZE*sizeof(int)); /* ptrs[s] = next unread record of stream s */
+      k = 0;
+      for (i = 0; i < nelts[ancestori]; i++) {
+	streami = key_heap[ancestori][i] & ~KEY_MASK;
+	record_heap[ancestori][k++] = record_heap[node_start + streami][ptrs[streami]++];
+      }
+      
+      /* Free base heaps */
+      for (heapi = node_start; heapi <= node_end; heapi++) {
+	FREE(record_heap[heapi]);
+      }
+      
+      /* Free key_heap storage.  NOTE(review): the non-SSE4.1 branch above allocates with MALLOC -- confirm FREE_ALIGN matches it */
+      FREE_ALIGN(prev_storage);
+      FREE_ALIGN(curr_storage);
+
+    }
+    base = ancestori;
+  }
+
+  /* Last pyramid */
+  pyramid_start = base;
+  pyramid_end = 2*base - 1;
+  debug2(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams));
+
+  /* Allocate memory for the pyramid */
+  nalloc = 0;
+
+  /* Have to align start of each entry prev_storage[nalloc] and curr_storage[nalloc], regardless of end padding */
+#ifdef HAVE_SSE4_1
+  for (heapi = pyramid_start; heapi <= pyramid_end; heapi++) {
+    nalloc += PAD_UINT4(nelts[heapi]);
+  }
+  prev_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4));
+  curr_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4));
+#else
+  for (heapi = pyramid_start; heapi <= pyramid_end; heapi++) {
+    nalloc += nelts[heapi];
+  }
+  prev_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4));
+  curr_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4));
+#endif
+
+  /* Convert structures to integers (key_heap) */
+  nalloc = 0;
+  for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) {
+    /* key_heap[heapi] = (UINT4 *) MALLOC((npadded + 1) * sizeof(UINT4)); */
+    key_heap[heapi] = &(prev_storage[nalloc]);
+    for (i = 0; i < nelts[heapi]; i++) {
+      key_heap[heapi][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami;
+    }
+    /* Had to align start of each entry prev_storage[nalloc], regardless of end padding */
+#ifdef HAVE_SSE4_1
+    nalloc += PAD_UINT4(nelts[heapi]);
+#else
+    nalloc += nelts[heapi];
+#endif
+  }
+
+  ancestori = pyramid_merge_prealloc(key_heap,curr_storage,prev_storage,
+				     nelts,pyramid_start,pyramid_end);
+  /* ancestori should be 1 */
+
+  /* Convert integers to structures */
+  record_heap[ancestori] = (Record_T *) MALLOC(nelts[ancestori] * sizeof(Record_T));
+  memset(ptrs,0,PYRAMID_SIZE*sizeof(int)); /* ptrs[s] = next unread record of stream s */
+  k = 0;
+  for (i = 0; i < nelts[ancestori]; i++) {
+    streami = key_heap[ancestori][i] & ~KEY_MASK;
+    record_heap[ancestori][k++] = record_heap[pyramid_start + streami][ptrs[streami]++];
+  }
+
+  /* Free base heaps (unless pyramid_start == 1, implying that base == 1) */
+  for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) {
+    FREE(record_heap[pyramid_start + streami]);
+  }
+
+  /* Free key_heap storage */
+  FREE_ALIGN(prev_storage);
+  FREE_ALIGN(curr_storage);
+
+
+  *nelts1 = nelts[1];
+  result = record_heap[1];
+
+  /* Final insertion sort to correct for truncation of keys */
+  for (j = 1; j < *nelts1; j++) {
+    curr = result[j];
+    i = j - 1;
+    /* For a stable merge sort, is the second condition possible? */
+    while (i >= 0 && (result[i]->diagonal > curr->diagonal ||
+		     (result[i]->diagonal == curr->diagonal &&
+		      result[i]->querypos > curr->querypos))) {
+      assert(result[i]->diagonal > curr->diagonal); /* i.e., a move caused only by the querypos tie-break is not expected */
+      result[i+1] = result[i];
+      i--;
+    }
+    result[i+1] = curr;
+  }
+
+
+  FREE(key_heap);
+  FREE(nelts);
+  FREE(record_heap);
+
+#ifdef DEBUG2
+  printf("Merge_records returning result of length %d\n",*nelts1);
+  for (i = 0; i < *nelts1; i++) {
+    printf("%u %d\n",result[i]->diagonal,result[i]->querypos);
+  }
+#endif
+
+  return result;
+}
+
+
diff --git a/src/merge.h b/src/merge.h
new file mode 100644
index 0000000..b4e8e92
--- /dev/null
+++ b/src/merge.h
@@ -0,0 +1,45 @@
+#ifndef MERGE_INCLUDED
+#define MERGE_INCLUDED
+#ifdef HAVE_CONFIG_H
+#include <config.h>		/* For HAVE_64_BIT */
+#endif
+
+#include "types.h"
+#include "list.h"
+#include "intlist.h"
+
+
+/* Pad lengths at end for row-based storage: round x up to a multiple of 16, 8 or 4 elements */
+#ifdef HAVE_AVX512
+#define PAD_UINT4(x) ((((x) + 15)/16) * 16)
+#elif defined(HAVE_AVX2)
+#define PAD_UINT4(x) ((((x) + 7)/8) * 8)
+#elif defined(HAVE_SSE4_1)
+#define PAD_UINT4(x) ((((x) + 3)/4) * 4)
+#else
+#define PAD_UINT4(x) (x)
+#endif
+
+
+typedef struct Record_T *Record_T; /* Record_T is a pointer; struct Record_T is the object itself */
+struct Record_T {
+  Univcoord_T diagonal;		/* Primary sort */
+  int querypos;			/* Secondary sort */
+};
+/* Merges A (nA elts) and B (nB elts) into one list and returns it */
+/* NOTE(review): callers in merge.c pass dest == NULL or preallocated storage -- confirm the contract in merge.c */
+extern unsigned int *
+Merge_uint4 (unsigned int *__restrict__ dest, unsigned int *__restrict__ A,
+	     unsigned int *__restrict__ B, int nA, int nB);
+/* Merges the streams in stream_list (lengths in streamsize_list); *nelts1 receives the result length */
+extern UINT4 *
+Merge_diagonals (int *nelts1, List_T stream_list, Intlist_T streamsize_list);
+/* Merges streams into an array of pointers into all_records, ordered by diagonal and then querypos */
+extern Record_T *
+Merge_records (int *nelts1, List_T stream_list, Intlist_T streamsize_list,
+	       Intlist_T querypos_list, Intlist_T diagterm_list,
+	       struct Record_T *all_records);
+
+#endif
+
+
diff --git a/src/oligoindex_hr.c b/src/oligoindex_hr.c
index acb86f5..4884758 100644
--- a/src/oligoindex_hr.c
+++ b/src/oligoindex_hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: oligoindex_hr.c 184484 2016-02-18 03:11:53Z twu $";
+static char rcsid[] = "$Id: oligoindex_hr.c 203017 2017-01-27 22:42:24Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -17,6 +17,7 @@ static char rcsid[] = "$Id: oligoindex_hr.c 184484 2016-02-18 03:11:53Z twu $";
 #include "mem.h"
 #include "orderstat.h"
 #include "cmet.h"
+#include "atoi.h"
 
 #ifdef DEBUG14
 /* Need to change Makefile.am to include oligoindex_old.c and oligoindex_old.h */
@@ -39,22 +40,72 @@ static char rcsid[] = "$Id: oligoindex_hr.c 184484 2016-02-18 03:11:53Z twu $";
 #ifdef HAVE_AVX2
 #include <immintrin.h>
 #endif
+#ifdef HAVE_AVX512
+#include <immintrin.h>
+#endif
 
 
-#ifdef HAVE_SSE2
-#define USE_SIMD_FOR_COUNTS 1
-#endif
+#ifdef HAVE_AVX512
+/* AVX512 */
+#define EXTRACT(x,i) _mm_extract_epi32(x,i)
+#define EXTRACT256(x,i) _mm256_extract_epi32(x,i)
+
+#elif defined(HAVE_AVX2)
+/* AVX2 */
+#define EXTRACT(x,i) _mm_extract_epi32(x,i)
+#define EXTRACT256(x,i) _mm256_extract_epi32(x,i)
+
+#elif defined(HAVE_SSE4_2)
+/* SSE4.2 */
+#define USE_UNORDERED_9 1
+#define USE_UNORDERED_8 1
+#define USE_UNORDERED_7 1
+#define USE_UNORDERED_6 1
+#define USE_UNORDERED_5 1
+
+#define EXTRACT(x,i) _mm_extract_epi32(x,i)
+
+#elif defined(HAVE_SSE4_1)
+/* SSE4.1 */
+#define USE_UNORDERED_9 1
+#define USE_UNORDERED_8 1
+#define USE_UNORDERED_7 1
+#define USE_UNORDERED_6 1
+#define USE_UNORDERED_5 1
+
+#define EXTRACT(x,i) _mm_extract_epi32(x,i)
+
+#elif defined(HAVE_SSSE3)
+/* SSSE3 */
+#define USE_UNORDERED_9 1
+#define USE_UNORDERED_8 1
+#define USE_UNORDERED_7 1
+#define USE_UNORDERED_6 1
+#define USE_UNORDERED_5 1
 
-#if !defined(HAVE_SSE2)
-#define INDIVIDUAL_SHIFTS 1
-#elif !defined(HAVE_SSE4_1)
 #define SIMD_MASK_THEN_STORE
 #define EXTRACT(x,i) x[i]
-#elif !defined(HAVE_AVX2)
-#define EXTRACT(x,i) _mm_extract_epi32(x,i)
+
+#elif defined(HAVE_SSE2)
+/* SSE2 */
+#define USE_UNORDERED_9 1
+#define USE_UNORDERED_8 1
+#define USE_UNORDERED_7 1
+#define USE_UNORDERED_6 1
+#define USE_UNORDERED_5 1
+
+#define SIMD_MASK_THEN_STORE
+#define EXTRACT(x,i) x[i]
+
 #else
-#define EXTRACT(x,i) _mm_extract_epi32(x,i)
-#define EXTRACT256(x,i) _mm256_extract_epi32(x,i)
+/* non-SIMD */
+#define USE_UNORDERED_9 1
+#define USE_UNORDERED_8 1
+#define USE_UNORDERED_7 1
+#define USE_UNORDERED_6 1
+#define USE_UNORDERED_5 1
+
+#define INDIVIDUAL_SHIFTS 1
 #endif
 
 
@@ -89,7 +140,10 @@ struct T {
   /* bool query_evaluated_p; */
 
   Oligospace_T oligospace;
-#ifdef HAVE_AVX2
+#if defined(HAVE_AVX512)
+  __m512i *inquery_allocated;
+  __m512i *counts_allocated;
+#elif defined(HAVE_AVX2)
   __m256i *inquery_allocated;
   __m256i *counts_allocated;
 #elif defined(HAVE_SSE2)
@@ -101,8 +155,8 @@ struct T {
 
   Chrpos_T *table;
   UINT4 *positions;
-  UINT4 *pointers;
-  UINT4 *pointers_allocated;
+  /* UINT4 *pointers; */
+  /* UINT4 *pointers_allocated; */
 };
 
 struct Oligoindex_array_T {
@@ -158,38 +212,27 @@ struct Oligoindex_array_T {
 #endif
 
 
-#if defined(DEBUG)
+#if 1
 #ifdef HAVE_SSE2
 /* For debugging of SIMD procedures*/
 static void
 print_vector (__m128i x, char *label) {
-  __m128i a[1];
-  unsigned int *s = a;
+  unsigned int s[4];
 
-  _mm_store_si128(a,x);
+  _mm_storeu_si128((__m128i *) s,x);	/* s[] is not guaranteed to be 16-byte aligned, so use the unaligned store */
   _mm_mfence();
-  printf("%s: %08X %u\n",label,s[0],s[0]);
-  printf("%s: %08X %u\n",label,s[1],s[1]);
-  printf("%s: %08X %u\n",label,s[2],s[2]);
-  printf("%s: %08X %u\n",label,s[3],s[3]);
+  printf("%s: %08X %08X %08X %08X\n",label,s[0],s[1],s[2],s[3]);
   return;
 }
 
 /* For debugging of SIMD procedures*/
 static void
 print_counts (__m128i x, char *label) {
-  __m128i a[1];
-  Count_T *s = a;
+  Count_T s[16];
 
-  _mm_store_si128(a,x);
+  _mm_store_si128((__m128i *) s,x);
   _mm_mfence();
   printf("%s:",label);
-#ifdef HAVE_AVX2
-  printf(" %u",s[0]);
-  printf(" %u",s[1]);
-  printf(" %u",s[2]);
-  printf(" %u",s[3]);
-#else
   printf(" %hd",s[0]);
   printf(" %hd",s[1]);
   printf(" %hd",s[2]);
@@ -206,7 +249,6 @@ print_counts (__m128i x, char *label) {
   printf(" %hd",s[13]);
   printf(" %hd",s[14]);
   printf(" %hd",s[15]);
-#endif
   printf("\n");
   return;
 }
@@ -215,11 +257,79 @@ print_counts (__m128i x, char *label) {
 #ifdef HAVE_AVX2
 static void
 print_counts_256 (__m256i x, char *label) {
-  __m256i a[1];
-  Count_T *s = a;
+  Count_T s[32];
+
+  _mm256_storeu_si256((__m256i *) s,x);	/* s[] is not guaranteed to be 32-byte aligned, so use the unaligned store */
+  _mm_mfence();
+  printf("%s:",label);
+  printf(" %hd",s[0]);
+  printf(" %hd",s[1]);
+  printf(" %hd",s[2]);
+  printf(" %hd",s[3]);
+  printf(" %hd",s[4]);
+  printf(" %hd",s[5]);
+  printf(" %hd",s[6]);
+  printf(" %hd",s[7]);
+  printf(" %hd",s[8]);
+  printf(" %hd",s[9]);
+  printf(" %hd",s[10]);
+  printf(" %hd",s[11]);
+  printf(" %hd",s[12]);
+  printf(" %hd",s[13]);
+  printf(" %hd",s[14]);
+  printf(" %hd",s[15]);
+  printf(" %hd",s[16]);
+  printf(" %hd",s[17]);
+  printf(" %hd",s[18]);
+  printf(" %hd",s[19]);
+  printf(" %hd",s[20]);
+  printf(" %hd",s[21]);
+  printf(" %hd",s[22]);
+  printf(" %hd",s[23]);
+  printf(" %hd",s[24]);
+  printf(" %hd",s[25]);
+  printf(" %hd",s[26]);
+  printf(" %hd",s[27]);
+  printf(" %hd",s[28]);
+  printf(" %hd",s[29]);
+  printf(" %hd",s[30]);
+  printf(" %hd",s[31]);
+  printf("\n");
+  return;
+}
+
+/* For debugging of SIMD procedures*/
+static void
+print_vector_256 (__m256i x, char *label) {
+  unsigned int s[8];
+
+  _mm256_storeu_si256((__m256i *) s,x);	/* s[] is not guaranteed to be 32-byte aligned, so use the unaligned store */
+  _mm_mfence();
+#if 0
+  printf("%s: %08X %u\n",label,s[0],s[0]);
+  printf("%s: %08X %u\n",label,s[1],s[1]);
+  printf("%s: %08X %u\n",label,s[2],s[2]);
+  printf("%s: %08X %u\n",label,s[3],s[3]);
+  printf("%s: %08X %u\n",label,s[4],s[4]);
+  printf("%s: %08X %u\n",label,s[5],s[5]);
+  printf("%s: %08X %u\n",label,s[6],s[6]);
+  printf("%s: %08X %u\n",label,s[7],s[7]);
+#else
+  printf("%s: %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	 label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7]);
+#endif
+  return;
+}
+#endif
+
+#ifdef HAVE_AVX512
+static void
+print_counts_512 (__m512i x, char *label) {
+  Count_T s[64];
 
-  _mm256_store_si256(a,x);
+  _mm512_store_si512((__m512i *) s,x);
   _mm_mfence();
+#if 0
   printf("%s:",label);
   printf(" %u",s[0]);
   printf(" %u",s[1]);
@@ -229,18 +339,30 @@ print_counts_256 (__m256i x, char *label) {
   printf(" %u",s[5]);
   printf(" %u",s[6]);
   printf(" %u",s[7]);
+  printf(" %u",s[8]);
+  printf(" %u",s[9]);
+  printf(" %u",s[10]);
+  printf(" %u",s[11]);
+  printf(" %u",s[12]);
+  printf(" %u",s[13]);
+  printf(" %u",s[14]);
+  printf(" %u",s[15]);
   printf("\n");
+#else
+  printf("%s: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+    label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]);
+#endif
   return;
 }
 
 /* For debugging of SIMD procedures*/
 static void
-print_vector_256 (__m256i x, char *label) {
-  __m256i a[1];
-  unsigned int *s = a;
+print_vector_512 (__m512i x, char *label) {
+  unsigned int s[16];
 
-  _mm256_store_si256(a,x);
+  _mm512_store_si512((__m512i *) s,x);
   _mm_mfence();
+#if 0
   printf("%s: %08X %u\n",label,s[0],s[0]);
   printf("%s: %08X %u\n",label,s[1],s[1]);
   printf("%s: %08X %u\n",label,s[2],s[2]);
@@ -249,6 +371,18 @@ print_vector_256 (__m256i x, char *label) {
   printf("%s: %08X %u\n",label,s[5],s[5]);
   printf("%s: %08X %u\n",label,s[6],s[6]);
   printf("%s: %08X %u\n",label,s[7],s[7]);
+  printf("%s: %08X %u\n",label,s[8],s[8]);
+  printf("%s: %08X %u\n",label,s[9],s[9]);
+  printf("%s: %08X %u\n",label,s[10],s[10]);
+  printf("%s: %08X %u\n",label,s[11],s[11]);
+  printf("%s: %08X %u\n",label,s[12],s[12]);
+  printf("%s: %08X %u\n",label,s[13],s[13]);
+  printf("%s: %08X %u\n",label,s[14],s[14]);
+  printf("%s: %08X %u\n",label,s[15],s[15]);
+#else
+  printf("%s: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+    label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]);
+#endif
   return;
 }
 #endif
@@ -8527,9 +8661,7 @@ static int suffnconsecutives_minor[NOLIGOINDICES_MINOR] = {10, 10, 10};
 static Genomecomp_T *ref_blocks;
 static Mode_T mode;
 
-#define USE_GATHER 1
-
-#ifdef USE_SIMD_FOR_COUNTS
+#ifdef HAVE_SSE2
 static __m128i mask9;
 static __m128i mask8;
 static __m128i mask7;
@@ -8537,32 +8669,46 @@ static __m128i mask6;
 static __m128i mask5;
 #endif
 
-#ifdef HAVE_AVX2
-#ifdef CHECK_FOR_OVERFLOW
-static __m128i maxcount128;
-static __m256i maxcount256;
-#endif
-static __m256i shift0to14;
-/* static __m256i low8; */
-static __m256i low7;
-static __m256i low6;
-static __m256i low5;
-static __m256i low4;
+#ifdef HAVE_SSE4_1
+static __m128i mask7_epi16;
+static __m128i mask6_epi16;
+static __m128i mask5_epi16;
+#endif
+
+#if defined(HAVE_AVX2)
+static __m256i bigshift0to14;
 static __m256i bigmask9;
 static __m256i bigmask8;
 static __m256i bigmask7;
 static __m256i bigmask6;
 static __m256i bigmask5;
-static __m256i byfours;
-static __m256i byeights;
+static __m256i bigmask7_epi16;
+static __m256i bigmask6_epi16;
+static __m256i bigmask5_epi16;
+#endif
+
+#ifdef HAVE_AVX512
+static __m512i hugeshift0to14;
+static __m512i hugemask9;
+static __m512i hugemask8;
+static __m512i hugemask7;
+static __m512i hugemask6;
+static __m512i hugemask5;
+static __m512i highmask8;
+static __m512i highmask7;
+static __m512i highmask6;
+static __m512i highmask5;
+
 #endif
 
 
+
 void
 Oligoindex_hr_setup (Genomecomp_T *ref_blocks_in, Mode_T mode_in) {
   ref_blocks = ref_blocks_in;
   mode = mode_in;
-#ifdef USE_SIMD_FOR_COUNTS
+
+#ifdef HAVE_SSE2
   mask9 = _mm_set1_epi32(262143U);
   mask8 = _mm_set1_epi32(65535U);
   mask7 = _mm_set1_epi32(16383U);
@@ -8570,26 +8716,38 @@ Oligoindex_hr_setup (Genomecomp_T *ref_blocks_in, Mode_T mode_in) {
   mask5 = _mm_set1_epi32(1023U);
 #endif
 
-#ifdef HAVE_AVX2
-#ifdef CHECK_FOR_OVERFLOW
-  maxcount128 = _mm_set1_epi32(MAXCOUNT);
-  maxcount256 = _mm256_set1_epi32(MAXCOUNT);
-#endif
-  shift0to14 = _mm256_setr_epi32(0,2,4,6,8,10,12,14);
-  /* low8 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U,-1U,-1U,-1U); */
-  low7 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U,-1U,-1U, 0U);
-  low6 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U,-1U, 0U, 0U);
-  low5 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U, 0U, 0U, 0U);
-  low4 = _mm256_setr_epi32(-1U,-1U,-1U,-1U, 0U, 0U, 0U, 0U);
+#ifdef HAVE_SSE4_1
+  mask7_epi16 = _mm_set1_epi16(16383U);
+  mask6_epi16 = _mm_set1_epi16(4095U);
+  mask5_epi16 = _mm_set1_epi16(1023U);
+#endif
+
+#if defined(HAVE_AVX2)
+  bigshift0to14 = _mm256_setr_epi32(0,2,4,6,8,10,12,14);
   bigmask9 = _mm256_set1_epi32(262143U);
   bigmask8 = _mm256_set1_epi32(65535U);
   bigmask7 = _mm256_set1_epi32(16383U);
   bigmask6 = _mm256_set1_epi32(4095U);
   bigmask5 = _mm256_set1_epi32(1023U);
-  byfours = _mm256_setr_epi32(28,24,20,16,12,8,4,0);
-  byeights = _mm256_setr_epi32(56,48,40,32,24,16,8,0);
+  bigmask7_epi16 = _mm256_set1_epi16(16383U);
+  bigmask6_epi16 = _mm256_set1_epi16(4095U);
+  bigmask5_epi16 = _mm256_set1_epi16(1023U);
 #endif
 
+#ifdef HAVE_AVX512
+  hugeshift0to14 = _mm512_setr_epi32(0,2,4,6,8,10,12,14, 0,2,4,6,8,10,12,14);
+  hugemask9 = _mm512_set1_epi32(262143U);
+  hugemask8 = _mm512_set1_epi32(65535U); /* 0x0000FFFF */
+  hugemask7 = _mm512_set1_epi32(16383U); /* 0x00003FFF */
+  hugemask6 = _mm512_set1_epi32(4095U);	 /* 0x00000FFF */
+  hugemask5 = _mm512_set1_epi32(1023U);	 /* 0x000003FF */
+  highmask8 = _mm512_set1_epi32(0xFFFF0000);
+  highmask7 = _mm512_set1_epi32(0x3FFF0000);
+  highmask6 = _mm512_set1_epi32(0x0FFF0000);
+  highmask5 = _mm512_set1_epi32(0x03FF0000);
+#endif
+
+
 #ifdef DEBUG14
   Oligoindex_old_setup(ref_blocks_in,mode_in);
 #endif
@@ -8627,7 +8785,14 @@ Oligoindex_new (int indexsize, int diag_lookback, int suffnconsecutive, Shortoli
   new->suffnconsecutive = suffnconsecutive;
 
   /* new->query_evaluated_p = false; */
-#ifdef HAVE_AVX2
+#if defined(HAVE_AVX512)
+  new->inquery_allocated = (__m512i *) _mm_malloc(new->oligospace * sizeof(Inquery_T),64);
+  new->counts_allocated = (__m512i *) _mm_malloc(new->oligospace * sizeof(Count_T),64);
+  assert((long) new->inquery_allocated % 64 == 0);
+  assert((long) new->counts_allocated % 64 == 0);
+  new->inquery = (Inquery_T *) new->inquery_allocated;
+  new->counts = (Count_T *) new->counts_allocated;
+#elif defined(HAVE_AVX2)
   new->inquery_allocated = (__m256i *) _mm_malloc(new->oligospace * sizeof(Inquery_T),32);
   new->counts_allocated = (__m256i *) _mm_malloc(new->oligospace * sizeof(Count_T),32);
   assert((long) new->inquery_allocated % 32 == 0);
@@ -8649,8 +8814,8 @@ Oligoindex_new (int indexsize, int diag_lookback, int suffnconsecutive, Shortoli
   memset((void *) new->inquery,INQUERY_FALSE,new->oligospace*sizeof(Inquery_T));
   memset((void *) new->counts,0,new->oligospace*sizeof(Count_T));
 
-  new->pointers_allocated = (UINT4 *) MALLOC((new->oligospace+1) * sizeof(UINT4));
-  new->pointers = &(new->pointers_allocated[1]);
+  /* new->pointers_allocated = (UINT4 *) MALLOC((new->oligospace+1) * sizeof(UINT4)); */
+  /* new->pointers = &(new->pointers_allocated[1]); */
   new->positions = (UINT4 *) MALLOC(new->oligospace * sizeof(UINT4));
   new->table = (Chrpos_T *) NULL;
 
@@ -8841,7 +9006,7 @@ dump_allocations (Chrpos_T **positions, Count_T *counts, int oligospace, int ind
       printf("Oligo_hr %s (%llu) => %u entries\n",
 	     nt,(unsigned long long) i,counts[i]);
     } else {
-      printf("Oligo_hr %s (%llu) => %u entries: allocation %p (%d entries)\n",
+      printf("Oligo_hr %s (%llu) => %u entries: allocation %p (%lu entries)\n",
 	     nt,(unsigned long long) i,counts[i],positions[i],positions[i] - lastptr);
       lastptr = positions[i];
     }
@@ -8882,31 +9047,40 @@ dump_positions (Chrpos_T *table, UINT4 *positions, Count_T *counts, Inquery_T *i
  *   Counting and storage procedures.  We count the number of
  *   occurrences of each oligomer in the genomic region, modulo 256
  *   (because Count_T is an unsigned char).  The allocate_positions
- *   procedure then assigns pointers_end (which start at the end of
- *   each positions block and go backward) and positions
- *   (which stay fixed) based on those counts, except that oligomers
- *   not in the query sequence have their counts set to 0, and have no
- *   space allocated.  However, during storage, if a pointer hits the
- *   beginning of the position block, that must mean that the count cycled
- *   past 255.  We set that count to be 0, so that oligomer is not used by
- *   Oligomer_get_mappings.  A count greater that 255 is overabundant
- *   and not useful in stage 2.
+ *   procedure then sets counts to 0 for oligomers that are not in the
+ *   query sequence, and then assigns positions based on the counts.
+ *   During storage, we decrement the count and store at positions +
+ *   count, which could lead to cycling if the count had overflowed.
  ************************************************************************/
 
 /************************************************************************
+ *   Use SIMD to process 256 k-mers at a time:
+ *      extract_*mers_{fwd|rev}_simd_256 (AVX512)
+ *      extract_*mers_{fwd|rev}_simd_256_ordered (AVX512)
+ *
+ *   Use SIMD to process 128 k-mers at a time:
+ *      extract_*mers_{fwd|rev}_simd_128 (AVX2)
+ *      extract_*mers_{fwd|rev}_simd_128_ordered (AVX2)
+ *
  *   Use SIMD to process 64 k-mers at a time:
- *      extract_*mers_{fwd|rev}_simd
- *      count_fwdrev_simd
- *      store_fwdrev_simd
+ *      extract_*mers_{fwd|rev}_simd_64 (SSE2)
+ *      extract_*mers_{fwd|rev}_simd_64_ordered (SSE2 for 9mers, SSE4.1 for 8mers and smaller)
  *
- *   Now, extract_*mers_{fwd|rev}_simd plus count_fwdrev_simd has been merged
- *      into count_*mers_{fwd|rev}_simd.  However, we retain extract/store.
+ *      count_fwdrev_simd_n
+ *
+ *      store_fwdrev_simd_256 (AVX512)
+ *      store_fwdrev_simd_128
+ *      store_fwdrev_simd_64
+ *
+ *      store_fwdrev_simd_256_ordered
+ *      store_fwdrev_simd_128_ordered
+ *      store_fwdrev_simd_64_ordered (AVX2)
  *
  *   Use a special procedure to compute an odd block of 32 k-mers
- *      count_*mers_{fwd|rev}
+ *      count_*mers_{fwd|rev}_32
  *      This procedure can use SIMD if we compute backwards
  *
- *   Use a slow procedure to compute the start and end blocks
+ *   Use a serial procedure to compute the start and end blocks (< 32)
  *      count_*mers_{fwd|rev}_partial
  ************************************************************************/
 
@@ -8916,7 +9090,7 @@ dump_positions (Chrpos_T *table, UINT4 *positions, Count_T *counts, Inquery_T *i
  ************************************************************************/
 
 static void
-count_9mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev,
+count_9mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev,
 			 Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -8927,16 +9101,16 @@ count_9mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = nexthigh_rev >> ((96 - 2*9) - 2*pos);
     masked |= low_rev << (2*pos - (64 - 2*9));
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
   while (pos >= startdiscard && pos >= 16) {
     masked = low_rev >> ((64 - 2*9) - 2*pos);
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
@@ -8944,16 +9118,16 @@ count_9mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = low_rev >> ((64 - 2*9) - 2*pos);
     masked |= high_rev << (2*pos - (32 - 2*9));
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
   while (pos >= startdiscard) {
     masked = high_rev >> ((32 - 2*9) - 2*pos);
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
@@ -8961,7 +9135,7 @@ count_9mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
 }
 
 static int
-store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -8974,9 +9148,8 @@ store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= low_rev << (2*pos - (64 - 2*9));
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
-      debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -8986,9 +9159,8 @@ store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rev >> ((64 - 2*9) - 2*pos);
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
-      debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -8999,9 +9171,8 @@ store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rev << (2*pos - (32 - 2*9));
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
-      debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9011,9 +9182,8 @@ store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rev >> ((32 - 2*9) - 2*pos);
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
-      debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9024,7 +9194,7 @@ store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 
 
 static void
-count_8mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev,
+count_8mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev,
 			 Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -9035,16 +9205,16 @@ count_8mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = nexthigh_rev >> ((96 - 2*8) - 2*pos);
     masked |= low_rev << (2*pos - (64 - 2*8));
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
   while (pos >= startdiscard && pos >= 16) {
     masked = low_rev >> ((64 - 2*8) - 2*pos);
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
@@ -9052,16 +9222,16 @@ count_8mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = low_rev >> ((64 - 2*8) - 2*pos);
     masked |= high_rev << (2*pos - (32 - 2*8));
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
   while (pos >= startdiscard) {
     masked = high_rev >> ((32 - 2*8) - 2*pos);
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
-    debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
+    INCR_COUNT(counts[masked]);
+    debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked]));
     pos--;
   }
 
@@ -9069,7 +9239,7 @@ count_8mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
 }
 
 static int
-store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -9082,9 +9252,8 @@ store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= low_rev << (2*pos - (64 - 2*8));
     masked &= MASK8;
      if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9094,9 +9263,8 @@ store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rev >> ((64 - 2*8) - 2*pos);
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9107,9 +9275,8 @@ store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rev << (2*pos - (32 - 2*8));
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9119,9 +9286,8 @@ store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rev >> ((32 - 2*8) - 2*pos);
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9132,7 +9298,7 @@ store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 
 
 static void
-count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev,
+count_7mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev,
 			 Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -9143,7 +9309,7 @@ count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = nexthigh_rev >> ((96 - 2*7) - 2*pos);
     masked |= low_rev << (2*pos - (64 - 2*7));
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9151,7 +9317,7 @@ count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
   while (pos >= startdiscard && pos >= 16) {
     masked = low_rev >> ((64 - 2*7) - 2*pos);
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9160,7 +9326,7 @@ count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = low_rev >> ((64 - 2*7) - 2*pos);
     masked |= high_rev << (2*pos - (32 - 2*7));
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9168,7 +9334,7 @@ count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
   while (pos >= startdiscard) {
     masked = high_rev >> ((32 - 2*7) - 2*pos);
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9177,7 +9343,7 @@ count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
 }
 
 static int
-store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -9190,9 +9356,8 @@ store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= low_rev << (2*pos - (64 - 2*7));
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9202,9 +9367,8 @@ store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rev >> ((64 - 2*7) - 2*pos);
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9215,9 +9379,8 @@ store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rev << (2*pos - (32 - 2*7));
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9227,9 +9390,8 @@ store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rev >> ((32 - 2*7) - 2*pos);
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9240,7 +9402,7 @@ store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 
 
 static void
-count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev,
+count_6mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev,
 			 Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -9251,7 +9413,7 @@ count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = nexthigh_rev >> ((96 - 2*6) - 2*pos);
     masked |= low_rev << (2*pos - (64 - 2*6));
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9259,7 +9421,7 @@ count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
   while (pos >= startdiscard && pos >= 16) {
     masked = low_rev >> ((64 - 2*6) - 2*pos);
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9268,7 +9430,7 @@ count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = low_rev >> ((64 - 2*6) - 2*pos);
     masked |= high_rev << (2*pos - (32 - 2*6));
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9276,7 +9438,7 @@ count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
   while (pos >= startdiscard) {
     masked = high_rev >> ((32 - 2*6) - 2*pos);
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9285,7 +9447,7 @@ count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
 }
 
 static int
-store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -9298,9 +9460,8 @@ store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= low_rev << (2*pos - (64 - 2*6));
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9310,9 +9471,8 @@ store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rev >> ((64 - 2*6) - 2*pos);
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9323,9 +9483,8 @@ store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rev << (2*pos - (32 - 2*6));
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9335,9 +9494,8 @@ store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rev >> ((32 - 2*6) - 2*pos);
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9348,7 +9506,7 @@ store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 
 
 static void
-count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev,
+count_5mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev,
 			 Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -9359,7 +9517,7 @@ count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = nexthigh_rev >> ((96 - 2*5) - 2*pos);
     masked |= low_rev << (2*pos - (64 - 2*5));
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9367,7 +9525,7 @@ count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
   while (pos >= startdiscard && pos >= 16) {
     masked = low_rev >> ((64 - 2*5) - 2*pos);
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9376,7 +9534,7 @@ count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
     masked = low_rev >> ((64 - 2*5) - 2*pos);
     masked |= high_rev << (2*pos - (32 - 2*5));
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9384,7 +9542,7 @@ count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
   while (pos >= startdiscard) {
     masked = high_rev >> ((32 - 2*5) - 2*pos);
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos--;
   }
@@ -9393,7 +9551,7 @@ count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_
 }
 
 static int
-store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -9406,9 +9564,8 @@ store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= low_rev << (2*pos - (64- 2*5));
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9418,9 +9575,8 @@ store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rev >> ((64 - 2*5) - 2*pos);
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9431,9 +9587,8 @@ store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rev << (2*pos - (32 - 2*5));
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9443,9 +9598,8 @@ store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rev >> ((32 - 2*5) - 2*pos);
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos--;
@@ -9455,62 +9609,6 @@ store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 }
 
 
-
-#if 0
-/* Note; for AVX2 and AVX512 */
-/* Variable bit shift right logical (VPSRLVD/Q) */
-_varcount is  16, 14, 12, 10, 8, 6, 4 2 in eight 32-bit quantities in __m256i
-_high_rev is broadcast in eight 32-bit quantities in __m256i
-
-  _mm256_slrv_epi32(_high_rev,_varcount);
-  Then need to mask
-  (Gather in AVX2)
-  (Scatter in AVX-512)
-
-
-  Previously, did
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  _counts = _mm256_add_epi32(_counts,ones256);
-
-   Problem: Cannot add ones if any oligo repeats itself within the same SIMD register
-   Need to wait for conflict instructions from AVX512:
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-  _conflict = _mm256_conflict_epi32(_masked);
-  if (_conflict is zero) then
-    _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-    _counts = _mm256_add_epi32(_counts,ones256);
-  } else {
-    Increment counts manually, or account for conflict
-  }
-
-
-
-  _counts = _mm_i32gather_epi32(counts,_masked,/*scale*/4);
-  _counts = _mm_add_epi32(_counts,ones);
-  _mm_i32scatter_epi32(counts,_masked,/*scale*/4);
-
-  /* Need to change pointers and positions to be indices into a set of values */
-  _pointers = _mm256_i32gather_epi32(pointers,_masked,/*scale*/4);
-  _positions = _mm256_i32gather_epi32(positions,_masked,/*scale*/4);
-  _space = _mm256_sub_epi32(_positions,_pointers);
-  _pointers = _mm256_sub_epi32(_pointers,ones); /* New pointers */
-  _chrpos = _mm256_sub_epi32(_mm256_set1_epi32(chrpos),ramp);
-
-  _mm256_mask_i32scatter_epi32(pointers,_masked,_pointers,/*scale*/4);
-  _mm256_mask_i32scatter_epi32(values,_pointers,_chrpos,/*scale*/4);
-
-  if (EXTRACT256(_space,0)) {
-    pointer = EXTRACT256(_pointers,0);
-    *pointer = EXTRACT256(_chrpos,0);
-  }
-
-#endif
-
-
 #if 0
   /* Replaced by individual count_*mer_{fwd|rev}_simd procedures */
   /* array is filled by extract_*mers_{fwd|rev}_simd */
@@ -9639,2571 +9737,1728 @@ count_fwdrev_simd (Count_T *counts, UINT4 *array) {
 #endif
 
 
-#ifdef USE_SIMD_FOR_COUNTS
-/* Forward and reverse procedures are identical, because forward has
-   chrpos ascending from left and reverse has chrpos ascending from
-   right */
-static Chrpos_T
-store_fwdrev_simd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		   UINT4 *array) {
-#if defined(HAVE_AVX2) && defined(USE_GATHER)
-  __m256i _counts, _masked;
+#ifdef HAVE_AVX512
+/* Adds 1 to the count of each of the n entries in array, using AVX-512 gather,
+   conflict detection, and scatter.  Not worth it for AVX2, which lacks both conflict detection and scatter */
+static void
+count_fwdrev_simd_n (Count_T *counts, UINT4 *array, int n) {
+  UINT4 *ptr;
+  __m512i _envelopes, _increment;
+  __m512i _masked, _conflicts, _blocks, _address_mask;
+  __m512i _zeroes;
+  __mmask16 pending_mask, current_mask;
+  int i;
+
+#if defined(HAVE_AVX512BW)
+  __m512i _addresses, _ones;
+#elif defined(USE_ROTATE)
+  __m512i _rotates;
 #else
-  Genomecomp_T masked;
+  __m512i _new_envelopes, _addresses, _add_mask, _byte_mask, _ones;
 #endif
 
-  /* Row 3 */
-#if defined(HAVE_AVX2) && defined(USE_GATHER)
-  _masked = _mm256_i32gather_epi32((int *) &(array[32+3]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[63]]);
-  assert(EXTRACT256(_counts,1) == counts[array[59]]);
-  assert(EXTRACT256(_counts,2) == counts[array[55]]);
-  assert(EXTRACT256(_counts,3) == counts[array[51]]);
-  assert(EXTRACT256(_counts,4) == counts[array[47]]);
-  assert(EXTRACT256(_counts,5) == counts[array[43]]);
-  assert(EXTRACT256(_counts,6) == counts[array[39]]);
-  assert(EXTRACT256(_counts,7) == counts[array[35]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos,chrpos,0));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 1,chrpos,1));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 1;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 2,chrpos,2));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 2;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 3,chrpos,3));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 3;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 4,chrpos,4));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 4;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 5,chrpos,5));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 5;
+  
+#ifdef DEBUG
+  if (n == 64) {
+    printf("Counting of %d\n",n);
+    for (i = 0; i < n; i += 4) {
+      printf("%d: %08X %08X %08X %08X\n",i,array[i],array[i+1],array[i+2],array[i+3]);
     }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 6,chrpos,6));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 6;
+  } else if (n == 128) {
+    printf("Counting of %d\n",n);
+    for (i = 0; i < n; i += 8) {
+      printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	     i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]);
     }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 7,chrpos,7));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 7;
+  } else if (n == 256) {
+    printf("Counting of %d\n",n);
+    for (i = 0; i < n; i += 16) {
+      printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	     i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7],
+	     array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]);
+      
     }
   }
+#endif
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[3]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[31]]);
-  assert(EXTRACT256(_counts,1) == counts[array[27]]);
-  assert(EXTRACT256(_counts,2) == counts[array[23]]);
-  assert(EXTRACT256(_counts,3) == counts[array[19]]);
-  assert(EXTRACT256(_counts,4) == counts[array[15]]);
-  assert(EXTRACT256(_counts,5) == counts[array[11]]);
-  assert(EXTRACT256(_counts,6) == counts[array[7]]);
-  assert(EXTRACT256(_counts,7) == counts[array[3]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 8,chrpos,8));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 8;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 9,chrpos,9));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 9;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 10,chrpos,10));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 10;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 11,chrpos,11));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 11;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 12,chrpos,12));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 12;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 13,chrpos,13));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 13;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 14,chrpos,14));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 14;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 15,chrpos,15));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 15;
-    }
-  }
-#else
-  masked = array[63];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos,chrpos,0));
-    table[--pointers[masked]] = chrpos;
-  }
+  _address_mask = _mm512_set1_epi32(0x3);
+  _zeroes = _mm512_setzero_si512();
 
-  masked = array[59];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 1,chrpos,1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
 
-  masked = array[55];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 2,chrpos,2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
+#if defined(HAVE_AVX512BW)
+  _ones = _mm512_set1_epi32(1);
+#elif defined(USE_ROTATE)
+  _increment = _mm512_set1_epi32(0x01000000); /* Add 1 to most significant byte */
+#else
+  _ones = _mm512_set1_epi32(1);
+#endif
 
-  masked = array[51];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 3,chrpos,3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
 
-  masked = array[47];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 4,chrpos,4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+  ptr = &(array[0]);
+#ifdef HAVE_AVX512BW
+  while (ptr < &(array[n])) {
+    _masked = _mm512_loadu_si512((__m512i *) ptr);
+    _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */
+
+    _addresses = _mm512_and_si512(_masked,_address_mask);
+    _addresses = _mm512_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+
+    /* Note: Have to check for conflicts in _blocks, not _masked, since we update one address per block */
+    _conflicts = _mm512_conflict_epi32(_blocks);
+    pending_mask = 0xFFFF;
+    while (pending_mask) {
+      current_mask = _mm512_cmpeq_epi32_mask(_conflicts,_zeroes);
+      current_mask = current_mask & pending_mask;
+#if 0
+      _envelopes = _mm512_mask_i32gather_epi32(_zeroes,current_mask,_blocks,(const void *) counts,/*scale*/4);
+#else
+      _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); /* Not using mask */
+#endif
+      /* _increment = _mm512_sllv_epi32(_mm512_mask_set1_epi32(_zeroes,current_mask,1),_addresses); */
+      _increment = _mm512_sllv_epi32(_ones,_addresses); /* Puts 1 in correct byte, but not masked */
+      _envelopes = _mm512_add_epi8(_envelopes,_increment);
 
-  masked = array[43];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 5,chrpos,5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
+      _mm512_mask_i32scatter_epi32((void *) counts,current_mask,_blocks,_envelopes,/*scale*/4);
+      _conflicts = _mm512_andnot_si512(_mm512_set1_epi32(current_mask),_conflicts);
+      pending_mask = pending_mask & (~current_mask);
+    }
 
-  masked = array[39];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 6,chrpos,6));
-    table[--pointers[masked]] = chrpos - 6;
+    ptr += 16;
   }
 
-  masked = array[35];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 7,chrpos,7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
+#elif defined(USE_ROTATE)
+  /* rolv command is slow */
+  while (ptr < &(array[n])) {
+    _masked = _mm512_loadu_si512((__m512i *) ptr);
+    _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */
+    _rotates = _mm512_andnot_si512(_masked,_address_mask); /* Faster way to subtract addresses from 3 */
+    _rotates = _mm512_slli_epi32(_rotates,3); /* Multiply by 8 bits/byte */
 
-  masked = array[31];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 8,chrpos,8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
+    /* Note: Have to check for conflicts in _blocks, not _masked, since we update one address per block */
+    _conflicts = _mm512_conflict_epi32(_blocks);
+    pending_mask = 0xFFFF;
+    while (pending_mask) {
+      current_mask = _mm512_cmpeq_epi32_mask(_conflicts,_zeroes);
+      current_mask = current_mask & pending_mask;
+#if 0
+      _envelopes = _mm512_mask_i32gather_epi32(_zeroes,current_mask,_blocks,(const void *) counts,/*scale*/4);
+#else
+      _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); /* Not using mask */
+#endif
 
-  masked = array[27];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 9,chrpos,9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
+      /* rolv command is slow */
+      _envelopes = _mm512_rolv_epi32(_envelopes,_rotates);
+      _envelopes = _mm512_add_epi32(_envelopes,_increment);
+      _envelopes = _mm512_rorv_epi32(_envelopes,_rotates);
 
-  masked = array[23];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 10,chrpos,10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
+      _mm512_mask_i32scatter_epi32((void *) counts,current_mask,_blocks,_envelopes,/*scale*/4);
+      _conflicts = _mm512_andnot_si512(_mm512_set1_epi32(current_mask),_conflicts);
+      pending_mask = pending_mask & (~current_mask);
+    }
 
-  masked = array[19];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 11,chrpos,11));
-    table[--pointers[masked]] = chrpos - 11;
+    ptr += 16;
   }
 
-  masked = array[15];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 12,chrpos,12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+#else
+  _byte_mask = _mm512_set1_epi32(0xFF);
 
-  masked = array[11];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 13,chrpos,13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
+  while (ptr < &(array[n])) {
+    _masked = _mm512_loadu_si512((__m512i *) ptr);
+    _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */
 
-  masked = array[7];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 14,chrpos,14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
+    _addresses = _mm512_and_si512(_masked,_address_mask);
+    _addresses = _mm512_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
 
-  masked = array[3];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 15,chrpos,15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
+    /* Note: Have to check for conflicts in _blocks, not _masked, since we update one address per block */
+    _conflicts = _mm512_conflict_epi32(_blocks);
+    pending_mask = 0xFFFF;
+    while (pending_mask) {
+      current_mask = _mm512_cmpeq_epi32_mask(_conflicts,_zeroes);
+      current_mask = current_mask & pending_mask;
+#if 0
+      _envelopes = _mm512_mask_i32gather_epi32(_zeroes,current_mask,_blocks,(const void *) counts,/*scale*/4);
+#else
+      _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); /* Not using mask */
 #endif
+      /* _increment = _mm512_sllv_epi32(_mm512_mask_set1_epi32(_zeroes,current_mask,1),_addresses); */
+      _increment = _mm512_sllv_epi32(_ones,_addresses); /* Puts 1 in correct byte, but not masked */
 
+      /* Need to add epi32, mask the carry, and combine with the previous envelope */
+      _add_mask = _mm512_sllv_epi32(_byte_mask,_addresses);
+      _new_envelopes = _mm512_add_epi32(_envelopes,_increment);
+#if 0
+      _envelopes = _mm512_or_si512(_mm512_andnot_si512(_add_mask,_envelopes),_mm512_and_si512(_add_mask,_new_envelopes));
+#else
+      _envelopes = _mm512_ternarylogic_epi32(_add_mask,_envelopes,_new_envelopes,0xAC);
+#endif
 
-  /* Row 2 */
-#if defined(HAVE_AVX2) && defined(USE_GATHER)
-  _masked = _mm256_i32gather_epi32((int *) &(array[32+2]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[62]]);
-  assert(EXTRACT256(_counts,1) == counts[array[58]]);
-  assert(EXTRACT256(_counts,2) == counts[array[54]]);
-  assert(EXTRACT256(_counts,3) == counts[array[50]]);
-  assert(EXTRACT256(_counts,4) == counts[array[46]]);
-  assert(EXTRACT256(_counts,5) == counts[array[42]]);
-  assert(EXTRACT256(_counts,6) == counts[array[38]]);
-  assert(EXTRACT256(_counts,7) == counts[array[34]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 16,chrpos,16));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 16;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 17,chrpos,17));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 17;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 18,chrpos,18));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 18;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 19,chrpos,19));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 19;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 20,chrpos,20));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 20;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 21,chrpos,21));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 21;
+      _mm512_mask_i32scatter_epi32((void *) counts,current_mask,_blocks,_envelopes,/*scale*/4);
+      _conflicts = _mm512_andnot_si512(_mm512_set1_epi32(current_mask),_conflicts);
+      pending_mask = pending_mask & (~current_mask);
     }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 22,chrpos,22));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 22;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 23,chrpos,23));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 23;
-    }
-  }
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[2]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[30]]);
-  assert(EXTRACT256(_counts,1) == counts[array[26]]);
-  assert(EXTRACT256(_counts,2) == counts[array[22]]);
-  assert(EXTRACT256(_counts,3) == counts[array[18]]);
-  assert(EXTRACT256(_counts,4) == counts[array[14]]);
-  assert(EXTRACT256(_counts,5) == counts[array[10]]);
-  assert(EXTRACT256(_counts,6) == counts[array[6]]);
-  assert(EXTRACT256(_counts,7) == counts[array[2]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 24,chrpos,24));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 24;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 25,chrpos,25));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 25;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 26,chrpos,26));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 26;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 27,chrpos,27));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 27;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 28,chrpos,28));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 28;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 29,chrpos,29));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 29;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 30,chrpos,30));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 30;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 31,chrpos,31));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 31;
-    }
+    ptr += 16;
   }
+#endif
+
+  return;
+}
+
 #else
-  masked = array[62];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 16,chrpos,16));
-    table[--pointers[masked]] = chrpos - 16;
+/* Serial version: adds 1 to counts[] for each of the n entries in array */
+static void
+count_fwdrev_simd_n (Count_T *counts, UINT4 *array, int n) {
+  UINT4 *ptr;
+  
+#ifdef DEBUG
+  int i;
+  printf("Counting of %d\n",n);
+  for (i = 0; i < n; i += 16) {
+    printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	   i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7],
+	   array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]);
   }
+#endif
 
-  masked = array[58];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 17,chrpos,17));
-    table[--pointers[masked]] = chrpos - 17;
+  ptr = &(array[0]);
+  while (ptr < &(array[n])) {
+    counts[*ptr++] += 1;
   }
 
-  masked = array[54];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 18,chrpos,18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
+  return;
+}
+#endif
 
-  masked = array[50];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 19,chrpos,19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
 
-  masked = array[46];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 20,chrpos,20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
+#define nonzero_p_32(diff) diff
 
-  masked = array[42];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 21,chrpos,21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
+#if !defined(HAVE_SSE4_2)
+#define count_trailing_zeroes_32(diff) mod_37_bit_position[(-diff & diff) % 37]
+#elif defined(HAVE_TZCNT)
+#define count_trailing_zeroes_32(diff) _tzcnt_u32(diff)
+#elif defined(HAVE_BUILTIN_CTZ)
+#define count_trailing_zeroes_32(diff) __builtin_ctz(diff)
+#else
+/* lowbit = -diff & diff */
+#define count_trailing_zeroes_32(diff) mod_37_bit_position[(-diff & diff) % 37]
+#endif
 
-  masked = array[38];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 22,chrpos,22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
+/* Slower: clear_lowbit(diff,relpos) diff -= (1 << relpos) */
+#define clear_lowbit_32(diff,relpos) (diff & (diff - 1));
 
-  masked = array[34];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 23,chrpos,23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
 
-  masked = array[30];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 24,chrpos,24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
 
-  masked = array[26];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 25,chrpos,25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
+#ifdef HAVE_SSE2
+/* Forward and reverse procedures are identical, because forward has
+   chrpos ascending from left and reverse has chrpos ascending from
+   right.  Right now using SSE2.  For AVX2, can use gather by shifting
+   bytes. */
+static Chrpos_T
+store_fwdrev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		      UINT4 *array) {
+  Genomecomp_T masked;
+  int relpos;
 
-  masked = array[22];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 26,chrpos,26));
-    table[--pointers[masked]] = chrpos - 26;
+#ifdef DEBUG
+  int i;
+  printf("Storing of %d\n",64);
+  for (i = 0; i < 64; i += 4) {
+    printf("%d: %08X %08X %08X %08X\n",i,array[i],array[i+1],array[i+2],array[i+3]);
   }
+#endif
 
-  masked = array[18];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 27,chrpos,27));
-    table[--pointers[masked]] = chrpos - 27;
+  /* Row 3 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[63 - relpos*4];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[14];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 28,chrpos,28));
-    table[--pointers[masked]] = chrpos - 28;
+  /* Row 2 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[62 - relpos*4];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[10];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 29,chrpos,29));
-    table[--pointers[masked]] = chrpos - 29;
+  /* Row 1 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[61 - relpos*4];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[6];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 30,chrpos,30));
-    table[--pointers[masked]] = chrpos - 30;
+  /* Row 0 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[60 - relpos*4];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[2];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 31,chrpos,31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
+  return chrpos;
+}
 #endif
 
+#ifdef HAVE_AVX2
+static Chrpos_T
+store_fwdrev_simd_64_ordered (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			      UINT4 *array) {
+  Genomecomp_T masked;
+  UINT4 *ptr;
+  __m128i _present, _counts, _zeroes;
+  __m128i _masked, _blocks, _envelopes, _addresses, _address_mask, _byte_mask;
+  unsigned int diff_32;
+  int relpos;
+  int i;
 
-  /* Row 1 */
-#if defined(HAVE_AVX2) && defined(USE_GATHER)
-  _masked = _mm256_i32gather_epi32((int *) &(array[32+1]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[61]]);
-  assert(EXTRACT256(_counts,1) == counts[array[57]]);
-  assert(EXTRACT256(_counts,2) == counts[array[53]]);
-  assert(EXTRACT256(_counts,3) == counts[array[49]]);
-  assert(EXTRACT256(_counts,4) == counts[array[45]]);
-  assert(EXTRACT256(_counts,5) == counts[array[41]]);
-  assert(EXTRACT256(_counts,6) == counts[array[37]]);
-  assert(EXTRACT256(_counts,7) == counts[array[33]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 32,chrpos,32));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 32;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 33,chrpos,33));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 33;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 34,chrpos,34));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 34;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 35,chrpos,35));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 35;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 36,chrpos,36));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 36;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 37,chrpos,37));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 37;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 38,chrpos,38));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 38;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 39,chrpos,39));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 39;
-    }
+#ifdef DEBUG
+  printf("Storing of %d\n",64);
+  for (i = 0; i < 64; i += 4) {
+    printf("%d: %08X %08X %08X %08X\n",i,array[i],array[i+1],array[i+2],array[i+3]);
   }
+#endif
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[1]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[29]]);
-  assert(EXTRACT256(_counts,1) == counts[array[25]]);
-  assert(EXTRACT256(_counts,2) == counts[array[21]]);
-  assert(EXTRACT256(_counts,3) == counts[array[17]]);
-  assert(EXTRACT256(_counts,4) == counts[array[13]]);
-  assert(EXTRACT256(_counts,5) == counts[array[9]]);
-  assert(EXTRACT256(_counts,6) == counts[array[5]]);
-  assert(EXTRACT256(_counts,7) == counts[array[1]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 40,chrpos,40));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 40;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 41,chrpos,41));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 41;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 42,chrpos,42));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 42;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 43,chrpos,43));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 43;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 44,chrpos,44));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 44;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 45,chrpos,45));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 45;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 46,chrpos,46));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 46;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 47,chrpos,47));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 47;
+  _address_mask = _mm_set1_epi32(0x3);
+  _byte_mask = _mm_set1_epi32(0xFF);
+  _zeroes = _mm_setzero_si128();
+
+  ptr = &(array[0]);
+  for (i = 0; i < 16; i++) {
+    _masked = _mm_load_si128((__m128i *) ptr);
+    _blocks = _mm_srli_epi32(_masked,2); /* div by 4 bytes/int */
+    _addresses = _mm_and_si128(_masked,_address_mask);
+    _addresses = _mm_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+    _envelopes = _mm_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+    _counts = _mm_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+    _counts = _mm_and_si128(_counts,_byte_mask);    /* Ignore bytes to left */
+
+    _present = _mm_cmpgt_epi32(_counts,_zeroes);
+    diff_32 = _mm_movemask_ps(_mm_castsi128_ps(_present));
+
+    while (nonzero_p_32(diff_32)) {
+      relpos = count_trailing_zeroes_32(diff_32);
+      masked = ptr[relpos];
+      if (counts[masked]) {
+	debug(printf("64: Storing masked %u (%08X) at %u (%u - %d) using relpos\n",masked,masked,chrpos - relpos,chrpos,relpos));
+	table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+      }
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
-  }
-#else
-  masked = array[61];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 32,chrpos,32));
-    table[--pointers[masked]] = chrpos - 32;
-  }
 
-  masked = array[57];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 33,chrpos,33));
-    table[--pointers[masked]] = chrpos - 33;
+    chrpos -= 4;
+    ptr += 4;
   }
 
-  masked = array[53];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 34,chrpos,34));
-    table[--pointers[masked]] = chrpos - 34;
-  }
+  return chrpos;
+}
+#endif
 
-  masked = array[49];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 35,chrpos,35));
-    table[--pointers[masked]] = chrpos - 35;
-  }
 
-  masked = array[45];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 36,chrpos,36));
-    table[--pointers[masked]] = chrpos - 36;
-  }
+#ifdef HAVE_AVX2
+static Chrpos_T
+store_fwdrev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		       UINT4 *array) {
+  Genomecomp_T masked;
+  int relpos;
+#ifdef DEBUG
+  int i;
+#endif
 
-  masked = array[41];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 37,chrpos,37));
-    table[--pointers[masked]] = chrpos - 37;
+#ifdef DEBUG
+  printf("Storage of 128\n");
+  for (i = 0; i < 128; i += 8) {
+    printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	   i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]);
   }
+#endif
 
-  masked = array[37];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 38,chrpos,38));
-    table[--pointers[masked]] = chrpos - 38;
+  /* Row 7 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[127 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[33];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 39,chrpos,39));
-    table[--pointers[masked]] = chrpos - 39;
+  /* Row 6 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[126 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[29];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 40,chrpos,40));
-    table[--pointers[masked]] = chrpos - 40;
-  }
 
-  masked = array[25];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 41,chrpos,41));
-    table[--pointers[masked]] = chrpos - 41;
+  /* Row 5 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[125 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[21];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 42,chrpos,42));
-    table[--pointers[masked]] = chrpos - 42;
+
+  /* Row 4 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[124 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[17];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 43,chrpos,43));
-    table[--pointers[masked]] = chrpos - 43;
+
+  /* Row 3 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[123 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[13];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 44,chrpos,44));
-    table[--pointers[masked]] = chrpos - 44;
+
+  /* Row 2 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[122 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[9];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 45,chrpos,45));
-    table[--pointers[masked]] = chrpos - 45;
+
+  /* Row 1 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[121 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[5];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 46,chrpos,46));
-    table[--pointers[masked]] = chrpos - 46;
+
+  /* Row 0 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[120 - relpos*8];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[1];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 47,chrpos,47));
-    table[--pointers[masked]] = chrpos - 47;
+  return chrpos;
+}
+
+static Chrpos_T
+store_fwdrev_simd_128_ordered (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			       UINT4 *array) {
+  Genomecomp_T masked;
+  UINT4 *ptr;
+  __m256i _present, _counts, _zeroes;
+  __m256i _masked, _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+  unsigned int diff_32;
+  int relpos;
+  int i;
+
+#ifdef DEBUG
+  printf("Storage of 128\n");
+  for (i = 0; i < 128; i += 8) {
+    printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	   i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]);
   }
 #endif
 
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
+  _zeroes = _mm256_setzero_si256();
 
-  /* Row 0 */
-#if defined(HAVE_AVX2) && defined(USE_GATHER)
-  _masked = _mm256_i32gather_epi32((int *) &(array[32+0]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[60]]);
-  assert(EXTRACT256(_counts,1) == counts[array[56]]);
-  assert(EXTRACT256(_counts,2) == counts[array[52]]);
-  assert(EXTRACT256(_counts,3) == counts[array[48]]);
-  assert(EXTRACT256(_counts,4) == counts[array[44]]);
-  assert(EXTRACT256(_counts,5) == counts[array[40]]);
-  assert(EXTRACT256(_counts,6) == counts[array[36]]);
-  assert(EXTRACT256(_counts,7) == counts[array[32]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 48,chrpos,48));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 48;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 49,chrpos,49));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 49;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 50,chrpos,50));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 50;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 51,chrpos,51));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 51;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 52,chrpos,52));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 52;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 53,chrpos,53));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 53;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 54,chrpos,54));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 54;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 55,chrpos,55));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 55;
+  ptr = &(array[0]);
+  for (i = 0; i < 16; i++) {
+    _masked = _mm256_load_si256((__m256i *) ptr);
+    _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+    _addresses = _mm256_and_si256(_masked,_address_mask);
+    _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+    _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+    _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+    _counts = _mm256_and_si256(_counts,_count_mask);    /* Ignore bytes to left */
+
+    _present = _mm256_cmpgt_epi32(_counts,_zeroes);
+    diff_32 = _mm256_movemask_ps(_mm256_castsi256_ps(_present));
+
+    while (nonzero_p_32(diff_32)) {
+      relpos = count_trailing_zeroes_32(diff_32);
+      masked = ptr[relpos];
+      if (counts[masked]) {
+	debug(printf("128: Storing masked %u (%08X) at %u (%u - %d) using relpos\n",masked,masked,chrpos - relpos,chrpos,relpos));
+	table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+      }
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
+
+    chrpos -= 8;
+    ptr += 8;
   }
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[0]),byfours,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[28]]);
-  assert(EXTRACT256(_counts,1) == counts[array[24]]);
-  assert(EXTRACT256(_counts,2) == counts[array[20]]);
-  assert(EXTRACT256(_counts,3) == counts[array[16]]);
-  assert(EXTRACT256(_counts,4) == counts[array[12]]);
-  assert(EXTRACT256(_counts,5) == counts[array[8]]);
-  assert(EXTRACT256(_counts,6) == counts[array[4]]);
-  assert(EXTRACT256(_counts,7) == counts[array[0]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 56,chrpos,56));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 56;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 57,chrpos,57));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 57;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 58,chrpos,58));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 58;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 59,chrpos,59));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 59;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 60,chrpos,60));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 60;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 61,chrpos,61));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 61;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 62,chrpos,62));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 62;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 63,chrpos,63));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 63;
-    }
+  return chrpos;
+}
+#endif	/* HAVE_AVX2 */
+
+
+#ifdef HAVE_AVX512
+static Chrpos_T
+store_fwdrev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		       UINT4 *array) {
+  Genomecomp_T masked;
+  int relpos;
+#ifdef DEBUG
+  int i;
+#endif
+
+#ifdef DEBUG
+  printf("Storage of 256\n");
+  for (i = 0; i < 256; i += 16) {
+    printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	   i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7],
+	   array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]);
   }
-#else
-  masked = array[60];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 48,chrpos,48));
-    table[--pointers[masked]] = chrpos - 48;
+#endif
+
+  /* Row 15 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[255 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[56];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 49,chrpos,49));
-    table[--pointers[masked]] = chrpos - 49;
+  /* Row 14 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[254 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[52];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 50,chrpos,50));
-    table[--pointers[masked]] = chrpos - 50;
+  /* Row 13 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[253 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[48];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 51,chrpos,51));
-    table[--pointers[masked]] = chrpos - 51;
+  /* Row 12 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[252 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[44];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 52,chrpos,52));
-    table[--pointers[masked]] = chrpos - 52;
+  /* Row 11 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[251 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[40];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 53,chrpos,53));
-    table[--pointers[masked]] = chrpos - 53;
+  /* Row 10 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[250 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[36];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 54,chrpos,54));
-    table[--pointers[masked]] = chrpos - 54;
+  /* Row 9 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[249 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[32];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 55,chrpos,55));
-    table[--pointers[masked]] = chrpos - 55;
+  /* Row 8 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[248 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[28];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 56,chrpos,56));
-    table[--pointers[masked]] = chrpos - 56;
+  /* Row 7 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[247 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[24];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 57,chrpos,57));
-    table[--pointers[masked]] = chrpos - 57;
+  /* Row 6 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[246 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[20];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 58,chrpos,58));
-    table[--pointers[masked]] = chrpos - 58;
+  /* Row 5 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[245 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[16];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 59,chrpos,59));
-    table[--pointers[masked]] = chrpos - 59;
+  /* Row 4 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[244 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[12];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 60,chrpos,60));
-    table[--pointers[masked]] = chrpos - 60;
+  /* Row 3 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[243 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[8];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 61,chrpos,61));
-    table[--pointers[masked]] = chrpos - 61;
+  /* Row 2 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[242 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[4];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 62,chrpos,62));
-    table[--pointers[masked]] = chrpos - 62;
+  /* Row 1 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[241 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
+  chrpos -= 16;
 
-  masked = array[0];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 63,chrpos,63));
-    table[--pointers[masked]] = chrpos - 63;
+  /* Row 0 */
+  for (relpos = 0; relpos < 16; relpos++) {
+    masked = array[240 - relpos*16];
+    if (counts[masked]) {
+      debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos));
+      table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+    }
   }
-#endif
+  chrpos -= 16;
 
-  return chrpos - 64;
+  return chrpos;
 }
 
-
-#ifdef HAVE_AVX2
-
-/* testz(counts,counts) == 0 implies there is a nonzero count */
 static Chrpos_T
-store_fwdrev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		       UINT4 *array) {
-#ifdef USE_GATHER
-  __m256i _counts, _masked;
-#else
+store_fwdrev_simd_256_ordered (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			       UINT4 *array) {
   Genomecomp_T masked;
-#endif
+  UINT4 *ptr;
+  __m512i _counts, _zeroes;
+  __m512i _masked, _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+  __mmask16 diff_32;
+  int relpos;
+  int i;
 
 #ifdef DEBUG
-  int i;
-  for (i = 0; i < 128; i += 8) {
-    printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n",
-	   i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]);
+  printf("Storage of 256\n");
+  for (i = 0; i < 256; i += 16) {
+    printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	   i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7],
+	   array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]);
   }
 #endif
 
-  /* Row 7 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+7]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[127]]);
-  assert(EXTRACT256(_counts,1) == counts[array[119]]);
-  assert(EXTRACT256(_counts,2) == counts[array[111]]);
-  assert(EXTRACT256(_counts,3) == counts[array[103]]);
-  assert(EXTRACT256(_counts,4) == counts[array[95]]);
-  assert(EXTRACT256(_counts,5) == counts[array[87]]);
-  assert(EXTRACT256(_counts,6) == counts[array[79]]);
-  assert(EXTRACT256(_counts,7) == counts[array[71]]);
-
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos,chrpos,0));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 1,chrpos,1));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 1;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 2,chrpos,2));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 2;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 3,chrpos,3));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 3;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 4,chrpos,4));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 4;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 5,chrpos,5));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 5;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 6,chrpos,6));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 6;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 7,chrpos,7));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 7;
-    }
-  }
+  _address_mask = _mm512_set1_epi32(0x3);
+  _count_mask = _mm512_set1_epi32(0xFF);
+  _zeroes = _mm512_setzero_si512();
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[7]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[63]]);
-  assert(EXTRACT256(_counts,1) == counts[array[55]]);
-  assert(EXTRACT256(_counts,2) == counts[array[47]]);
-  assert(EXTRACT256(_counts,3) == counts[array[39]]);
-  assert(EXTRACT256(_counts,4) == counts[array[31]]);
-  assert(EXTRACT256(_counts,5) == counts[array[23]]);
-  assert(EXTRACT256(_counts,6) == counts[array[15]]);
-  assert(EXTRACT256(_counts,7) == counts[array[7]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 8,chrpos,8));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 8;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 9,chrpos,9));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 9;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 10,chrpos,10));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 10;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 11,chrpos,11));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 11;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 12,chrpos,12));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 12;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 13,chrpos,13));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 13;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 14,chrpos,14));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 14;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 15,chrpos,15));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 15;
+  ptr = &(array[0]);
+  for (i = 0; i < 16; i++) {
+    _masked = _mm512_load_si512((__m512i *) ptr);
+    _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */
+    _addresses = _mm512_and_si512(_masked,_address_mask);
+    _addresses = _mm512_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+    _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4);
+    _counts = _mm512_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+    _counts = _mm512_and_si512(_counts,_count_mask);    /* Ignore bytes to left */
+
+    diff_32 = _mm512_cmpgt_epi32_mask(_counts,_zeroes);
+    while (nonzero_p_32(diff_32)) {
+      relpos = count_trailing_zeroes_32(diff_32);
+      masked = ptr[relpos];
+      if (counts[masked]) {
+	debug(printf("256: Storing masked %u (%08X) at %u (%u - %d) using relpos\n",masked,masked,chrpos - relpos,chrpos,relpos));
+	table[positions[masked] + (--counts[masked])] = chrpos - relpos;
+      }
+      diff_32 = clear_lowbit_32(diff_32,relpos);
     }
-  }
-#else
-  masked = array[127];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos,chrpos,0));
-    table[--pointers[masked]] = chrpos;
-  }
 
-  masked = array[119];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 1,chrpos,1));
-    table[--pointers[masked]] = chrpos - 1;
+    chrpos -= 16;
+    ptr += 16;
   }
 
-  masked = array[111];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 2,chrpos,2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
+  return chrpos;
+}
+#endif	/* HAVE_AVX512 */
 
-  masked = array[103];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 3,chrpos,3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
 
-  masked = array[95];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 4,chrpos,4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
 
-  masked = array[87];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 5,chrpos,5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
 
-  masked = array[79];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 6,chrpos,6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
+#if !defined(HAVE_AVX2)
 
-  masked = array[71];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 7,chrpos,7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
+static void
+count_9mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
+#else
+  __m128i _oligo, _masked;
+#endif
 
-  masked = array[63];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 8,chrpos,8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
 
-  masked = array[55];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 9,chrpos,9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
+  oligo = nexthigh_rev >> 16;	/* For 31..24 */
+  oligo |= low_rev << 16;
 
-  masked = array[47];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 10,chrpos,10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK9; /* 31 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[39];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 11,chrpos,11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
+  masked = (oligo >> 2) & MASK9; /* 30 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[31];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 12,chrpos,12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+  masked = (oligo >> 4) & MASK9; /* 29 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[23];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 13,chrpos,13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
+  masked = (oligo >> 6) & MASK9; /* 28 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[15];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 14,chrpos,14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
+  masked = (oligo >> 8) & MASK9; /* 27 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[7];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 15,chrpos,15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-#endif
+  masked = (oligo >> 10) & MASK9; /* 26 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
+  masked = (oligo >> 12) & MASK9; /* 25 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  /* Row 6 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+6]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[126]]);
-  assert(EXTRACT256(_counts,1) == counts[array[118]]);
-  assert(EXTRACT256(_counts,2) == counts[array[110]]);
-  assert(EXTRACT256(_counts,3) == counts[array[102]]);
-  assert(EXTRACT256(_counts,4) == counts[array[94]]);
-  assert(EXTRACT256(_counts,5) == counts[array[86]]);
-  assert(EXTRACT256(_counts,6) == counts[array[78]]);
-  assert(EXTRACT256(_counts,7) == counts[array[70]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 16,chrpos,16));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 16;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 17,chrpos,17));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 17;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 18,chrpos,18));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 18;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 19,chrpos,19));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 19;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 20,chrpos,20));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 20;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 21,chrpos,21));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 21;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 22,chrpos,22));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 22;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 23,chrpos,23));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 23;
-    }
-  }
+  masked = (oligo >> 14) & MASK9; /* 24 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[6]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[62]]);
-  assert(EXTRACT256(_counts,1) == counts[array[54]]);
-  assert(EXTRACT256(_counts,2) == counts[array[46]]);
-  assert(EXTRACT256(_counts,3) == counts[array[38]]);
-  assert(EXTRACT256(_counts,4) == counts[array[30]]);
-  assert(EXTRACT256(_counts,5) == counts[array[22]]);
-  assert(EXTRACT256(_counts,6) == counts[array[14]]);
-  assert(EXTRACT256(_counts,7) == counts[array[6]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 24,chrpos,24));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 24;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 25,chrpos,25));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 25;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 26,chrpos,26));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 26;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 27,chrpos,27));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 27;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 28,chrpos,28));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 28;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 29,chrpos,29));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 29;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 30,chrpos,30));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 30;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 31,chrpos,31));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 31;
-    }
-  }
 #else
-  masked = array[126];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 16,chrpos,16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[118];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 17,chrpos,17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[110];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 18,chrpos,18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[102];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 19,chrpos,19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[94];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 20,chrpos,20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[86];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 21,chrpos,21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
 
-  masked = array[78];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 22,chrpos,22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[70];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 23,chrpos,23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[62];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 24,chrpos,24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[54];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 25,chrpos,25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[46];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 26,chrpos,26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+#endif
 
-  masked = array[38];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 27,chrpos,27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
 
-  masked = array[30];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 28,chrpos,28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK9;	/* 23 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[22];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 29,chrpos,29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
+  masked = (low_rev >> 2) & MASK9;	/* 22 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[14];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 30,chrpos,30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
+  masked = (low_rev >> 4) & MASK9;	/* 21 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[6];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 31,chrpos,31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-#endif
+  masked = (low_rev >> 6) & MASK9;	/* 20 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  /* Row 5 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+5]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[125]]);
-  assert(EXTRACT256(_counts,1) == counts[array[117]]);
-  assert(EXTRACT256(_counts,2) == counts[array[109]]);
-  assert(EXTRACT256(_counts,3) == counts[array[101]]);
-  assert(EXTRACT256(_counts,4) == counts[array[93]]);
-  assert(EXTRACT256(_counts,5) == counts[array[85]]);
-  assert(EXTRACT256(_counts,6) == counts[array[77]]);
-  assert(EXTRACT256(_counts,7) == counts[array[69]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 32,chrpos,32));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 32;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 33,chrpos,33));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 33;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 34,chrpos,34));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 34;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 35,chrpos,35));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 35;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 36,chrpos,36));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 36;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 37,chrpos,37));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 37;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 38,chrpos,38));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 38;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 39,chrpos,39));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 39;
-    }
-  }
+  masked = (low_rev >> 8) & MASK9; /* 19 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[5]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[61]]);
-  assert(EXTRACT256(_counts,1) == counts[array[53]]);
-  assert(EXTRACT256(_counts,2) == counts[array[45]]);
-  assert(EXTRACT256(_counts,3) == counts[array[37]]);
-  assert(EXTRACT256(_counts,4) == counts[array[29]]);
-  assert(EXTRACT256(_counts,5) == counts[array[21]]);
-  assert(EXTRACT256(_counts,6) == counts[array[13]]);
-  assert(EXTRACT256(_counts,7) == counts[array[5]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 40,chrpos,40));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 40;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 41,chrpos,41));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 41;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 42,chrpos,42));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 42;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 43,chrpos,43));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 43;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 44,chrpos,44));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 44;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 45,chrpos,45));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 45;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 46,chrpos,46));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 46;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 47,chrpos,47));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 47;
-    }
-  }
-#else
-  masked = array[125];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 32,chrpos,32));
-    table[--pointers[masked]] = chrpos - 32;
-  }
+  masked = (low_rev >> 10) & MASK9; /* 18 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[117];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 33,chrpos,33));
-    table[--pointers[masked]] = chrpos - 33;
-  }
+  masked = (low_rev >> 12) & MASK9; /* 17 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[109];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 34,chrpos,34));
-    table[--pointers[masked]] = chrpos - 34;
-  }
+  masked = low_rev >> 14;		/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[101];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 35,chrpos,35));
-    table[--pointers[masked]] = chrpos - 35;
-  }
+#else
+  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[93];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 36,chrpos,36));
-    table[--pointers[masked]] = chrpos - 36;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[85];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 37,chrpos,37));
-    table[--pointers[masked]] = chrpos - 37;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[77];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 38,chrpos,38));
-    table[--pointers[masked]] = chrpos - 38;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[69];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 39,chrpos,39));
-    table[--pointers[masked]] = chrpos - 39;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[61];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 40,chrpos,40));
-    table[--pointers[masked]] = chrpos - 40;
-  }
 
-  masked = array[53];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 41,chrpos,41));
-    table[--pointers[masked]] = chrpos - 41;
-  }
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[45];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 42,chrpos,42));
-    table[--pointers[masked]] = chrpos - 42;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[37];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 43,chrpos,43));
-    table[--pointers[masked]] = chrpos - 43;
-  }
-
-  masked = array[29];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 44,chrpos,44));
-    table[--pointers[masked]] = chrpos - 44;
-  }
-
-  masked = array[21];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 45,chrpos,45));
-    table[--pointers[masked]] = chrpos - 45;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[13];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 46,chrpos,46));
-    table[--pointers[masked]] = chrpos - 46;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[5];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 47,chrpos,47));
-    table[--pointers[masked]] = chrpos - 47;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
-  /* Row 4 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+4]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[124]]);
-  assert(EXTRACT256(_counts,1) == counts[array[116]]);
-  assert(EXTRACT256(_counts,2) == counts[array[108]]);
-  assert(EXTRACT256(_counts,3) == counts[array[100]]);
-  assert(EXTRACT256(_counts,4) == counts[array[92]]);
-  assert(EXTRACT256(_counts,5) == counts[array[84]]);
-  assert(EXTRACT256(_counts,6) == counts[array[76]]);
-  assert(EXTRACT256(_counts,7) == counts[array[68]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 48,chrpos,48));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 48;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 49,chrpos,49));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 49;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 50,chrpos,50));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 50;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 51,chrpos,51));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 51;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 52,chrpos,52));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 52;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 53,chrpos,53));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 53;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 54,chrpos,54));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 54;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 55,chrpos,55));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 55;
-    }
-  }
-
-  _masked = _mm256_i32gather_epi32((int *) &(array[4]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[60]]);
-  assert(EXTRACT256(_counts,1) == counts[array[52]]);
-  assert(EXTRACT256(_counts,2) == counts[array[44]]);
-  assert(EXTRACT256(_counts,3) == counts[array[36]]);
-  assert(EXTRACT256(_counts,4) == counts[array[28]]);
-  assert(EXTRACT256(_counts,5) == counts[array[20]]);
-  assert(EXTRACT256(_counts,6) == counts[array[12]]);
-  assert(EXTRACT256(_counts,7) == counts[array[4]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 56,chrpos,56));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 56;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 57,chrpos,57));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 57;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 58,chrpos,58));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 58;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 59,chrpos,59));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 59;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 60,chrpos,60));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 60;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 61,chrpos,61));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 61;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 62,chrpos,62));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 62;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 63,chrpos,63));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 63;
-    }
-  }
-#else
-  masked = array[124];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 48,chrpos,48));
-    table[--pointers[masked]] = chrpos - 48;
-  }
+  oligo = low_rev >> 16;		/* For 15..8 */
+  oligo |= high_rev << 16;
 
-  masked = array[116];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 49,chrpos,49));
-    table[--pointers[masked]] = chrpos - 49;
-  }
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK9; /* 15 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[108];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 50,chrpos,50));
-    table[--pointers[masked]] = chrpos - 50;
-  }
+  masked = (oligo >> 2) & MASK9; /* 14 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[100];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 51,chrpos,51));
-    table[--pointers[masked]] = chrpos - 51;
-  }
+  masked = (oligo >> 4) & MASK9; /* 13 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[92];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 52,chrpos,52));
-    table[--pointers[masked]] = chrpos - 52;
-  }
+  masked = (oligo >> 6) & MASK9; /* 12 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[84];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 53,chrpos,53));
-    table[--pointers[masked]] = chrpos - 53;
-  }
+  masked = (oligo >> 8) & MASK9; /* 11 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[76];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 54,chrpos,54));
-    table[--pointers[masked]] = chrpos - 54;
-  }
+  masked = (oligo >> 10) & MASK9; /* 10 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[68];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 55,chrpos,55));
-    table[--pointers[masked]] = chrpos - 55;
-  }
+  masked = (oligo >> 12) & MASK9; /* 9 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[60];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 56,chrpos,56));
-    table[--pointers[masked]] = chrpos - 56;
-  }
+  masked = (oligo >> 14) & MASK9; /* 8 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[52];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 57,chrpos,57));
-    table[--pointers[masked]] = chrpos - 57;
-  }
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[44];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 58,chrpos,58));
-    table[--pointers[masked]] = chrpos - 58;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[36];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 59,chrpos,59));
-    table[--pointers[masked]] = chrpos - 59;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[28];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 60,chrpos,60));
-    table[--pointers[masked]] = chrpos - 60;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[20];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 61,chrpos,61));
-    table[--pointers[masked]] = chrpos - 61;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[12];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 62,chrpos,62));
-    table[--pointers[masked]] = chrpos - 62;
-  }
 
-  masked = array[4];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 63,chrpos,63));
-    table[--pointers[masked]] = chrpos - 63;
-  }
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
 #endif
 
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-  /* Row 3 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+3]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[123]]);
-  assert(EXTRACT256(_counts,1) == counts[array[115]]);
-  assert(EXTRACT256(_counts,2) == counts[array[107]]);
-  assert(EXTRACT256(_counts,3) == counts[array[99]]);
-  assert(EXTRACT256(_counts,4) == counts[array[91]]);
-  assert(EXTRACT256(_counts,5) == counts[array[83]]);
-  assert(EXTRACT256(_counts,6) == counts[array[75]]);
-  assert(EXTRACT256(_counts,7) == counts[array[67]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 64,chrpos,64));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 64;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 65,chrpos,65));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 65;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 66,chrpos,66));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 66;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 67,chrpos,67));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 67;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 68,chrpos,68));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 68;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 69,chrpos,69));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 69;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 70,chrpos,70));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 70;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 71,chrpos,71));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 71;
-    }
-  }
-
-  _masked = _mm256_i32gather_epi32((int *) &(array[3]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[59]]);
-  assert(EXTRACT256(_counts,1) == counts[array[51]]);
-  assert(EXTRACT256(_counts,2) == counts[array[43]]);
-  assert(EXTRACT256(_counts,3) == counts[array[35]]);
-  assert(EXTRACT256(_counts,4) == counts[array[27]]);
-  assert(EXTRACT256(_counts,5) == counts[array[19]]);
-  assert(EXTRACT256(_counts,6) == counts[array[11]]);
-  assert(EXTRACT256(_counts,7) == counts[array[3]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 72,chrpos,72));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 72;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 73,chrpos,73));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 73;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 74,chrpos,74));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 74;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 75,chrpos,75));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 75;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 76,chrpos,76));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 76;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 77,chrpos,77));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 77;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 78,chrpos,78));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 78;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 79,chrpos,79));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 79;
-    }
-  }
-#else
-  masked = array[123];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 64,chrpos,64));
-    table[--pointers[masked]] = chrpos - 64;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[115];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 65,chrpos,65));
-    table[--pointers[masked]] = chrpos - 65;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[107];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 66,chrpos,66));
-    table[--pointers[masked]] = chrpos - 66;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+#endif
 
-  masked = array[99];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 67,chrpos,67));
-    table[--pointers[masked]] = chrpos - 67;
-  }
 
-  masked = array[91];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 68,chrpos,68));
-    table[--pointers[masked]] = chrpos - 68;
-  }
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rev & MASK9;		/* 7 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[83];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 69,chrpos,69));
-    table[--pointers[masked]] = chrpos - 69;
-  }
+  masked = (high_rev >> 2) & MASK9;	/* 6 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[75];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 70,chrpos,70));
-    table[--pointers[masked]] = chrpos - 70;
-  }
+  masked = (high_rev >> 4) & MASK9;	/* 5 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[67];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 71,chrpos,71));
-    table[--pointers[masked]] = chrpos - 71;
-  }
+  masked = (high_rev >> 6) & MASK9;	/* 4 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[59];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 72,chrpos,72));
-    table[--pointers[masked]] = chrpos - 72;
-  }
+  masked = (high_rev >> 8) & MASK9;	/* 3 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[51];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 73,chrpos,73));
-    table[--pointers[masked]] = chrpos - 73;
-  }
+  masked = (high_rev >> 10) & MASK9;	/* 2 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[43];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 74,chrpos,74));
-    table[--pointers[masked]] = chrpos - 74;
-  }
+  masked = (high_rev >> 12) & MASK9;	/* 1 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[35];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 75,chrpos,75));
-    table[--pointers[masked]] = chrpos - 75;
-  }
+  masked = high_rev >> 14;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[27];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 76,chrpos,76));
-    table[--pointers[masked]] = chrpos - 76;
-  }
+#else
+  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+#else
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[19];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 77,chrpos,77));
-    table[--pointers[masked]] = chrpos - 77;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[11];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 78,chrpos,78));
-    table[--pointers[masked]] = chrpos - 78;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[3];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 79,chrpos,79));
-    table[--pointers[masked]] = chrpos - 79;
-  }
-#endif
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  /* Row 2 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+2]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[122]]);
-  assert(EXTRACT256(_counts,1) == counts[array[114]]);
-  assert(EXTRACT256(_counts,2) == counts[array[106]]);
-  assert(EXTRACT256(_counts,3) == counts[array[98]]);
-  assert(EXTRACT256(_counts,4) == counts[array[90]]);
-  assert(EXTRACT256(_counts,5) == counts[array[82]]);
-  assert(EXTRACT256(_counts,6) == counts[array[74]]);
-  assert(EXTRACT256(_counts,7) == counts[array[66]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 80,chrpos,80));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 80;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 81,chrpos,81));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 81;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 82,chrpos,82));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 82;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 83,chrpos,83));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 83;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 84,chrpos,84));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 84;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 85,chrpos,85));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 85;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 86,chrpos,86));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 86;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 87,chrpos,87));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 87;
-    }
-  }
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[2]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[58]]);
-  assert(EXTRACT256(_counts,1) == counts[array[50]]);
-  assert(EXTRACT256(_counts,2) == counts[array[42]]);
-  assert(EXTRACT256(_counts,3) == counts[array[34]]);
-  assert(EXTRACT256(_counts,4) == counts[array[26]]);
-  assert(EXTRACT256(_counts,5) == counts[array[18]]);
-  assert(EXTRACT256(_counts,6) == counts[array[10]]);
-  assert(EXTRACT256(_counts,7) == counts[array[2]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 88,chrpos,88));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 88;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 89,chrpos,89));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 89;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 90,chrpos,90));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 90;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 91,chrpos,91));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 91;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 92,chrpos,92));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 92;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 93,chrpos,93));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 93;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 94,chrpos,94));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 94;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 95,chrpos,95));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 95;
-    }
-  }
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
 #else
-  masked = array[122];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 80,chrpos,80));
-    table[--pointers[masked]] = chrpos - 80;
-  }
+  _masked = _mm_and_si128(_oligo, mask9);
+#endif
 
-  masked = array[114];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 81,chrpos,81));
-    table[--pointers[masked]] = chrpos - 81;
-  }
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[106];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 82,chrpos,82));
-    table[--pointers[masked]] = chrpos - 82;
-  }
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[98];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 83,chrpos,83));
-    table[--pointers[masked]] = chrpos - 83;
-  }
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[90];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 84,chrpos,84));
-    table[--pointers[masked]] = chrpos - 84;
-  }
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#endif
 
-  masked = array[82];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 85,chrpos,85));
-    table[--pointers[masked]] = chrpos - 85;
-  }
+  return;
+}
 
-  masked = array[74];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 86,chrpos,86));
-    table[--pointers[masked]] = chrpos - 86;
-  }
+#else
 
-  masked = array[66];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 87,chrpos,87));
-    table[--pointers[masked]] = chrpos - 87;
-  }
-
-  masked = array[58];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 88,chrpos,88));
-    table[--pointers[masked]] = chrpos - 88;
-  }
-
-  masked = array[50];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 89,chrpos,89));
-    table[--pointers[masked]] = chrpos - 89;
-  }
+static void
+count_9mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked;
 
-  masked = array[42];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 90,chrpos,90));
-    table[--pointers[masked]] = chrpos - 90;
-  }
 
-  masked = array[34];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 91,chrpos,91));
-    table[--pointers[masked]] = chrpos - 91;
-  }
+  oligo = nexthigh_rev >> 16;	/* For 31..24 */
+  oligo |= low_rev << 16;
 
-  masked = array[26];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 92,chrpos,92));
-    table[--pointers[masked]] = chrpos - 92;
-  }
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = array[18];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 93,chrpos,93));
-    table[--pointers[masked]] = chrpos - 93;
-  }
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[10];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 94,chrpos,94));
-    table[--pointers[masked]] = chrpos - 94;
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[2];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 95,chrpos,95));
-    table[--pointers[masked]] = chrpos - 95;
-  }
-#endif
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  /* Row 1 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+1]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[121]]);
-  assert(EXTRACT256(_counts,1) == counts[array[113]]);
-  assert(EXTRACT256(_counts,2) == counts[array[105]]);
-  assert(EXTRACT256(_counts,3) == counts[array[97]]);
-  assert(EXTRACT256(_counts,4) == counts[array[89]]);
-  assert(EXTRACT256(_counts,5) == counts[array[81]]);
-  assert(EXTRACT256(_counts,6) == counts[array[73]]);
-  assert(EXTRACT256(_counts,7) == counts[array[65]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 96,chrpos,96));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 96;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 97,chrpos,97));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 97;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 98,chrpos,98));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 98;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 99,chrpos,99));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 99;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 100,chrpos,100));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 100;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 101,chrpos,101));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 101;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 102,chrpos,102));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 102;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 103,chrpos,103));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 103;
-    }
-  }
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[1]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[57]]);
-  assert(EXTRACT256(_counts,1) == counts[array[49]]);
-  assert(EXTRACT256(_counts,2) == counts[array[41]]);
-  assert(EXTRACT256(_counts,3) == counts[array[33]]);
-  assert(EXTRACT256(_counts,4) == counts[array[25]]);
-  assert(EXTRACT256(_counts,5) == counts[array[17]]);
-  assert(EXTRACT256(_counts,6) == counts[array[9]]);
-  assert(EXTRACT256(_counts,7) == counts[array[1]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 104,chrpos,104));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 104;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 105,chrpos,105));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 105;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 106,chrpos,106));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 106;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 107,chrpos,107));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 107;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 108,chrpos,108));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 108;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 109,chrpos,109));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 109;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 110,chrpos,110));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 110;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 111,chrpos,111));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 111;
-    }
-  }
-#else
-  masked = array[121];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 96,chrpos,96));
-    table[--pointers[masked]] = chrpos - 96;
-  }
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[113];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 97,chrpos,97));
-    table[--pointers[masked]] = chrpos - 97;
-  }
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[105];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 98,chrpos,98));
-    table[--pointers[masked]] = chrpos - 98;
-  }
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[97];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 99,chrpos,99));
-    table[--pointers[masked]] = chrpos - 99;
-  }
 
-  masked = array[89];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 100,chrpos,100));
-    table[--pointers[masked]] = chrpos - 100;
-  }
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = array[81];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 101,chrpos,101));
-    table[--pointers[masked]] = chrpos - 101;
-  }
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[73];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 102,chrpos,102));
-    table[--pointers[masked]] = chrpos - 102;
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[65];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 103,chrpos,103));
-    table[--pointers[masked]] = chrpos - 103;
-  }
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[57];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 104,chrpos,104));
-    table[--pointers[masked]] = chrpos - 104;
-  }
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[49];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 105,chrpos,105));
-    table[--pointers[masked]] = chrpos - 105;
-  }
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[41];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 106,chrpos,106));
-    table[--pointers[masked]] = chrpos - 106;
-  }
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[33];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 107,chrpos,107));
-    table[--pointers[masked]] = chrpos - 107;
-  }
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[25];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 108,chrpos,108));
-    table[--pointers[masked]] = chrpos - 108;
-  }
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[17];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 109,chrpos,109));
-    table[--pointers[masked]] = chrpos - 109;
-  }
 
-  masked = array[9];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 110,chrpos,110));
-    table[--pointers[masked]] = chrpos - 110;
-  }
+  oligo = low_rev >> 16;		/* For 15..8 */
+  oligo |= high_rev << 16;
 
-  masked = array[1];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 111,chrpos,111));
-    table[--pointers[masked]] = chrpos - 111;
-  }
-#endif
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask9);
 
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  /* Row 0 */
-#ifdef USE_GATHER
-  _masked = _mm256_i32gather_epi32((int *) &(array[64+0]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[120]]);
-  assert(EXTRACT256(_counts,1) == counts[array[112]]);
-  assert(EXTRACT256(_counts,2) == counts[array[104]]);
-  assert(EXTRACT256(_counts,3) == counts[array[96]]);
-  assert(EXTRACT256(_counts,4) == counts[array[88]]);
-  assert(EXTRACT256(_counts,5) == counts[array[80]]);
-  assert(EXTRACT256(_counts,6) == counts[array[72]]);
-  assert(EXTRACT256(_counts,7) == counts[array[64]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 112,chrpos,112));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 112;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 113,chrpos,113));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 113;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 114,chrpos,114));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 114;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 115,chrpos,115));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 115;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 116,chrpos,116));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 116;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 117,chrpos,117));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 117;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 118,chrpos,118));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 118;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 119,chrpos,119));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 119;
-    }
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  _masked = _mm256_i32gather_epi32((int *) &(array[0]),byeights,/*scale*/4);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-  assert(EXTRACT256(_counts,0) == counts[array[56]]);
-  assert(EXTRACT256(_counts,1) == counts[array[48]]);
-  assert(EXTRACT256(_counts,2) == counts[array[40]]);
-  assert(EXTRACT256(_counts,3) == counts[array[32]]);
-  assert(EXTRACT256(_counts,4) == counts[array[24]]);
-  assert(EXTRACT256(_counts,5) == counts[array[16]]);
-  assert(EXTRACT256(_counts,6) == counts[array[8]]);
-  assert(EXTRACT256(_counts,7) == counts[array[0]]);
-  if (_mm256_testz_si256(_counts,_counts) == 0) {
-    if (EXTRACT256(_counts,0)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 120,chrpos,120));
-      table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 120;
-    }
-    if (EXTRACT256(_counts,1)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 121,chrpos,121));
-      table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 121;
-    }
-    if (EXTRACT256(_counts,2)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 122,chrpos,122));
-      table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 122;
-    }
-    if (EXTRACT256(_counts,3)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 123,chrpos,123));
-      table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 123;
-    }
-    if (EXTRACT256(_counts,4)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 124,chrpos,124));
-      table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 124;
-    }
-    if (EXTRACT256(_counts,5)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 125,chrpos,125));
-      table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 125;
-    }
-    if (EXTRACT256(_counts,6)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 126,chrpos,126));
-      table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 126;
-    }
-    if (EXTRACT256(_counts,7)) {
-      debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 127,chrpos,127));
-      table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 127;
-    }
-  }
-#else
-  masked = array[120];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 112,chrpos,112));
-    table[--pointers[masked]] = chrpos - 112;
-  }
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[112];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 113,chrpos,113));
-    table[--pointers[masked]] = chrpos - 113;
-  }
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[104];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 114,chrpos,114));
-    table[--pointers[masked]] = chrpos - 114;
-  }
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[96];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 115,chrpos,115));
-    table[--pointers[masked]] = chrpos - 115;
-  }
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[88];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 116,chrpos,116));
-    table[--pointers[masked]] = chrpos - 116;
-  }
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[80];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 117,chrpos,117));
-    table[--pointers[masked]] = chrpos - 117;
-  }
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[72];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 118,chrpos,118));
-    table[--pointers[masked]] = chrpos - 118;
-  }
 
-  masked = array[64];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 119,chrpos,119));
-    table[--pointers[masked]] = chrpos - 119;
-  }
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = array[56];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 120,chrpos,120));
-    table[--pointers[masked]] = chrpos - 120;
-  }
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[48];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 121,chrpos,121));
-    table[--pointers[masked]] = chrpos - 121;
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[40];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 122,chrpos,122));
-    table[--pointers[masked]] = chrpos - 122;
-  }
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[32];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 123,chrpos,123));
-    table[--pointers[masked]] = chrpos - 123;
-  }
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[24];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 124,chrpos,124));
-    table[--pointers[masked]] = chrpos - 124;
-  }
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[16];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 125,chrpos,125));
-    table[--pointers[masked]] = chrpos - 125;
-  }
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[8];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 126,chrpos,126));
-    table[--pointers[masked]] = chrpos - 126;
-  }
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-  masked = array[0];
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 127,chrpos,127));
-    table[--pointers[masked]] = chrpos - 127;
-  }
-#endif
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
-  return chrpos - 128;
+  return;
 }
 
 #endif	/* HAVE_AVX2 */
 
-#endif	/* USE_SIMD_FOR_COUNTS */
 
+/* Expecting current to have {high0_rev, low0_rev, high1_rev,
+   low1_rev}, and next to have {low0_rev, high1_rev, low1_rev,
+   high2_rev} */
+#ifdef HAVE_SSE2
+static void			/* Writes 16 vectors to out using aligned stores (out must be 16-byte aligned) */
+extract_9mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) {
+  __m128i oligo;		/* Per 32-bit lane: (current << 16) | (next >> 16) */
 
+  _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary: only 18 bits remain after the shift */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
+  _mm_store_si128(out++, _mm_and_si128( current, mask9)); /* out[0..7] done: shifts 14,12,...,0 of current */
 
-#if !defined(HAVE_AVX2)
+  oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16)); /* Low half of current joined with high half of next */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask9)); /* out[8..15] done: shifts 14,12,...,0 of oligo */
 
-static void
-count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
-#endif
+  return;
+}
 
+#ifdef USE_UNORDERED_9
+static Chrpos_T
+store_9mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];		/* 16 vectors x 4 lanes = 64 UINT4 values */
+  /* Variant compiled when USE_UNORDERED_9 is defined: fill array via the extract helper, then return the result of store_fwdrev_simd_64 (defined outside this view) */
+  extract_9mers_fwd_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
-  oligo = nexthigh_rev >> 16;	/* For 31..24 */
-  oligo |= low_rev << 16;
+#else
+/* Includes extract_9mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next): computes 16 rows of masked values, regroups them by 32-bit lane, and returns the result of store_fwdrev_simd_64_ordered on that array */
+static Chrpos_T
+store_9mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7,
+    _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK9; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
+  out = &(array[0]);
 
-  masked = (oligo >> 2) & MASK9; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
+  oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16)); /* Low half of current joined with high half of next */
+  _row0 = _mm_and_si128( oligo, mask9);
+  _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9);
+  _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9);
+  _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9);
+  _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9);
+  _row5 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9);
+  _row6 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9);
+  _row7 = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9);
+  /* Rows 8..15: the same shifts (0,2,...,14) applied to current itself */
+  _row8 = _mm_and_si128( current, mask9);
+  _row9 = _mm_and_si128( _mm_srli_epi32(current,2), mask9);
+  _row10 = _mm_and_si128( _mm_srli_epi32(current,4), mask9);
+  _row11 = _mm_and_si128( _mm_srli_epi32(current,6), mask9);
+  _row12 = _mm_and_si128( _mm_srli_epi32(current,8), mask9);
+  _row13 = _mm_and_si128( _mm_srli_epi32(current,10), mask9);
+  _row14 = _mm_and_si128( _mm_srli_epi32(current,12), mask9);
+  _row15 = _mm_srli_epi32(current,14); /* No mask necessary */
+
+  /* The remainder transposes the 16 rows x 4 lanes so that each stored vector holds one lane of four consecutive rows */
+  /* Split: top half (32-bit lanes 2 and 3 of every row) */
+  _t0 = _mm_unpackhi_epi32(_row0,_row1);
+  _t1 = _mm_unpackhi_epi32(_row2,_row3);
+  _t2 = _mm_unpackhi_epi32(_row4,_row5);
+  _t3 = _mm_unpackhi_epi32(_row6,_row7);
+  _t4 = _mm_unpackhi_epi32(_row8,_row9);
+  _t5 = _mm_unpackhi_epi32(_row10,_row11);
+  _t6 = _mm_unpackhi_epi32(_row12,_row13);
+  _t7 = _mm_unpackhi_epi32(_row14,_row15);
+  /* array[0..3]: lane 3 of rows 0..15 (unpackhi_epi64); array[4..7]: lane 2 of rows 0..15 (unpacklo_epi64) */
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7));
+
+
+  /* Split: bottom half (32-bit lanes 0 and 1 of every row) */
+  _t0 = _mm_unpacklo_epi32(_row0,_row1);
+  _t1 = _mm_unpacklo_epi32(_row2,_row3);
+  _t2 = _mm_unpacklo_epi32(_row4,_row5);
+  _t3 = _mm_unpacklo_epi32(_row6,_row7);
+  _t4 = _mm_unpacklo_epi32(_row8,_row9);
+  _t5 = _mm_unpacklo_epi32(_row10,_row11);
+  _t6 = _mm_unpacklo_epi32(_row12,_row13);
+  _t7 = _mm_unpacklo_epi32(_row14,_row15);
+  /* array[8..11]: lane 1 of rows 0..15; array[12..15]: lane 0 of rows 0..15 */
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7));
+  /* array is handed on as 64 UINT4 values; store_fwdrev_simd_64_ordered is defined outside this view */
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
 
-  masked = (oligo >> 4) & MASK9; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+#ifdef HAVE_AVX2
+static void
+extract_9mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
+  __m256i oligo;
+  /* Writes 16 vectors to out.  out[0..7]: each 32-bit lane of current shifted right by 14,12,...,0 bits and masked */
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,14)); /* No mask necessary */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask9));
+  /* out[8..15]: the same shifts applied to the word joining the low half of current with the high half of next */
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask9));
+  /* All stores above are aligned stores, so out must be 32-byte aligned */
+  return;
+}
+
+#ifdef USE_UNORDERED_9
+static Chrpos_T
+store_9mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			  __m256i current, __m256i next) {
+  __m256i array[16];		/* 16 vectors x 8 lanes = 128 UINT4 values */
+  /* Variant compiled when USE_UNORDERED_9 is defined: fill array via the extract helper, then return the result of store_fwdrev_simd_128 (defined outside this view) */
+  extract_9mers_fwd_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array);
+}
+
+#else
+/* Includes extract_9mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next): computes 16 rows of masked values, regroups them by 32-bit element, and returns the result of store_fwdrev_simd_128_ordered on that array */
+static Chrpos_T
+store_9mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			  __m256i current, __m256i next) {
+  __m256i array[16], *out;
+  __m256i oligo;
+  __m256i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7,
+    _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+  /* Rows 0..7: shifts 0,2,...,14 of the word joining the low half of current with the high half of next */
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16));
+  _row0 = _mm256_and_si256( oligo, bigmask9);
+  _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9);
+  _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9);
+  _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9);
+  _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9);
+  _row5 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9);
+  _row6 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9);
+  _row7 = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9);
+  /* Rows 8..15: the same shifts applied to current itself */
+  _row8 = _mm256_and_si256( current, bigmask9);
+  _row9 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9);
+  _row10 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9);
+  _row11 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9);
+  _row12 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9);
+  _row13 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9);
+  _row14 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9);
+  _row15 = _mm256_srli_epi32(current,14); /* No mask necessary */
+
+  /* The remainder transposes the 16 rows x 8 elements so that each stored vector holds one element of eight consecutive rows */
+  /* Split: top half (the unpack intrinsics work per 128-bit lane, so this takes elements 2,3 and 6,7 of every row) */
+  _t0 = _mm256_unpackhi_epi32(_row0,_row1);
+  _t1 = _mm256_unpackhi_epi32(_row2,_row3);
+  _t2 = _mm256_unpackhi_epi32(_row4,_row5);
+  _t3 = _mm256_unpackhi_epi32(_row6,_row7);
+  _t4 = _mm256_unpackhi_epi32(_row8,_row9);
+  _t5 = _mm256_unpackhi_epi32(_row10,_row11);
+  _t6 = _mm256_unpackhi_epi32(_row12,_row13);
+  _t7 = _mm256_unpackhi_epi32(_row14,_row15);
+  /* _u0.._u3: element 3 (low lane) and element 7 (high lane) of four rows each; _u4.._u7: elements 2 and 6 */
+  _u0 = _mm256_unpackhi_epi64(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi64(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi64(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi64(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi64(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi64(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi64(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi64(_t6,_t7);
+
+  /* Split: bottom half (elements 0,1 and 4,5 of every row) */
+  _t0 = _mm256_unpacklo_epi32(_row0,_row1);
+  _t1 = _mm256_unpacklo_epi32(_row2,_row3);
+  _t2 = _mm256_unpacklo_epi32(_row4,_row5);
+  _t3 = _mm256_unpacklo_epi32(_row6,_row7);
+  _t4 = _mm256_unpacklo_epi32(_row8,_row9);
+  _t5 = _mm256_unpacklo_epi32(_row10,_row11);
+  _t6 = _mm256_unpacklo_epi32(_row12,_row13);
+  _t7 = _mm256_unpacklo_epi32(_row14,_row15);
+  /* _row8.._row15 are reused as temporaries from here on: _row8.._row11 hold elements 1 and 5; _row12.._row15 hold elements 0 and 4 */
+  _row8 = _mm256_unpackhi_epi64(_t0,_t1);
+  _row9 = _mm256_unpackhi_epi64(_t2,_t3);
+  _row10 = _mm256_unpackhi_epi64(_t4,_t5);
+  _row11 = _mm256_unpackhi_epi64(_t6,_t7);
+  _row12 = _mm256_unpacklo_epi64(_t0,_t1);
+  _row13 = _mm256_unpacklo_epi64(_t2,_t3);
+  _row14 = _mm256_unpacklo_epi64(_t4,_t5);
+  _row15 = _mm256_unpacklo_epi64(_t6,_t7);
+
+  /* 0x31 takes the high 128-bit lane of both operands: array[0..7] hold elements 7, 6, 5, 4, each over rows 0..15 */
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x31));
+  /* 0x20 takes the low 128-bit lane of both operands: array[8..15] hold elements 3, 2, 1, 0, each over rows 0..15 */
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x20));
+  /* array is handed on as 128 UINT4 values; store_fwdrev_simd_128_ordered is defined outside this view */
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX512
+static void
+extract_9mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) {
+  __m512i oligo;
+  /* Writes 16 vectors to out.  out[0..7]: each 32-bit lane of current shifted right by 14,12,...,0 bits and masked */
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,14)); /* No mask necessary */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask9));
+  /* out[8..15]: the same shifts applied to the word joining the low half of current with the high half of next */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,16), _mm512_slli_epi32(current,16));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask9));
+  /* All stores above are aligned stores, so out must be 64-byte aligned */
+  return;
+}
+
+#ifdef USE_UNORDERED_9
+static Chrpos_T
+store_9mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16];		/* 16 vectors x 16 lanes = 256 UINT4 values */
+  /* Variant compiled when USE_UNORDERED_9 is defined: fill array via the extract helper, then return the result of store_fwdrev_simd_256 (defined outside this view) */
+  extract_9mers_fwd_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array);
+}
+
+#else
+/* Includes extract_9mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */
+static Chrpos_T
+store_9mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7,
+    _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,16), _mm512_slli_epi32(current,16));
+  _row0 = _mm512_and_si512( oligo, hugemask9);
+  _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9);
+  _row2 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9);
+  _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9);
+  _row4 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9);
+  _row5 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9);
+  _row6 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9);
+  _row7 = _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9);
+
+  _row8 = _mm512_and_si512( current, hugemask9);
+  _row9 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9);
+  _row10 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9);
+  _row11 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9);
+  _row12 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9);
+  _row13 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9);
+  _row14 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9);
+  _row15 = _mm512_srli_epi32(current,14); /* No mask necessary */
+
+
+  /* Split: top half */
+  _t0 = _mm512_unpackhi_epi32(_row0,_row1);
+  _t1 = _mm512_unpackhi_epi32(_row2,_row3);
+  _t2 = _mm512_unpackhi_epi32(_row4,_row5);
+  _t3 = _mm512_unpackhi_epi32(_row6,_row7);
+  _t4 = _mm512_unpackhi_epi32(_row8,_row9);
+  _t5 = _mm512_unpackhi_epi32(_row10,_row11);
+  _t6 = _mm512_unpackhi_epi32(_row12,_row13);
+  _t7 = _mm512_unpackhi_epi32(_row14,_row15);
+
+  _u0 = _mm512_unpackhi_epi64(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi64(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi64(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi64(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi64(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi64(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi64(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi64(_t6,_t7);
+
+  /* Split: bottom half */
+  _t0 = _mm512_unpacklo_epi32(_row0,_row1);
+  _t1 = _mm512_unpacklo_epi32(_row2,_row3);
+  _t2 = _mm512_unpacklo_epi32(_row4,_row5);
+  _t3 = _mm512_unpacklo_epi32(_row6,_row7);
+  _t4 = _mm512_unpacklo_epi32(_row8,_row9);
+  _t5 = _mm512_unpacklo_epi32(_row10,_row11);
+  _t6 = _mm512_unpacklo_epi32(_row12,_row13);
+  _t7 = _mm512_unpacklo_epi32(_row14,_row15);
+
+  _row8 = _mm512_unpackhi_epi64(_t0,_t1);
+  _row9 = _mm512_unpackhi_epi64(_t2,_t3);
+  _row10 = _mm512_unpackhi_epi64(_t4,_t5);
+  _row11 = _mm512_unpackhi_epi64(_t6,_t7);
+  _row12 = _mm512_unpacklo_epi64(_t0,_t1);
+  _row13 = _mm512_unpacklo_epi64(_t2,_t3);
+  _row14 = _mm512_unpacklo_epi64(_t4,_t5);
+  _row15 = _mm512_unpacklo_epi64(_t6,_t7);
+
+
+  /* Split: top half */
+  _shuffle0 = _mm512_setr_epi64(6, 7, 8+6, 8+7, 4, 5, 8+4, 8+5);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+  _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9);
+  _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11);
+  _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13);
+  _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15);
+
+  _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3);
+  _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7);
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7));
+
+  /* Split: bottom half */
+  _shuffle0 = _mm512_setr_epi64(2, 3, 8+2, 8+3, 0, 1, 8+0, 8+1);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+  _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9);
+  _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11);
+  _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13);
+  _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15);
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); */
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); */
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7));
+
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+
+#if !defined(HAVE_AVX2)
+static int
+store_9mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
+#else
+  __m128i _oligo, _masked;
+#endif
+
+
+  oligo = nexthigh_rev >> 16;	/* For 31..24 */
+  oligo |= low_rev << 16;
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK9; /* 31 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
+  }
+
+  masked = (oligo >> 2) & MASK9; /* 30 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
+  }
+
+  masked = (oligo >> 4) & MASK9; /* 29 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
+  }
 
   masked = (oligo >> 6) & MASK9; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
+  }
 
   masked = (oligo >> 8) & MASK9; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
+  }
 
   masked = (oligo >> 10) & MASK9; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
+  }
 
   masked = (oligo >> 12) & MASK9; /* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
+  }
 
   masked = (oligo >> 14) & MASK9; /* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
+  }
 
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
@@ -12214,20 +11469,28 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
+  }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
@@ -12238,55 +11501,79 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
+  }
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rev & MASK9;	/* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
+  }
 
   masked = (low_rev >> 2) & MASK9;	/* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
+  }
 
   masked = (low_rev >> 4) & MASK9;	/* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
+  }
 
   masked = (low_rev >> 6) & MASK9;	/* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
+  }
 
   masked = (low_rev >> 8) & MASK9; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
+  }
 
   masked = (low_rev >> 10) & MASK9; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
+  }
 
   masked = (low_rev >> 12) & MASK9; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
+  }
 
   masked = low_rev >> 14;		/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
+  }
 
 #else
   _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
@@ -12297,20 +11584,28 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
+  }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
@@ -12321,20 +11616,28 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
+  }
 #endif
 
 
@@ -12343,36 +11646,52 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK9; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
+  }
 
   masked = (oligo >> 2) & MASK9; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
+  }
 
   masked = (oligo >> 4) & MASK9; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
+  }
 
   masked = (oligo >> 6) & MASK9; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
+  }
 
   masked = (oligo >> 8) & MASK9; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
 
   masked = (oligo >> 10) & MASK9; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
+  }
 
   masked = (oligo >> 12) & MASK9; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
+  }
 
-  masked = (oligo >> 14) & MASK9; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  masked = (oligo >> 14) & MASK9; /* 8 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
 
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
@@ -12383,20 +11702,28 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
+  }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
@@ -12407,55 +11734,79 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rev & MASK9;		/* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
+  }
 
   masked = (high_rev >> 2) & MASK9;	/* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
+  }
 
   masked = (high_rev >> 4) & MASK9;	/* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
+  }
 
   masked = (high_rev >> 6) & MASK9;	/* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
+  }
 
   masked = (high_rev >> 8) & MASK9;	/* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
+  }
 
   masked = (high_rev >> 10) & MASK9;	/* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
   masked = (high_rev >> 12) & MASK9;	/* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
 
   masked = high_rev >> 14;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
 
 #else
   _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
@@ -12466,20 +11817,28 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
+  }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
@@ -12490,2801 +11849,1733 @@ count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
 #endif
 
-  return;
+  return chrpos - 32;
 }
 
 #else  /* HAVE_AVX2 */
 
-static void
-count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+static int
+store_9mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
+  __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+
+
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
 
   oligo = nexthigh_rev >> 16;	/* For 31..24 */
   oligo |= low_rev << 16;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
     }
   }
-#endif	/* CHECK_FOR_OVERFLOW */
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
+  }
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
+  }
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
+  }
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
+  }
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
+  }
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
+  }
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
     }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
     }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
     }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
     }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
     }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
     }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
     }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
     }
   }
-#endif	/* CHECK_FOR_OVERFLOW */
 
 
   oligo = low_rev >> 16;		/* For 15..8 */
   oligo |= high_rev << 16;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
     }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
     }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
     }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
     }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
     }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
     }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
     }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
     }
   }
-#endif	/* CHECK_FOR_OVERFLOW */
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
+  }
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
     }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
     }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
     }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
     }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
     }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
     }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
+  }
+
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
     }
   }
-#endif	/* CHECK_FOR_OVERFLOW */
 
-  return;
+  return chrpos - 32;
 }
 
 #endif	/* HAVE_AVX2 */
 
 
-/* Expecting current to have {high0_rev, low0_rev, high1_rev,
-   low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
-   high2_rev} */
-#ifdef USE_SIMD_FOR_COUNTS
+#if !defined(HAVE_AVX2)
+
 static void
-extract_9mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
+count_8mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
+#else
+  __m128i _oligo, _masked;
+#endif
 
-  _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary */
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
-  _mm_store_si128(out++, _mm_and_si128( current, mask9));
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask9));
+  oligo = nexthigh_rev >> 18;	/* For 31..25 */
+  oligo |= low_rev << 14;
 
-  return;
-}
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK8; /* 31 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
+  masked = (oligo >> 2) & MASK8; /* 30 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_AVX2
-static void
-extract_9mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
+  masked = (oligo >> 4) & MASK8; /* 29 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,14)); /* No mask necessary */
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask9));
+  masked = (oligo >> 6) & MASK8; /* 28 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask9));
+  masked = (oligo >> 8) & MASK8; /* 27 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  return;
-}
-#endif
+  masked = (oligo >> 10) & MASK8; /* 26 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
+  masked = (oligo >> 12) & MASK8; /* 25 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-static void
-count_9mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
 #else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
-
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,14); /* No mask necessary */
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,14)); /* No mask necessary */
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("16 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("32 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("48 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */
-#endif
-  debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("17 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("33 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("49 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */
-#endif
-  debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("18 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("34 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("50 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */
-#endif
-  debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask9);
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("19 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("35 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("51 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */
-#endif
-  debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("20 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("36 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("52 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 #endif
-  debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("21 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("37 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("53 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */
-#endif
-  debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK8;	/* 24 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("22 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("38 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("54 %04X => %d\n",array[3],counts[array[3]]));
+  masked = (low_rev >> 2) & MASK8;	/* 23 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */
-#endif
-  debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (low_rev >> 4) & MASK8;	/* 22 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("23 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("39 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("55 %04X => %d\n",array[3],counts[array[3]]));
+  masked = (low_rev >> 6) & MASK8;	/* 21 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */
-#endif
-  debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (low_rev >> 8) & MASK8;	/* 20 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("24 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("40 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("56 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */
-#endif
-  debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("25 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("41 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("57 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */
-#endif
-  debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("26 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("42 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("58 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */
-#endif
-  debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (low_rev >> 10) & MASK8; /* 19 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("27 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("43 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("59 %04X => %d\n",array[3],counts[array[3]]));
+  masked = (low_rev >> 12) & MASK8; /* 18 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */
-#endif
-  debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (low_rev >> 14) & MASK8; /* 17 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9);
+  masked = low_rev >> 16;		/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+  
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("28 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("44 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("60 %04X => %d\n",array[3],counts[array[3]]));
-
+  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
-  debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("29 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("45 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("61 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */
-#endif
-  debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("30 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("46 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("62 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */
-#endif
-  debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("31 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("47 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("63 %04X => %d\n",array[3],counts[array[3]]));
 
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */
-#endif
-  debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-  return;
-}
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_AVX2
-static void
-count_9mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-  array = _mm256_srli_epi32(current,14); /* No mask necessary */
-  counts[EXTRACT256(array,0)] += 1;	 /* 0 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 16 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 32 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 48 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 64 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 80 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 96 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 112 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;	 /* 1 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 17 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 33 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 49 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 65 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 81 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 97 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 113 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 2 */
-  counts[EXTRACT256(array,1)] += 1; /* 18 */
-  counts[EXTRACT256(array,2)] += 1; /* 34 */
-  counts[EXTRACT256(array,3)] += 1; /* 50 */
-  counts[EXTRACT256(array,4)] += 1; /* 66 */
-  counts[EXTRACT256(array,5)] += 1; /* 82 */
-  counts[EXTRACT256(array,6)] += 1; /* 98 */
-  counts[EXTRACT256(array,7)] += 1; /* 114 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 3 */
-  counts[EXTRACT256(array,1)] += 1; /* 19 */
-  counts[EXTRACT256(array,2)] += 1; /* 35 */
-  counts[EXTRACT256(array,3)] += 1; /* 51 */
-  counts[EXTRACT256(array,4)] += 1; /* 67 */
-  counts[EXTRACT256(array,5)] += 1; /* 83 */
-  counts[EXTRACT256(array,6)] += 1; /* 99 */
-  counts[EXTRACT256(array,7)] += 1; /* 115 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 4 */
-  counts[EXTRACT256(array,1)] += 1; /* 20 */
-  counts[EXTRACT256(array,2)] += 1; /* 36 */
-  counts[EXTRACT256(array,3)] += 1; /* 52 */
-  counts[EXTRACT256(array,4)] += 1; /* 68 */
-  counts[EXTRACT256(array,5)] += 1; /* 84 */
-  counts[EXTRACT256(array,6)] += 1; /* 100 */
-  counts[EXTRACT256(array,7)] += 1; /* 116 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 5 */
-  counts[EXTRACT256(array,1)] += 1; /* 21 */
-  counts[EXTRACT256(array,2)] += 1; /* 37 */
-  counts[EXTRACT256(array,3)] += 1; /* 53 */
-  counts[EXTRACT256(array,4)] += 1; /* 69 */
-  counts[EXTRACT256(array,5)] += 1; /* 85 */
-  counts[EXTRACT256(array,6)] += 1; /* 101 */
-  counts[EXTRACT256(array,7)] += 1; /* 117 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 6 */
-  counts[EXTRACT256(array,1)] += 1; /* 22 */
-  counts[EXTRACT256(array,2)] += 1; /* 38 */
-  counts[EXTRACT256(array,3)] += 1; /* 54 */
-  counts[EXTRACT256(array,4)] += 1; /* 70 */
-  counts[EXTRACT256(array,5)] += 1; /* 86 */
-  counts[EXTRACT256(array,6)] += 1; /* 102 */
-  counts[EXTRACT256(array,7)] += 1; /* 118 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 7 */
-  counts[EXTRACT256(array,1)] += 1; /* 23 */
-  counts[EXTRACT256(array,2)] += 1; /* 39 */
-  counts[EXTRACT256(array,3)] += 1; /* 55 */
-  counts[EXTRACT256(array,4)] += 1; /* 71 */
-  counts[EXTRACT256(array,5)] += 1; /* 87 */
-  counts[EXTRACT256(array,6)] += 1; /* 103 */
-  counts[EXTRACT256(array,7)] += 1; /* 119 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 8 */
-  counts[EXTRACT256(array,1)] += 1; /* 24 */
-  counts[EXTRACT256(array,2)] += 1; /* 40 */
-  counts[EXTRACT256(array,3)] += 1; /* 56 */
-  counts[EXTRACT256(array,4)] += 1; /* 72 */
-  counts[EXTRACT256(array,5)] += 1; /* 88 */
-  counts[EXTRACT256(array,6)] += 1; /* 104 */
-  counts[EXTRACT256(array,7)] += 1; /* 120 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 9 */
-  counts[EXTRACT256(array,1)] += 1; /* 25 */
-  counts[EXTRACT256(array,2)] += 1; /* 41 */
-  counts[EXTRACT256(array,3)] += 1; /* 57 */
-  counts[EXTRACT256(array,4)] += 1; /* 73 */
-  counts[EXTRACT256(array,5)] += 1; /* 89 */
-  counts[EXTRACT256(array,6)] += 1; /* 105 */
-  counts[EXTRACT256(array,7)] += 1; /* 121 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 10 */
-  counts[EXTRACT256(array,1)] += 1; /* 26 */
-  counts[EXTRACT256(array,2)] += 1; /* 42 */
-  counts[EXTRACT256(array,3)] += 1; /* 58 */
-  counts[EXTRACT256(array,4)] += 1; /* 74 */
-  counts[EXTRACT256(array,5)] += 1; /* 90 */
-  counts[EXTRACT256(array,6)] += 1; /* 106 */
-  counts[EXTRACT256(array,7)] += 1; /* 122 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 11 */
-  counts[EXTRACT256(array,1)] += 1; /* 27 */
-  counts[EXTRACT256(array,2)] += 1; /* 43 */
-  counts[EXTRACT256(array,3)] += 1; /* 59 */
-  counts[EXTRACT256(array,4)] += 1; /* 75 */
-  counts[EXTRACT256(array,5)] += 1; /* 91 */
-  counts[EXTRACT256(array,6)] += 1; /* 107 */
-  counts[EXTRACT256(array,7)] += 1; /* 123 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 12 */
-  counts[EXTRACT256(array,1)] += 1; /* 28 */
-  counts[EXTRACT256(array,2)] += 1; /* 44 */
-  counts[EXTRACT256(array,3)] += 1; /* 60 */
-  counts[EXTRACT256(array,4)] += 1; /* 76 */
-  counts[EXTRACT256(array,5)] += 1; /* 92 */
-  counts[EXTRACT256(array,6)] += 1; /* 108 */
-  counts[EXTRACT256(array,7)] += 1; /* 124 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 13 */
-  counts[EXTRACT256(array,1)] += 1; /* 29 */
-  counts[EXTRACT256(array,2)] += 1; /* 45 */
-  counts[EXTRACT256(array,3)] += 1; /* 61 */
-  counts[EXTRACT256(array,4)] += 1; /* 77 */
-  counts[EXTRACT256(array,5)] += 1; /* 93 */
-  counts[EXTRACT256(array,6)] += 1; /* 109 */
-  counts[EXTRACT256(array,7)] += 1; /* 125 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 14 */
-  counts[EXTRACT256(array,1)] += 1; /* 30 */
-  counts[EXTRACT256(array,2)] += 1; /* 46 */
-  counts[EXTRACT256(array,3)] += 1; /* 62 */
-  counts[EXTRACT256(array,4)] += 1; /* 78 */
-  counts[EXTRACT256(array,5)] += 1; /* 94 */
-  counts[EXTRACT256(array,6)] += 1; /* 110 */
-  counts[EXTRACT256(array,7)] += 1; /* 126 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask9);
-  counts[EXTRACT256(array,0)] += 1; /* 15 */
-  counts[EXTRACT256(array,1)] += 1; /* 31 */
-  counts[EXTRACT256(array,2)] += 1; /* 47 */
-  counts[EXTRACT256(array,3)] += 1; /* 63 */
-  counts[EXTRACT256(array,4)] += 1; /* 79 */
-  counts[EXTRACT256(array,5)] += 1; /* 95 */
-  counts[EXTRACT256(array,6)] += 1; /* 111 */
-  counts[EXTRACT256(array,7)] += 1; /* 127 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  return;
-}
-#endif
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-#if !defined(HAVE_AVX2)
 
-static int
-store_9mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
+  masked = low_rev >> 16;		/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
-  oligo = nexthigh_rev >> 16;	/* For 31..24 */
-  oligo |= low_rev << 16;
+  oligo = low_rev >> 18;		/* For 15..9 */
+  oligo |= high_rev << 14;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK9; /* 31 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  masked = (oligo >> 2) & MASK9; /* 30 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
+  masked = oligo & MASK8; /* 15 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 4) & MASK9; /* 29 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
+  masked = (oligo >> 2) & MASK8; /* 14 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 6) & MASK9; /* 28 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
+  masked = (oligo >> 4) & MASK8; /* 13 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 8) & MASK9; /* 27 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+  masked = (oligo >> 6) & MASK8; /* 12 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 10) & MASK9; /* 26 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
+  masked = (oligo >> 8) & MASK8; /* 11 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 12) & MASK9; /* 25 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
+  masked = (oligo >> 10) & MASK8; /* 10 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 14) & MASK9; /* 24 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
+  masked = (oligo >> 12) & MASK8; /* 9 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _masked = _mm_and_si128(_oligo, mask9);
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _masked = _mm_and_si128(_oligo, mask9);
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK9;	/* 23 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-  masked = (low_rev >> 2) & MASK9;	/* 22 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
+  masked = high_rev & MASK8;		/* 8 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 4) & MASK9;	/* 21 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
+  masked = (high_rev >> 2) & MASK8;	/* 7 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 6) & MASK9;	/* 20 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
+  masked = (high_rev >> 4) & MASK8;	/* 6 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 8) & MASK9; /* 19 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+  masked = (high_rev >> 6) & MASK8;	/* 5 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+ 
+  masked = (high_rev >> 8) & MASK8;	/* 4 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 10) & MASK9; /* 18 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
+  masked = (high_rev >> 10) & MASK8;	/* 3 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 12) & MASK9; /* 17 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
+  masked = (high_rev >> 12) & MASK8;	/* 2 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = low_rev >> 14;		/* 16, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
+  masked = (high_rev >> 14) & MASK8;	/* 1 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
+  masked = high_rev >> 16;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+  
 #else
-  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _masked = _mm_and_si128(_oligo, mask9);
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _masked = _mm_and_si128(_oligo, mask9);
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-#endif
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
 
-  oligo = low_rev >> 16;		/* For 15..8 */
-  oligo |= high_rev << 16;
+  masked = high_rev >> 16;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#endif
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK9; /* 15 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
+  return;
+}
 
-  masked = (oligo >> 2) & MASK9; /* 14 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
+#else	/* HAVE_AVX2 */
 
-  masked = (oligo >> 4) & MASK9; /* 13 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
+static void
+count_8mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked;
 
-  masked = (oligo >> 6) & MASK9; /* 12 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
 
-  masked = (oligo >> 8) & MASK9; /* 11 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
+  oligo = nexthigh_rev >> 18;	/* For 31..25 */
+  oligo |= low_rev << 14;
 
-  masked = (oligo >> 10) & MASK9; /* 10 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
 
-  masked = (oligo >> 12) & MASK9; /* 9 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 14) & MASK9; /* 9 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
-#else
-  _masked = _mm_and_si128(_oligo, mask9);
-#endif
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
-#else
-  _masked = _mm_and_si128(_oligo, mask9);
-#endif
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-#endif
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK9;		/* 7 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 2) & MASK9;	/* 6 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 4) & MASK9;	/* 5 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 6) & MASK9;	/* 4 */
+
+  masked = low_rev >> 16;		/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+
+  oligo = low_rev >> 18;		/* For 15..9 */
+  oligo |= high_rev << 14;
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
+
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
+
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+
+  masked = high_rev >> 16;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+
+  return;
+}
+
+#endif  /* HAVE_AVX2 */
+
+
+
+/* Expecting current to have {high0_rev, low0_rev, high1_rev,
+   low1_rev}, and next to have {low0_rev, high1_rev, low1_rev,
+   high2_rev} */
+#ifdef HAVE_SSE2
+static void
+extract_8mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { /* Fills out[0..15]; _mm_store_si128 is an aligned store, so out must be 16-byte aligned */
+  __m128i oligo; /* Per-lane combination of current and next, built below */
+
+  _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
+  _mm_store_si128(out++, _mm_and_si128( current, mask8)); /* Ninth and last store taken from current alone (shifts 16,14,...,0) */
+
+  oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14)); /* (current << 14) | (next >> 18) in every 32-bit lane */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask8)); /* Seventh store from oligo (shifts 12,10,...,0); 9 + 7 = 16 stores in total */
+
+  return;
+}
+
+#ifdef USE_UNORDERED_8
+static Chrpos_T
+store_8mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) { /* Unordered variant: extract into a local buffer, then hand it to store_fwdrev_simd_64 */
+  __m128i array[16]; /* Receives the 16 vectors written by extract_8mers_fwd_simd_64 */
+			  
+  extract_8mers_fwd_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is reinterpreted as a flat UINT4 array; the return value is passed through unchanged */
+}
+
+#else
+/* Ordered variant.  The extraction step (what would be extract_8mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next)) is written inline here */
+static Chrpos_T
+store_8mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) { /* NOTE(review): _mm_blend_epi16 and _mm_cvtepu16_epi32 below are SSE4.1 intrinsics, but the visible guard is HAVE_SSE2 -- confirm this branch is only built when SSE4.1 is available */
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m128i _u0, _u1, _u2, _u3;
+
+  out = &(array[0]);
+
+  /* As a special case, 8-mers don't need to be masked, since each one exactly fills a 16-bit half of a 32-bit lane */
+
+  oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14)); /* (current << 14) | (next >> 18) in every 32-bit lane */
+  /* _row0 = _mm_and_si128( oligo, mask8); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8); */
+  _t0 = _mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55); /* 0x55 takes the even 16-bit elements from the second operand: low half of each lane = row0, high half = row1 */
+
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8); */
+  _t1 = _mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo,4), 0x55); /* low half = row2, high half = row3 */
+
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8); */
+  _t2 = _mm_blend_epi16(_mm_slli_epi32(oligo,6), _mm_srli_epi32(oligo,8), 0x55); /* low half = row4, high half = row5 */
+
+
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8);*/
+  /* _row7 = _mm_and_si128( current, mask8); */
+  _t3 = _mm_blend_epi16(_mm_slli_epi32(current,16), _mm_srli_epi32(oligo,12), 0x55); /* low half = row6, high half = row7 */
+
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,2), mask8); */
+  /* _row9 = _mm_and_si128( _mm_srli_epi32(current,4), mask8); */
+  _t4 = _mm_blend_epi16(_mm_slli_epi32(current,12), _mm_srli_epi32(current,2), 0x55); /* low half = row8, high half = row9 */
+
+  /* _row10 = _mm_and_si128( _mm_srli_epi32(current,6), mask8); */
+  /* _row11 = _mm_and_si128( _mm_srli_epi32(current,8), mask8); */
+  _t5 = _mm_blend_epi16(_mm_slli_epi32(current,8), _mm_srli_epi32(current,6), 0x55); /* low half = row10, high half = row11 */
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(current,10), mask8); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(current,12), mask8); */
+  _t6 = _mm_blend_epi16(_mm_slli_epi32(current,4), _mm_srli_epi32(current,10), 0x55); /* low half = row12, high half = row13 */
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(current,14), mask8); */
+  /* _row15 = _mm_srli_epi32(current,16); */ /* No mask necessary */
+  _t7 = _mm_blend_epi16(current, _mm_srli_epi32(current,14), 0x55); /* low half = row14, high half = row15 */
+
+
+  /* Split: top half */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1); /* lanes 2 and 3 of _t0 and _t1, interleaved */
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); /* upper 64 bits of _u0 (lane 3): rows 0..3, each widened from 16 to 32 bits */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); /* lower 64 bits of _u0 (lane 2): rows 0..3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  /* Split: bottom half */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1); /* lanes 0 and 1 of _t0 and _t1, interleaved */
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); /* upper 64 bits of _u0 (lane 1): rows 0..3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); /* lower 64 bits of _u0 (lane 0): rows 0..3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* All 16 slots of array have been written at this point */
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX2
+static void
+extract_8mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { /* Fills out[0..15]; _mm256_store_si256 is an aligned store, so out must be 32-byte aligned */
+  __m256i oligo; /* Per-lane combination of current and next, built below */
+
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,16)); /* No mask necessary */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask8)); /* Ninth and last store taken from current alone (shifts 16,14,...,0) */
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14)); /* (current << 14) | (next >> 18) in every 32-bit lane */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask8)); /* Seventh store from oligo (shifts 12,10,...,0); 9 + 7 = 16 stores in total */
+
+  return;
+}
+
+#ifdef USE_UNORDERED_8
+static Chrpos_T
+store_8mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) { /* Unordered variant: extract into a local buffer, then hand it to store_fwdrev_simd_128 */
+  __m256i array[16]; /* Receives the 16 vectors written by extract_8mers_fwd_simd_128 */
+			  
+  extract_8mers_fwd_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is reinterpreted as a flat UINT4 array; the return value is passed through unchanged */
+}
+
+#else
+/* Ordered variant.  The extraction step (what would be extract_8mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next)) is written inline here */
+static Chrpos_T
+store_8mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out;
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  /* As a special case, 8-mers don't need to be masked, since each one exactly fills a 16-bit half of a 32-bit lane */
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14)); /* (current << 14) | (next >> 18) in every 32-bit lane */
+  /* _row0 = _mm256_and_si256( oligo, bigmask8); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8); */
+  _t0 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55); /* selector 0x55 is applied to both 128-bit halves: low 16 bits of each lane = row0, high 16 bits = row1 */
+
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8); */
+  _t1 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55); /* low half = row2, high half = row3 */
+
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8); */
+  _t2 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,6), _mm256_srli_epi32(oligo,8), 0x55); /* low half = row4, high half = row5 */
+
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8); */
+  /* _row7 = _mm256_and_si256( current, bigmask8); */
+  _t3 = _mm256_blend_epi16(_mm256_slli_epi32(current,16), _mm256_srli_epi32(oligo,12), 0x55); /* low half = row6, high half = row7 */
+
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8); */
+  /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8); */
+  _t4 = _mm256_blend_epi16(_mm256_slli_epi32(current,12), _mm256_srli_epi32(current,2), 0x55); /* low half = row8, high half = row9 */
+
+  /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8); */
+  /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8); */
+  _t5 = _mm256_blend_epi16(_mm256_slli_epi32(current,8), _mm256_srli_epi32(current,6), 0x55); /* low half = row10, high half = row11 */
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8); */
+  _t6 = _mm256_blend_epi16(_mm256_slli_epi32(current,4), _mm256_srli_epi32(current,10), 0x55); /* low half = row12, high half = row13 */
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8); */
+  /* _row15 = _mm256_srli_epi32(current,16); */ /* No mask necessary */
+  _t7 = _mm256_blend_epi16(current, _mm256_srli_epi32(current,14), 0x55); /* low half = row14, high half = row15 */
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1); /* first transpose step: interleave 32-bit lanes of neighbouring row pairs (within each 128-bit half) */
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1); /* second transpose step: interleave 64-bit pairs; _t0.._t7 are reused as outputs */
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); /* upper 128 bits of _t0; its eight 16-bit elements are zero-extended to 32 bits */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); /* lower 128 bits of _t0, widened the same way */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* All 16 slots of array have been written at this point */
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX512
+static void
+extract_8mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { /* Fills out[0..15]; _mm512_store_si512 is an aligned store, so out must be 64-byte aligned */
+  __m512i oligo; /* Per-lane combination of current and next, built below */
+
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,16)); /* No mask necessary */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask8)); /* Ninth and last store taken from current alone (shifts 16,14,...,0) */
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,18), _mm512_slli_epi32(current,14)); /* (current << 14) | (next >> 18) in every 32-bit lane */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask8)); /* Seventh store from oligo (shifts 12,10,...,0); 9 + 7 = 16 stores in total */
+
+  return;
+}
+
+#ifdef USE_UNORDERED_8
+static Chrpos_T
+store_8mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) { /* Unordered variant: extract into a local buffer, then hand it to store_fwdrev_simd_256 */
+  __m512i array[16]; /* Receives the 16 vectors written by extract_8mers_fwd_simd_256 */
+			  
+  extract_8mers_fwd_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is reinterpreted as a flat UINT4 array; the return value is passed through unchanged */
+}
+
+#else
+/* Builds 256 values in a local buffer from the 32-bit lanes of current (next contributes its top 14 bits per lane), passes that buffer with chrpos, table, positions and counts to store_fwdrev_simd_256_ordered, and returns that call's result.  Includes extract_8mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */
+static Chrpos_T
+store_8mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+  /* array is the local staging buffer: 16 vectors x 16 lanes = 256 32-bit values, written sequentially through out */
+  out = &(array[0]);
+  /* Per 32-bit lane, oligo places the top 14 bits of next below the low 18 bits of current */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,18), _mm512_slli_epi32(current,14));
+  _u0 = _mm512_and_si512( oligo, hugemask8);
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask8);
+  _t0 = _mm512_or_si512(_u0, _u1);
+  /* NOTE(review): each _tN presumably carries an even row in its low 16 bits and the following odd row (the commented-out _rowN) in its high 16 bits, assuming hugemask8/highmask8 select those halves -- confirm against their definitions */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8); */
+  _u1 = _mm512_and_si512(_mm512_slli_epi32(oligo,10), highmask8);
+  _t1 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8);
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,6), highmask8);
+  _t2 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8);
+  /* _row7 = _mm512_and_si512( current, hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,16), highmask8);
+  _t3 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8);
+  /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,12), highmask8);
+  _t4 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8);
+  /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,8), highmask8);
+  _t5 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8); */
+  _u1 = _mm512_and_si512(_mm512_slli_epi32(current,4), highmask8);
+  _t6 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8);
+  /* _row15 = _mm512_srli_epi32(current,16); */ /* No mask necessary */
+  _u1 = _mm512_and_si512(current, highmask8);
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+  /* Interleave the 32-bit elements of paired _t registers within each 128-bit lane: unpackhi results go to _u0.._u3, unpacklo results to _u4.._u7 */
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+
+
+  /* Split: top half (64-bit elements 7..4 of each _u pair, alternating between the two sources, in descending order) */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+  /* Each merged vector is split into its two 256-bit halves; the 16-bit values of each half are zero-extended to 32 bits and stored as one vector of array */
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half (64-bit elements 3..0 of each _u pair, alternating between the two sources, in descending order) */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 is reused as set for the top half: _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  /* _shuffle2 is reused as set for the top half: _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+  /* All 16 vectors of array have been written (8 per half); hand them on viewed as a flat UINT4 array */
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+
+#if !defined(HAVE_AVX2)
+
+static int
+store_8mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
+#else
+  __m128i _oligo, _masked;
+#endif
+
+
+  oligo = nexthigh_rev >> 18;	/* For 31..25 */
+  oligo |= low_rev << 14;
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK8; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
-  masked = (high_rev >> 8) & MASK9;	/* 3 */
+  masked = (oligo >> 2) & MASK8; /* 30 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
-  masked = (high_rev >> 10) & MASK9;	/* 2 */
+  masked = (oligo >> 4) & MASK8; /* 29 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
-  masked = (high_rev >> 12) & MASK9;	/* 1 */
+  masked = (oligo >> 6) & MASK8; /* 28 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
-  masked = high_rev >> 14;		/* 0, No mask necessary */
+  masked = (oligo >> 8) & MASK8; /* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
+  }
+
+  masked = (oligo >> 10) & MASK8; /* 26 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
+  }
+
+  masked = (oligo >> 12) & MASK8; /* 25 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
 #else
-  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _masked = _mm_and_si128(_oligo, mask9);
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
 #else
-  _masked = _mm_and_si128(_oligo, mask9);
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
+#endif
 
-  masked = EXTRACT(_masked,3);
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK8;	/* 24 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
-#endif
-
-  return chrpos - 32;
-}
 
-#else  /* HAVE_AVX2 */
+  masked = (low_rev >> 2) & MASK8;	/* 23 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
+  }
 
-static int
-store_9mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked, _counts;
-
-
-  oligo = nexthigh_rev >> 16;	/* For 31..24 */
-  oligo |= low_rev << 16;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+  masked = (low_rev >> 4) & MASK8;	/* 22 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+  masked = (low_rev >> 6) & MASK8;	/* 21 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+  masked = (low_rev >> 8) & MASK8;	/* 20 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+  masked = (low_rev >> 10) & MASK8; /* 19 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+  masked = (low_rev >> 12) & MASK8; /* 18 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+  masked = (low_rev >> 14) & MASK8; /* 17 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+  masked = low_rev >> 16;		/* 16, No mask necessary */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
+#else
+  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
+  _masked = _mm_and_si128(_oligo, mask8);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
+  _masked = _mm_and_si128(_oligo, mask8);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
+
+  masked = low_rev >> 16;		/* 16, No mask necessary */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
+#endif
 
 
-  oligo = low_rev >> 16;		/* For 15..8 */
-  oligo |= high_rev << 16;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  oligo = low_rev >> 18;		/* For 9..15 */
+  oligo |= high_rev << 14;
 
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK8; /* 15 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
+  masked = (oligo >> 2) & MASK8; /* 14 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
+  masked = (oligo >> 4) & MASK8; /* 13 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
+  masked = (oligo >> 6) & MASK8; /* 12 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
+  masked = (oligo >> 8) & MASK8; /* 11 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
+  masked = (oligo >> 10) & MASK8; /* 10 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
+  masked = (oligo >> 12) & MASK8; /* 9 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
+  _masked = _mm_and_si128(_oligo, mask8);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
+  }
+
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
+  }
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
+  }
 
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
+  _masked = _mm_and_si128(_oligo, mask8);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
+
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
+  }
+
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
+  }
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rev & MASK8;		/* 8 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
+
+  masked = (high_rev >> 2) & MASK8;	/* 7 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rev >> 4) & MASK8;	/* 6 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rev >> 6) & MASK8;	/* 5 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rev >> 8) & MASK8;	/* 4 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rev >> 10) & MASK8;	/* 3 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rev >> 12) & MASK8;	/* 2 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rev >> 14) & MASK8;	/* 1 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
+  masked = high_rev >> 16;		/* 0, No mask necessary */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 
-  return chrpos - 32;
-}
-
-#endif	/* HAVE_AVX2 */
-
-
-#if !defined(HAVE_AVX2)
-
-static void
-count_8mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
 #else
-  __m128i _oligo, _masked;
-#endif
-
-
-  oligo = nexthigh_rev >> 18;	/* For 31..25 */
-  oligo |= low_rev << 14;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK8; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 2) & MASK8; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 4) & MASK8; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 6) & MASK8; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 8) & MASK8; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 10) & MASK8; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 12) & MASK8; /* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+#else
+  _masked = _mm_and_si128(_oligo, mask8);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
+  }
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
+  }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
@@ -15295,2177 +13586,1322 @@ count_8mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
+  }
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
+  }
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-#endif
-
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK8;	/* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
 
-  masked = (low_rev >> 2) & MASK8;	/* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  masked = high_rev >> 16;		/* 0, No mask necessary */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
+#endif
 
-  masked = (low_rev >> 4) & MASK8;	/* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  return chrpos - 32;
+}
 
-  masked = (low_rev >> 6) & MASK8;	/* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+#else	/* HAVE_AVX2 */
 
-  masked = (low_rev >> 8) & MASK8;	/* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+static int
+store_8mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
 
-  masked = (low_rev >> 10) & MASK8; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 12) & MASK8; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
-  masked = (low_rev >> 14) & MASK8; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = low_rev >> 16;		/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-  
-#else
-  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
-#endif
+  oligo = nexthigh_rev >> 18;	/* For 31..25 */
+  oligo |= low_rev << 14;
 
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
 
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
+  }
 
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
+  }
 
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
+  }
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
-#endif
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
+  }
 
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
+  }
 
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
+  }
 
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
+  }
 
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
 
-  masked = low_rev >> 16;		/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-#endif
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
+  }
 
-  oligo = low_rev >> 18;		/* For 15..9 */
-  oligo |= high_rev << 14;
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
+  }
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK8; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
+  }
 
-  masked = (oligo >> 2) & MASK8; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
+  }
 
-  masked = (oligo >> 4) & MASK8; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
+  }
 
-  masked = (oligo >> 6) & MASK8; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
+  }
 
-  masked = (oligo >> 8) & MASK8; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
+  }
 
-  masked = (oligo >> 10) & MASK8; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
+  }
 
-  masked = (oligo >> 12) & MASK8; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
-#endif
+  masked = low_rev >> 16;		/* 16, No mask necessary */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
+  }
 
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
+  oligo = low_rev >> 18;		/* For 9..15 */
+  oligo |= high_rev << 14;
 
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
 
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
-#endif
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
+  }
 
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
+  }
 
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
+  }
 
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK8;		/* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
+  }
 
-  masked = (high_rev >> 2) & MASK8;	/* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
+  }
 
-  masked = (high_rev >> 4) & MASK8;	/* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
+  }
 
-  masked = (high_rev >> 6) & MASK8;	/* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
- 
-  masked = (high_rev >> 8) & MASK8;	/* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
+  }
 
-  masked = (high_rev >> 10) & MASK8;	/* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 12) & MASK8;	/* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask8);
 
-  masked = (high_rev >> 14) & MASK8;	/* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-  masked = high_rev >> 16;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-  
-#else
-  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
-#endif
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
+  }
 
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
+  }
 
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
+  }
 
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
+  }
 
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
+  }
 
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
+  }
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
-#else
-  _masked = _mm_and_si128(_oligo, mask8);
-#endif
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6); 
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
+  }
 
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
+  }
 
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  masked = high_rev >> 16;		/* 0, No mask necessary */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
 
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  return chrpos - 32;
+}
 
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+#endif  /* HAVE_AVX2 */
 
 
-  masked = high_rev >> 16;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-#endif
 
-  return;
-}
 
-#else	/* HAVE_AVX2 */
+#if !defined(HAVE_AVX2)
 
 static void
-count_8mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+count_7mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
+#else
+  __m128i _oligo, _masked;
 #endif
 
 
-  oligo = nexthigh_rev >> 18;	/* For 31..25 */
-  oligo |= low_rev << 14;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
+  oligo = nexthigh_rev >> 20;	/* For 31..26 */
+  oligo |= low_rev << 12;
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK7; /* 31 */
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = (oligo >> 2) & MASK7; /* 30 */
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = (oligo >> 4) & MASK7; /* 29 */
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = (oligo >> 6) & MASK7; /* 28 */
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  masked = (oligo >> 8) & MASK7; /* 27 */
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  masked = (oligo >> 10) & MASK7; /* 26 */
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low7);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK7;	/* 25 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+
+  masked = (low_rev >> 2) & MASK7;	/* 24 */
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = (low_rev >> 4) & MASK7;	/* 23 */
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = (low_rev >> 6) & MASK7;	/* 22 */
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = (low_rev >> 8) & MASK7;	/* 21 */
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  masked = (low_rev >> 10) & MASK7;	/* 20 */
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  masked = (low_rev >> 12) & MASK7; /* 19 */
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  masked = (low_rev >> 14) & MASK7; /* 18 */
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  masked = (low_rev >> 16) & MASK7; /* 17 */
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  masked = low_rev >> 16;		/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = low_rev >> 18;		/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
+#else
+  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
 
-  oligo = low_rev >> 18;		/* For 15..9 */
-  oligo |= high_rev << 14;
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+  oligo = low_rev >> 20;	/* For 15..10 */
+  oligo |= high_rev << 12;
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK7; /* 15 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+  masked = (oligo >> 2) & MASK7; /* 14 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
+
+  masked = (oligo >> 4) & MASK7; /* 13 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
+
+  masked = (oligo >> 6) & MASK7; /* 12 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+  masked = (oligo >> 8) & MASK7; /* 11 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+  masked = (oligo >> 10) & MASK7; /* 10 */
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low7);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rev & MASK7;	/* 9 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+  masked = (high_rev >> 2) & MASK7; /* 8 */
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = (high_rev >> 4) & MASK7;	/* 7 */
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = (high_rev >> 6) & MASK7;	/* 6 */
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = (high_rev >> 8) & MASK7;	/* 5 */
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  masked = (high_rev >> 10) & MASK7;	/* 4 */
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  masked = (high_rev >> 12) & MASK7;	/* 3 */
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  masked = (high_rev >> 14) & MASK7;	/* 2 */
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  masked = (high_rev >> 16) & MASK7;	/* 1 */
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  masked = high_rev >> 16;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = high_rev >> 18;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
-  return;
-}
+#else
+  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
 
-#endif  /* HAVE_AVX2 */
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-/* Expecting current to have {high0_rev, low0_rev, high1_rev,
-   low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
-   high2_rev} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_8mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
-  _mm_store_si128(out++, _mm_and_si128( current, mask8));
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask8));
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
 
-  return;
-}
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_AVX2
-static void
-extract_8mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,16)); /* No mask necessary */
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask8));
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask8));
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
+#endif
 
   return;
 }
-#endif
 
+#else	/* HAVE_AVX2 */
 
 static void
-count_8mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
+count_7mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,16); /* No mask necessary */
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,16)); /* No mask necessary */
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("16 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("32 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("48 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */
-#endif
-  debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = nexthigh_rev >> 20;	/* For 31..26 */
+  oligo |= low_rev << 12;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("17 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("33 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("49 %04X => %d\n",array[3],counts[array[3]]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */
-#endif
-  debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("18 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("34 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("50 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */
-#endif
-  debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("19 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("35 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("51 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */
-#endif
-  debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("20 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("36 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("52 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */
-#endif
-  debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("21 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("37 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("53 %04X => %d\n",array[3],counts[array[3]]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */
-#endif
-  debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("22 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("38 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("54 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */
-#endif
-  debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("23 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("39 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("55 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */
-#endif
-  debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("24 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("40 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("56 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */
-#endif
-  debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("25 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("41 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("57 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */
-#endif
-  debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("26 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("42 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("58 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */
-#endif
-  debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("27 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("43 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("59 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */
-#endif
-  debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("28 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("44 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("60 %04X => %d\n",array[3],counts[array[3]]));
+  oligo = low_rev >> 20;	/* For 15..10 */
+  oligo |= high_rev << 12;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */
-#endif
-  debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("29 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("45 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("61 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */
-#endif
-  debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("30 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("46 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("62 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */
-#endif
-  debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("31 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("47 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("63 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */
-#endif
-  debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
+
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
+
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   return;
 }
-#endif
 
+#endif  /* HAVE_AVX2 */
 
-#ifdef HAVE_AVX2
+
+
+/* Expects current to hold {high0_rev, low0_rev, high1_rev,
+   low1_rev}, and next to hold {low0_rev, high1_rev, low1_rev,
+   high2_rev} */
+#ifdef HAVE_SSE2
+static void
+extract_7mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) {
+  __m128i oligo;
+  /* Writes 16 vectors through out (aligned stores).  First 10: fields of current at shifts 18, 16, ..., 0 */
+  _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary: only 14 bits remain per 32-bit lane */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7)); /* mask7 is defined elsewhere; presumably the low 14 bits -- confirm */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+  _mm_store_si128(out++, _mm_and_si128( current, mask7));
+  /* Last 6: fields of oligo, which per 32-bit lane holds the low 20 bits of current above the top 12 bits of next */
+  oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
+
+  return;
+}
+
+#ifdef USE_UNORDERED_7
+static Chrpos_T
+store_7mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];		/* the 16 vectors written by extract_7mers_fwd_simd_64 */
+  /* Extract the values, then pass them on as a flat UINT4 array; the callee's result is returned unchanged */
+  extract_7mers_fwd_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
+}
+
+#else
+/* Variant used when USE_UNORDERED_7 is not defined.  Inlines what would be extract_7mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next), then calls store_fwdrev_simd_64_ordered and returns its result */
+static Chrpos_T
+store_7mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m128i _u0, _u1, _u2, _u3;
+  /* NOTE(review): _mm_blend_epi16 and _mm_cvtepu16_epi32 are SSE4.1 intrinsics, yet the visible guard is HAVE_SSE2; presumably USE_UNORDERED_7 is defined when SSE4.1 is absent -- confirm */
+  out = &(array[0]);
+  /* oligo: per 32-bit lane, the low 20 bits of current above the top 12 bits of next */
+  oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
+  /* _row0 = _mm_and_si128( oligo, mask7); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7); */
+  _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask7_epi16);
+  /* Each _tN packs two rows per 32-bit lane: the even-numbered row in the low 16 bits, the odd-numbered row in the high 16 bits */
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7); */
+  _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask7_epi16);
+  /* Blend mask 0x55 takes even 16-bit elements from the second operand and odd ones from the first, whose shift lines the odd row up with the high half */
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7); */
+  _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,6), _mm_srli_epi32(oligo, 8), 0x55), mask7_epi16);
+
+  /* Rows 6-15 come from current alone; row numbers run opposite to the store order in extract_7mers_fwd_simd_64 */
+  /* _row6 = _mm_and_si128( current, mask7); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,2), mask7); */
+  _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask7_epi16);
+
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,4), mask7); */
+  /* _row9 = _mm_and_si128( _mm_srli_epi32(current,6), mask7); */
+  _t4 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask7_epi16);
+
+  /* _row10 = _mm_and_si128( _mm_srli_epi32(current,8), mask7); */
+  /* _row11 = _mm_and_si128( _mm_srli_epi32(current,10), mask7); */
+  _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask7_epi16);
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(current,12), mask7); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(current,14), mask7); */
+  _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask7_epi16);
+  
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(current,16), mask7); */
+  /* _row15 = _mm_srli_epi32(current,18); */ /* No mask necessary */
+  _t7 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask7_epi16);
+
+  /* Transpose: the 16 rows of one 32-bit lane end up in 4 consecutive output vectors */
+  /* Split: top half (32-bit lanes 2 and 3 of each _tN) */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+  /* Lane 3 first (upper 8 bytes of each _uN), then lane 2; cvtepu16 widens four 16-bit rows to 32 bits each */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  /* Split: bottom half (32-bit lanes 0 and 1 of each _tN) */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1);
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+  /* Lane 1 first, then lane 0 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+  /* array now holds 64 32-bit values: lanes 3, 2, 1, 0 in turn, rows 0..15 within each lane */
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX2
 static void
-count_8mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
+extract_7mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
   __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-  array = _mm256_srli_epi32(current,16); /* No mask necessary */
-  counts[EXTRACT256(array,0)] += 1;	 /* 0 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 16 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 32 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 48 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 64 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 80 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 96 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 112 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;	 /* 1 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 17 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 33 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 49 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 65 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 81 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 97 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 113 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 2 */
-  counts[EXTRACT256(array,1)] += 1; /* 18 */
-  counts[EXTRACT256(array,2)] += 1; /* 34 */
-  counts[EXTRACT256(array,3)] += 1; /* 50 */
-  counts[EXTRACT256(array,4)] += 1; /* 66 */
-  counts[EXTRACT256(array,5)] += 1; /* 82 */
-  counts[EXTRACT256(array,6)] += 1; /* 98 */
-  counts[EXTRACT256(array,7)] += 1; /* 114 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 3 */
-  counts[EXTRACT256(array,1)] += 1; /* 19 */
-  counts[EXTRACT256(array,2)] += 1; /* 35 */
-  counts[EXTRACT256(array,3)] += 1; /* 51 */
-  counts[EXTRACT256(array,4)] += 1; /* 67 */
-  counts[EXTRACT256(array,5)] += 1; /* 83 */
-  counts[EXTRACT256(array,6)] += 1; /* 99 */
-  counts[EXTRACT256(array,7)] += 1; /* 115 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 4 */
-  counts[EXTRACT256(array,1)] += 1; /* 20 */
-  counts[EXTRACT256(array,2)] += 1; /* 36 */
-  counts[EXTRACT256(array,3)] += 1; /* 52 */
-  counts[EXTRACT256(array,4)] += 1; /* 68 */
-  counts[EXTRACT256(array,5)] += 1; /* 84 */
-  counts[EXTRACT256(array,6)] += 1; /* 100 */
-  counts[EXTRACT256(array,7)] += 1; /* 116 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 5 */
-  counts[EXTRACT256(array,1)] += 1; /* 21 */
-  counts[EXTRACT256(array,2)] += 1; /* 37 */
-  counts[EXTRACT256(array,3)] += 1; /* 53 */
-  counts[EXTRACT256(array,4)] += 1; /* 69 */
-  counts[EXTRACT256(array,5)] += 1; /* 85 */
-  counts[EXTRACT256(array,6)] += 1; /* 101 */
-  counts[EXTRACT256(array,7)] += 1; /* 117 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 6 */
-  counts[EXTRACT256(array,1)] += 1; /* 22 */
-  counts[EXTRACT256(array,2)] += 1; /* 38 */
-  counts[EXTRACT256(array,3)] += 1; /* 54 */
-  counts[EXTRACT256(array,4)] += 1; /* 70 */
-  counts[EXTRACT256(array,5)] += 1; /* 86 */
-  counts[EXTRACT256(array,6)] += 1; /* 102 */
-  counts[EXTRACT256(array,7)] += 1; /* 118 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 7 */
-  counts[EXTRACT256(array,1)] += 1; /* 23 */
-  counts[EXTRACT256(array,2)] += 1; /* 39 */
-  counts[EXTRACT256(array,3)] += 1; /* 55 */
-  counts[EXTRACT256(array,4)] += 1; /* 71 */
-  counts[EXTRACT256(array,5)] += 1; /* 87 */
-  counts[EXTRACT256(array,6)] += 1; /* 103 */
-  counts[EXTRACT256(array,7)] += 1; /* 119 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 8 */
-  counts[EXTRACT256(array,1)] += 1; /* 24 */
-  counts[EXTRACT256(array,2)] += 1; /* 40 */
-  counts[EXTRACT256(array,3)] += 1; /* 56 */
-  counts[EXTRACT256(array,4)] += 1; /* 72 */
-  counts[EXTRACT256(array,5)] += 1; /* 88 */
-  counts[EXTRACT256(array,6)] += 1; /* 104 */
-  counts[EXTRACT256(array,7)] += 1; /* 120 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 9 */
-  counts[EXTRACT256(array,1)] += 1; /* 25 */
-  counts[EXTRACT256(array,2)] += 1; /* 41 */
-  counts[EXTRACT256(array,3)] += 1; /* 57 */
-  counts[EXTRACT256(array,4)] += 1; /* 73 */
-  counts[EXTRACT256(array,5)] += 1; /* 89 */
-  counts[EXTRACT256(array,6)] += 1; /* 105 */
-  counts[EXTRACT256(array,7)] += 1; /* 121 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 10 */
-  counts[EXTRACT256(array,1)] += 1; /* 26 */
-  counts[EXTRACT256(array,2)] += 1; /* 42 */
-  counts[EXTRACT256(array,3)] += 1; /* 58 */
-  counts[EXTRACT256(array,4)] += 1; /* 74 */
-  counts[EXTRACT256(array,5)] += 1; /* 90 */
-  counts[EXTRACT256(array,6)] += 1; /* 106 */
-  counts[EXTRACT256(array,7)] += 1; /* 122 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 11 */
-  counts[EXTRACT256(array,1)] += 1; /* 27 */
-  counts[EXTRACT256(array,2)] += 1; /* 43 */
-  counts[EXTRACT256(array,3)] += 1; /* 59 */
-  counts[EXTRACT256(array,4)] += 1; /* 75 */
-  counts[EXTRACT256(array,5)] += 1; /* 91 */
-  counts[EXTRACT256(array,6)] += 1; /* 107 */
-  counts[EXTRACT256(array,7)] += 1; /* 123 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 12 */
-  counts[EXTRACT256(array,1)] += 1; /* 28 */
-  counts[EXTRACT256(array,2)] += 1; /* 44 */
-  counts[EXTRACT256(array,3)] += 1; /* 60 */
-  counts[EXTRACT256(array,4)] += 1; /* 76 */
-  counts[EXTRACT256(array,5)] += 1; /* 92 */
-  counts[EXTRACT256(array,6)] += 1; /* 108 */
-  counts[EXTRACT256(array,7)] += 1; /* 124 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 13 */
-  counts[EXTRACT256(array,1)] += 1; /* 29 */
-  counts[EXTRACT256(array,2)] += 1; /* 45 */
-  counts[EXTRACT256(array,3)] += 1; /* 61 */
-  counts[EXTRACT256(array,4)] += 1; /* 77 */
-  counts[EXTRACT256(array,5)] += 1; /* 93 */
-  counts[EXTRACT256(array,6)] += 1; /* 109 */
-  counts[EXTRACT256(array,7)] += 1; /* 125 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 14 */
-  counts[EXTRACT256(array,1)] += 1; /* 30 */
-  counts[EXTRACT256(array,2)] += 1; /* 46 */
-  counts[EXTRACT256(array,3)] += 1; /* 62 */
-  counts[EXTRACT256(array,4)] += 1; /* 78 */
-  counts[EXTRACT256(array,5)] += 1; /* 94 */
-  counts[EXTRACT256(array,6)] += 1; /* 110 */
-  counts[EXTRACT256(array,7)] += 1; /* 126 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask8);
-  counts[EXTRACT256(array,0)] += 1; /* 15 */
-  counts[EXTRACT256(array,1)] += 1; /* 31 */
-  counts[EXTRACT256(array,2)] += 1; /* 47 */
-  counts[EXTRACT256(array,3)] += 1; /* 63 */
-  counts[EXTRACT256(array,4)] += 1; /* 79 */
-  counts[EXTRACT256(array,5)] += 1; /* 95 */
-  counts[EXTRACT256(array,6)] += 1; /* 111 */
-  counts[EXTRACT256(array,7)] += 1; /* 127 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,18)); /* No mask necessary */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask7));
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask7));
+
+  return;
+}
+
+#ifdef USE_UNORDERED_7
+static Chrpos_T
+store_7mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16];
+			  
+  extract_7mers_fwd_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array);
+}
+
+#else
+/* Includes extract_7mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */
+static Chrpos_T
+store_7mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out;
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12));
+  /* _row0 = _mm256_and_si256( oligo, bigmask7); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7); */
+  _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask7_epi16);
+
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7); */
+  _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask7_epi16);
+
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7); */
+  _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,6), _mm256_srli_epi32(oligo,8), 0x55), bigmask7_epi16);
+
+
+  /* _row6 = _mm256_and_si256( current, bigmask7); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7); */
+  _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask7_epi16);
+
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7); */
+  /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7); */
+  _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask7_epi16);
+
+  /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7); */
+  /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7); */
+  _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask7_epi16);
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7); */
+  _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask7_epi16);
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7); */
+  /* _row15 = _mm256_srli_epi32(current,18)); */ /* No mask necessary */
+  _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask7_epi16);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1);
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX512
+static void
+extract_7mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) {
+  __m512i oligo;
+  /* Writes 16 vectors to out[0..15] with aligned stores (out must be 64-byte aligned): ten from current, then six from a window spanning current and next */
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,18)); /* No mask necessary: a 32-bit lane shifted right by 18 keeps only 14 bits */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask7)); /* hugemask7 is defined outside this view; presumably the low-14-bit mask -- TODO confirm */
+  /* Per 32-bit lane: oligo = low 20 bits of current above the high 12 bits of next, so each value below mixes bits of both words */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,20), _mm512_slli_epi32(current,12));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask7));
 
   return;
 }
+
+#ifdef USE_UNORDERED_7
+static Chrpos_T
+store_7mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16];		/* 16 vectors filled by extract_7mers_fwd_simd_256, then handed on through a UINT4 pointer */
+  /* USE_UNORDERED_7 variant: extract, then return whatever store_fwdrev_simd_256 (defined outside this view) returns */
+  extract_7mers_fwd_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array);
+}
+
+#else
+/* Ordered (non-USE_UNORDERED_7) variant.  Includes extract_7mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) inline, then returns the result of store_fwdrev_simd_256_ordered */
+static Chrpos_T
+store_7mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+  /* Each _tN below packs two values per 32-bit lane: one masked by hugemask7, one shifted up and masked by highmask7 (masks defined outside this view; presumably low/high 16-bit halves -- TODO confirm) */
+  out = &(array[0]);		/* The 16 stores below fill array[0..15] */
+  /* Per 32-bit lane: oligo = low 20 bits of current above the high 12 bits of next */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,20), _mm512_slli_epi32(current,12));
+  _u0 = _mm512_and_si512( oligo, hugemask7);
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask7);
+  _t0 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask7);
+  _t1 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7);
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,6), highmask7);
+  _t2 = _mm512_or_si512(_u0, _u1);
+
+  /* The remaining packed rows are taken from current alone */
+  _u0 = _mm512_and_si512( current, hugemask7);
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask7);
+  _t3 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7);
+  /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask7);
+  _t4 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7);
+  /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask7);
+  _t5 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask7);
+  _t6 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7);
+  /* _row15 = _mm512_srli_epi32(current,18); */ /* No mask necessary */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask7);
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+  /* Interleave 32-bit elements of adjacent packed rows: high and low halves of each 128-bit lane */
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+
+
+  /* Split: top half -- _shuffle0 takes 64-bit elements 7, 6, 5, 4 alternately from the first and second source */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+  /* _shuffle1 merges 64-bit elements 0-3 of both sources; cvtepu16_epi32 then zero-extends the 16-bit values of each 256-bit half into 32-bit lanes */
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); /* Same merge pattern for 64-bit elements 4-7 */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half -- same sequence as the top half, using 64-bit elements 3, 2, 1, 0 */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ /* Still holds the value set for the top half */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ /* Still holds the value set for the top half */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
 #endif
+#endif
+
 
 
 #if !defined(HAVE_AVX2)
 
 static int
-store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+store_7mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -17475,534 +14911,485 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   __m128i _oligo, _masked;
 #endif
 
-
-  oligo = nexthigh_rev >> 18;	/* For 31..25 */
-  oligo |= low_rev << 14;
+  
+  oligo = nexthigh_rev >> 20;	/* For 31..26 */
+  oligo |= low_rev << 12;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK8; /* 31 */
+  masked = oligo & MASK7; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
-  masked = (oligo >> 2) & MASK8; /* 30 */
+  masked = (oligo >> 2) & MASK7; /* 30 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
-  masked = (oligo >> 4) & MASK8; /* 29 */
+  masked = (oligo >> 4) & MASK7; /* 29 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
-  masked = (oligo >> 6) & MASK8; /* 28 */
+  masked = (oligo >> 6) & MASK7; /* 28 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
-  masked = (oligo >> 8) & MASK8; /* 27 */
+  masked = (oligo >> 8) & MASK7; /* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
-  masked = (oligo >> 10) & MASK8; /* 26 */
+  masked = (oligo >> 10) & MASK7; /* 26 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  masked = (oligo >> 12) & MASK8; /* 25 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
+#endif
 
-  masked = EXTRACT(_masked,2);
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK7;	/* 25 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
-#endif
-
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK8;	/* 24 */
+  masked = (low_rev >> 2) & MASK7;	/* 24 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
-  masked = (low_rev >> 2) & MASK8;	/* 23 */
+  masked = (low_rev >> 4) & MASK7;	/* 23 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
-  masked = (low_rev >> 4) & MASK8;	/* 22 */
+  masked = (low_rev >> 6) & MASK7;	/* 22 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
-  masked = (low_rev >> 6) & MASK8;	/* 21 */
+  masked = (low_rev >> 8) & MASK7;	/* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
-  masked = (low_rev >> 8) & MASK8;	/* 20 */
+  masked = (low_rev >> 10) & MASK7;	/* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
-  masked = (low_rev >> 10) & MASK8; /* 19 */
+  masked = (low_rev >> 12) & MASK7; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  masked = (low_rev >> 12) & MASK8; /* 18 */
+  masked = (low_rev >> 14) & MASK7; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  masked = (low_rev >> 14) & MASK8; /* 17 */
+  masked = (low_rev >> 16) & MASK7; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  masked = low_rev >> 16;		/* 16, No mask necessary */
+  masked = low_rev >> 18;		/* 16, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #else
   _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
 
-  masked = low_rev >> 16;		/* 16, No mask necessary */
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
+  }
+
+  masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 #endif
 
 
-  oligo = low_rev >> 18;		/* For 9..15 */
-  oligo |= high_rev << 14;
+  oligo = low_rev >> 20;	/* For 15..10 */
+  oligo |= high_rev << 12;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK8; /* 15 */
+  masked = oligo & MASK7; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
-  masked = (oligo >> 2) & MASK8; /* 14 */
+  masked = (oligo >> 2) & MASK7; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
-  masked = (oligo >> 4) & MASK8; /* 13 */
+  masked = (oligo >> 4) & MASK7; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
-  masked = (oligo >> 6) & MASK8; /* 12 */
+  masked = (oligo >> 6) & MASK7; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
-  masked = (oligo >> 8) & MASK8; /* 11 */
+  masked = (oligo >> 8) & MASK7; /* 11 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
-  masked = (oligo >> 10) & MASK8; /* 10 */
+  masked = (oligo >> 10) & MASK7; /* 10 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  masked = (oligo >> 12) & MASK8; /* 9 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
+#endif
 
-  masked = EXTRACT(_masked,2);
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rev & MASK7;	/* 9 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
-#endif
 
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK8;		/* 8 */
+  masked = (high_rev >> 2) & MASK7; /* 8 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
-  masked = (high_rev >> 2) & MASK8;	/* 7 */
+  masked = (high_rev >> 4) & MASK7;	/* 7 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
-  masked = (high_rev >> 4) & MASK8;	/* 6 */
+  masked = (high_rev >> 6) & MASK7;	/* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
-  masked = (high_rev >> 6) & MASK8;	/* 5 */
+  masked = (high_rev >> 8) & MASK7;	/* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
-  masked = (high_rev >> 8) & MASK8;	/* 4 */
+  masked = (high_rev >> 10) & MASK7;	/* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
-  masked = (high_rev >> 10) & MASK8;	/* 3 */
+  masked = (high_rev >> 12) & MASK7;	/* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
-  masked = (high_rev >> 12) & MASK8;	/* 2 */
+  masked = (high_rev >> 14) & MASK7;	/* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
-  masked = (high_rev >> 14) & MASK8;	/* 1 */
+  masked = (high_rev >> 16) & MASK7;	/* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
-  masked = high_rev >> 16;		/* 0, No mask necessary */
+  masked = high_rev >> 18;		/* 0, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
-
+  
 #else
   _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
 #else
-  _masked = _mm_and_si128(_oligo, mask8);
+  _masked = _mm_and_si128(_oligo, mask7);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
+
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+#else
+  _masked = _mm_and_si128(_oligo, mask7);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
-  masked = high_rev >> 16;		/* 0, No mask necessary */
+  masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 #endif
 
@@ -18012,261 +15399,342 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #else	/* HAVE_AVX2 */
 
 static int
-store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+store_7mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
 
 
-  oligo = nexthigh_rev >> 18;	/* For 31..25 */
-  oligo |= low_rev << 14;
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  oligo = nexthigh_rev >> 20;	/* For 31..26 */
+  oligo |= low_rev << 12;
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
 
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
   }
 
 
-  masked = low_rev >> 16;		/* 16, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
+
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
+  }
+
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
   }
 
 
+  oligo = low_rev >> 20;	/* For 15..10 */
+  oligo |= high_rev << 12;
 
-  oligo = low_rev >> 18;		/* For 9..15 */
-  oligo |= high_rev << 14;
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
 
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6); 
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
   }
 
-  masked = high_rev >> 16;		/* 0, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask7);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
+
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
+  }
+
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
   }
 
   return chrpos - 32;
@@ -18276,11 +15744,10 @@ store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
 
 
-
 #if !defined(HAVE_AVX2)
 
 static void
-count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+count_6mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -18291,356 +15758,342 @@ count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
 
-  oligo = nexthigh_rev >> 20;	/* For 31..26 */
-  oligo |= low_rev << 12;
+  oligo = nexthigh_rev >> 22;	/* For 31..27 */
+  oligo |= low_rev << 10;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK7; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = oligo & MASK6; /* 31 */
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 2) & MASK7; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 2) & MASK6; /* 30 */
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 4) & MASK7; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 4) & MASK6; /* 29 */
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 6) & MASK7; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 6) & MASK6; /* 28 */
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 8) & MASK7; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 8) & MASK6; /* 27 */
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 10) & MASK7; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
-#else
-  _masked = _mm_and_si128(_oligo, mask7);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 8) & MASK6; /* 27 */
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK7;	/* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = low_rev & MASK6;	/* 26 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+
+  masked = (low_rev >> 2) & MASK6;	/* 25 */
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 2) & MASK7;	/* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 4) & MASK6;	/* 24 */
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 4) & MASK7;	/* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 6) & MASK6;	/* 23 */
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 6) & MASK7;	/* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 8) & MASK6;	/* 22 */
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 8) & MASK7;	/* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 10) & MASK6;	/* 21 */
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 10) & MASK7;	/* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 12) & MASK6;	/* 20 */
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 12) & MASK7; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 14) & MASK6; /* 19 */
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 14) & MASK7; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 16) & MASK6; /* 18 */
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 16) & MASK7; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 18) & MASK6; /* 17 */
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = low_rev >> 18;		/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = low_rev >> 20;	/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
 #else
   _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
-  oligo = low_rev >> 20;	/* For 15..10 */
-  oligo |= high_rev << 12;
+  oligo = low_rev >> 22;	/* For 15..11 */
+  oligo |= high_rev << 10;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK7; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = oligo & MASK6; /* 15 */
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 2) & MASK7; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 2) & MASK6; /* 14 */
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 4) & MASK7; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 4) & MASK6; /* 13 */
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 6) & MASK7; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 6) & MASK6; /* 12 */
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 8) & MASK7; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 8) & MASK6; /* 11 */
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 10) & MASK7; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
-#else
-  _masked = _mm_and_si128(_oligo, mask7);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 8) & MASK6; /* 11 */
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK7;	/* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = high_rev & MASK6;	/* 10 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+
+  masked = (high_rev >> 2) & MASK6; /* 9 */
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 2) & MASK7; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 4) & MASK6; /* 8 */
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 4) & MASK7;	/* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 6) & MASK6;	/* 7 */
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 6) & MASK7;	/* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 8) & MASK6;	/* 6 */
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 8) & MASK7;	/* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 10) & MASK6;	/* 5 */
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 10) & MASK7;	/* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 12) & MASK6;	/* 4 */
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 12) & MASK7;	/* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 14) & MASK6;	/* 3 */
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 14) & MASK7;	/* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 16) & MASK6;	/* 2 */
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 16) & MASK7;	/* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 18) & MASK6;	/* 1 */
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-  masked = high_rev >> 18;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = high_rev >> 20;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
 #else
   _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -18650,288 +16103,166 @@ count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #else	/* HAVE_AVX2 */
 
 static void
-count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+count_6mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
 
-  oligo = nexthigh_rev >> 20;	/* For 31..26 */
-  oligo |= low_rev << 12;
+  oligo = nexthigh_rev >> 22;	/* For 31..27 */
+  oligo |= low_rev << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low6);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
 
-  oligo = low_rev >> 20;	/* For 15..10 */
-  oligo |= high_rev << 12;
+  oligo = low_rev >> 22;	/* For 15..11 */
+  oligo |= high_rev << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low6);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   return;
@@ -18944,2441 +16275,1252 @@ count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 /* Expecting current to have {high0_rev, low0_rev, high1_rev,
    low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
    high2_rev} */
-#ifdef USE_SIMD_FOR_COUNTS
+#ifdef HAVE_SSE2
 static void
-extract_7mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+extract_6mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) {
   __m128i oligo;
 
-  _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
-  _mm_store_si128(out++, _mm_and_si128( current, mask7));
+  _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */;
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
+  _mm_store_si128(out++, _mm_and_si128( current, mask6));
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
+  oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask6));
 
   return;
 }
 
-#ifdef HAVE_AVX2
-static void
-extract_7mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
+#ifdef USE_UNORDERED_6
+static Chrpos_T
+store_6mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];
+			  
+  extract_6mers_fwd_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,18)); /* No mask necessary */
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask7));
+#else
+/* Includes extract_6mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */
+static Chrpos_T
+store_6mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m128i _u0, _u1, _u2, _u3;
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask7));
+  out = &(array[0]);
 
-  return;
+  oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
+  /* _row0 = _mm_and_si128( oligo, mask6); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6); */
+  _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask6_epi16);
+
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6); */
+  _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask6_epi16);
+
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6); */
+  /* _row5 = _mm_and_si128( current, mask6); */
+  _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,16), _mm_srli_epi32(oligo, 8), 0x55), mask6_epi16);
+
+
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(current,2), mask6); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,4), mask6); */
+  _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,12), _mm_srli_epi32(current, 2), 0x55), mask6_epi16);
+
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,6), mask6); */
+  /* _row9 = _mm_and_si128( _mm_srli_epi32(current,8), mask6); */
+  _t4 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,8), _mm_srli_epi32(current, 6), 0x55), mask6_epi16);
+
+  /* _row10 = _mm_and_si128( _mm_srli_epi32(current,10), mask6); */
+  /* _row11 = _mm_and_si128( _mm_srli_epi32(current,12), mask6); */
+  _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,4), _mm_srli_epi32(current, 10), 0x55), mask6_epi16);
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(current,14), mask6); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(current,16), mask6); */
+  _t6 = _mm_and_si128(_mm_blend_epi16(current, _mm_srli_epi32(current, 14), 0x55), mask6_epi16);
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(current,18), mask6); */
+  /* _row15 = _mm_srli_epi32(current,20); */ /* No mask necessary */;
+  _t7 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,4), _mm_srli_epi32(current, 18), 0x55), mask6_epi16);
+
+
+  /* Split: top half */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  /* Split: bottom half */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1);
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
 }
 #endif
+#endif
 
+#ifdef HAVE_AVX2
 static void
-count_7mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
+extract_6mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
+  __m256i oligo;
+
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,20)); /* No mask necessary */;
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask6));
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask6));
+
+  return;
+}
+
+#ifdef USE_UNORDERED_6
+static Chrpos_T
+store_6mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16];
+			  
+  extract_6mers_fwd_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array);
+}
+
 #else
-  Genomecomp_T array[4];
+/* Includes extract_6mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */
+static Chrpos_T
+store_6mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out;
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10));
+  /* _row0 = _mm256_and_si256( oligo, bigmask6); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6); */
+  _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask6_epi16);
+
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6); */
+  _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask6_epi16);
+
+
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6); */
+  /* _row5 = _mm256_and_si256( current, bigmask6); */
+  _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,16), _mm256_srli_epi32(oligo,8), 0x55), bigmask6_epi16);
+
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6); */
+  _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,12), _mm256_srli_epi32(current,2), 0x55), bigmask6_epi16);
+
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6); */
+  /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6); */
+  _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,8), _mm256_srli_epi32(current,6), 0x55), bigmask6_epi16);
+
+  /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6); */
+  /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6); */
+  _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,4), _mm256_srli_epi32(current,10), 0x55), bigmask6_epi16);
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6); */
+  _t6 = _mm256_and_si256(_mm256_blend_epi16(current, _mm256_srli_epi32(current,14), 0x55), bigmask6_epi16);
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6); */
+  /* _row15 = _mm256_srli_epi32(current,20); */ /* No mask necessary */;
+  _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,4), _mm256_srli_epi32(current,18), 0x55), bigmask6_epi16);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1);
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
 #endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
 #endif
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,18); /* No mask necessary */
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,18)); /* No mask necessary */
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("16 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("32 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("48 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef HAVE_AVX512
+static void /* Writes sixteen vectors of 6-mer codes to out[0] .. out[15] using aligned
+	       stores, so out must meet the alignment _mm512_store_si512 requires.
+	       Each vector is one shifted, masked view of the sixteen 32-bit lanes:
+	       first the eleven windows taken from current alone (shift 20 down to 0),
+	       then the five windows taken from oligo (shift 8 down to 0), which also
+	       carry bits from next.
+	       NOTE(review): hugemask6 is defined outside this function; the unmasked
+	       ">> 20" store implies it keeps the low 12 bits of each lane -- confirm. */
+extract_6mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) {
+  __m512i oligo;
+
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,20)); /* No mask necessary */;
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask6));
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,22), _mm512_slli_epi32(current,10)); /* per lane: current shifted up 10 bits, top 10 bits of next in the vacated low bits */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask6));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */
-#endif
-  debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,16), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("17 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("33 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("49 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef USE_UNORDERED_6
+static Chrpos_T /* Variant compiled when USE_UNORDERED_6 is defined.  Extracts the sixteen
+		   6-mer vectors for current/next into a local buffer and passes that
+		   buffer, viewed as UINT4 values, to store_fwdrev_simd_256 together with
+		   chrpos, table, positions and counts; that call's result is returned.
+		   NOTE(review): how store_fwdrev_simd_256 uses table/positions/counts
+		   and what the returned Chrpos_T means is not visible here -- confirm. */
+store_6mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16]; /* 16 vectors x 16 32-bit lanes = 256 codes */
+			  
+  extract_6mers_fwd_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */
-#endif
-  debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Includes extract_6mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next)
+
+   Variant compiled when USE_UNORDERED_6 is not defined.  Computes sixteen rows of
+   6-mer codes (row k is the window at bit offset 2*k; rows 0-4 come from oligo,
+   rows 5-15 from current), packing two rows into every 32-bit lane: the even row
+   in the low 16 bits and the odd row in the high 16 bits.  The packed vectors are
+   rearranged with 32-bit unpacks and 64-bit two-source permutes, and each 256-bit
+   half is zero-extended from 16-bit to 32-bit elements as it is stored, filling
+   array[0..15].  The buffer, viewed as UINT4 values, is then passed to
+   store_fwdrev_simd_256_ordered, whose result is returned.
+
+   NOTE(review): highmask6 is defined outside this function; the row comments
+   below assume it is the 6-mer mask positioned in the upper 16 bits of each
+   lane -- confirm.  The output order produced by the shuffles must match what
+   store_fwdrev_simd_256_ordered expects; that routine is not visible here, so
+   verify against it before changing any shuffle constant. */
+static Chrpos_T
+store_6mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,22), _mm512_slli_epi32(current,10)); /* per lane: current shifted up 10 bits, top 10 bits of next in the vacated low bits */
+  _u0 = _mm512_and_si512( oligo, hugemask6); /* row 0 in the low 16 bits */
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask6); /* row 1: "<< 14" places (oligo >> 2) in the high 16 bits */
+  _t0 = _mm512_or_si512(_u0, _u1); /* rows 0 | 1 */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask6);
+  _t1 = _mm512_or_si512(_u0, _u1); /* rows 2 | 3 */
+
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6);
+  /* _row5 = _mm512_and_si512( current, hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,16), highmask6);
+  _t2 = _mm512_or_si512(_u0, _u1); /* rows 4 | 5 */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask6);
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,12), highmask6);
+  _t3 = _mm512_or_si512(_u0, _u1); /* rows 6 | 7 */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6);
+  /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,8), highmask6);
+  _t4 = _mm512_or_si512(_u0, _u1); /* rows 8 | 9 */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6);
+  /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,4), highmask6);
+  _t5 = _mm512_or_si512(_u0, _u1); /* rows 10 | 11 */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6); */
+  _u1 = _mm512_and_si512( current, highmask6); /* (current >> 16) already sits in the high 16 bits, so no shift */
+  _t6 = _mm512_or_si512(_u0, _u1); /* rows 12 | 13 */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6);
+  /* _row15 = _mm512_srli_epi32(current,20); */ /* No mask necessary */;
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,4), highmask6); /* ">> 4" leaves (current >> 20) in the high 16 bits */
+  _t7 = _mm512_or_si512(_u0, _u1); /* rows 14 | 15 */
+
+
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+
+
+  /* Split: top half */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); /* indices 0-7 pick from the first source, 8-15 from the second: interleave qwords 7..4 */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); /* qword pairs 0-1 and 2-3 of both sources */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* widen sixteen 16-bit codes to 32 bits */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); /* qword pairs 4-5 and 6-7 of both sources */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); /* same interleave, now for qwords 3..0 */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("18 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("34 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("50 %04X => %d\n",array[3],counts[array[3]]));
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
 #endif
-  debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("19 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("35 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("51 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */
-#endif
-  debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("20 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("36 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("52 %04X => %d\n",array[3],counts[array[3]]));
+#if !defined(HAVE_AVX2)
 
+static int
+store_6mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */
+  __m128i _oligo, _masked;
 #endif
-  debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("21 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("37 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("53 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */
-#endif
-  debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = nexthigh_rev >> 22;	/* For 27..31 */
+  oligo |= low_rev << 10;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK6; /* 31 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("22 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("38 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("54 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */
-#endif
-  debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (oligo >> 2) & MASK6; /* 30 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
+  }
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = (oligo >> 4) & MASK6; /* 29 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("23 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("39 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("55 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */
-#endif
-  debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (oligo >> 6) & MASK6; /* 28 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
+  }
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = (oligo >> 8) & MASK6; /* 27 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("24 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("40 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("56 %04X => %d\n",array[3],counts[array[3]]));
 
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */
-#endif
-  debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask7);
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask7));
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("25 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("41 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("57 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */
-#endif
-  debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
+  }
 
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
+  }
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("26 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("42 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("58 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */
-#endif
-  debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = (oligo >> 8) & MASK6; /* 27 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("27 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("43 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("59 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */
 #endif
-  debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("28 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("44 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("60 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */
-#endif
-  debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK6;	/* 26 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("29 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("45 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("61 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */
-#endif
-  debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = (low_rev >> 2) & MASK6;	/* 25 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("30 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("46 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("62 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */
-#endif
-  debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = (low_rev >> 4) & MASK6;	/* 24 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("31 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("47 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("63 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */
-#endif
-  debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-  return;
-}
-
-#endif
 
-#ifdef HAVE_AVX2
-static void
-count_7mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-  array = _mm256_srli_epi32(current,18); /* No mask necessary */
-  counts[EXTRACT256(array,0)] += 1;	 /* 0 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 16 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 32 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 48 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 64 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 80 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 96 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 112 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;	 /* 1 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 17 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 33 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 49 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 65 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 81 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 97 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 113 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 2 */
-  counts[EXTRACT256(array,1)] += 1; /* 18 */
-  counts[EXTRACT256(array,2)] += 1; /* 34 */
-  counts[EXTRACT256(array,3)] += 1; /* 50 */
-  counts[EXTRACT256(array,4)] += 1; /* 66 */
-  counts[EXTRACT256(array,5)] += 1; /* 82 */
-  counts[EXTRACT256(array,6)] += 1; /* 98 */
-  counts[EXTRACT256(array,7)] += 1; /* 114 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 3 */
-  counts[EXTRACT256(array,1)] += 1; /* 19 */
-  counts[EXTRACT256(array,2)] += 1; /* 35 */
-  counts[EXTRACT256(array,3)] += 1; /* 51 */
-  counts[EXTRACT256(array,4)] += 1; /* 67 */
-  counts[EXTRACT256(array,5)] += 1; /* 83 */
-  counts[EXTRACT256(array,6)] += 1; /* 99 */
-  counts[EXTRACT256(array,7)] += 1; /* 115 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 4 */
-  counts[EXTRACT256(array,1)] += 1; /* 20 */
-  counts[EXTRACT256(array,2)] += 1; /* 36 */
-  counts[EXTRACT256(array,3)] += 1; /* 52 */
-  counts[EXTRACT256(array,4)] += 1; /* 68 */
-  counts[EXTRACT256(array,5)] += 1; /* 84 */
-  counts[EXTRACT256(array,6)] += 1; /* 100 */
-  counts[EXTRACT256(array,7)] += 1; /* 116 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 5 */
-  counts[EXTRACT256(array,1)] += 1; /* 21 */
-  counts[EXTRACT256(array,2)] += 1; /* 37 */
-  counts[EXTRACT256(array,3)] += 1; /* 53 */
-  counts[EXTRACT256(array,4)] += 1; /* 69 */
-  counts[EXTRACT256(array,5)] += 1; /* 85 */
-  counts[EXTRACT256(array,6)] += 1; /* 101 */
-  counts[EXTRACT256(array,7)] += 1; /* 117 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 6 */
-  counts[EXTRACT256(array,1)] += 1; /* 22 */
-  counts[EXTRACT256(array,2)] += 1; /* 38 */
-  counts[EXTRACT256(array,3)] += 1; /* 54 */
-  counts[EXTRACT256(array,4)] += 1; /* 70 */
-  counts[EXTRACT256(array,5)] += 1; /* 86 */
-  counts[EXTRACT256(array,6)] += 1; /* 102 */
-  counts[EXTRACT256(array,7)] += 1; /* 118 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 7 */
-  counts[EXTRACT256(array,1)] += 1; /* 23 */
-  counts[EXTRACT256(array,2)] += 1; /* 39 */
-  counts[EXTRACT256(array,3)] += 1; /* 55 */
-  counts[EXTRACT256(array,4)] += 1; /* 71 */
-  counts[EXTRACT256(array,5)] += 1; /* 87 */
-  counts[EXTRACT256(array,6)] += 1; /* 103 */
-  counts[EXTRACT256(array,7)] += 1; /* 119 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 8 */
-  counts[EXTRACT256(array,1)] += 1; /* 24 */
-  counts[EXTRACT256(array,2)] += 1; /* 40 */
-  counts[EXTRACT256(array,3)] += 1; /* 56 */
-  counts[EXTRACT256(array,4)] += 1; /* 72 */
-  counts[EXTRACT256(array,5)] += 1; /* 88 */
-  counts[EXTRACT256(array,6)] += 1; /* 104 */
-  counts[EXTRACT256(array,7)] += 1; /* 120 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 9 */
-  counts[EXTRACT256(array,1)] += 1; /* 25 */
-  counts[EXTRACT256(array,2)] += 1; /* 41 */
-  counts[EXTRACT256(array,3)] += 1; /* 57 */
-  counts[EXTRACT256(array,4)] += 1; /* 73 */
-  counts[EXTRACT256(array,5)] += 1; /* 89 */
-  counts[EXTRACT256(array,6)] += 1; /* 105 */
-  counts[EXTRACT256(array,7)] += 1; /* 121 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
+  masked = (low_rev >> 6) & MASK6;	/* 23 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 10 */
-  counts[EXTRACT256(array,1)] += 1; /* 26 */
-  counts[EXTRACT256(array,2)] += 1; /* 42 */
-  counts[EXTRACT256(array,3)] += 1; /* 58 */
-  counts[EXTRACT256(array,4)] += 1; /* 74 */
-  counts[EXTRACT256(array,5)] += 1; /* 90 */
-  counts[EXTRACT256(array,6)] += 1; /* 106 */
-  counts[EXTRACT256(array,7)] += 1; /* 122 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 11 */
-  counts[EXTRACT256(array,1)] += 1; /* 27 */
-  counts[EXTRACT256(array,2)] += 1; /* 43 */
-  counts[EXTRACT256(array,3)] += 1; /* 59 */
-  counts[EXTRACT256(array,4)] += 1; /* 75 */
-  counts[EXTRACT256(array,5)] += 1; /* 91 */
-  counts[EXTRACT256(array,6)] += 1; /* 107 */
-  counts[EXTRACT256(array,7)] += 1; /* 123 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 12 */
-  counts[EXTRACT256(array,1)] += 1; /* 28 */
-  counts[EXTRACT256(array,2)] += 1; /* 44 */
-  counts[EXTRACT256(array,3)] += 1; /* 60 */
-  counts[EXTRACT256(array,4)] += 1; /* 76 */
-  counts[EXTRACT256(array,5)] += 1; /* 92 */
-  counts[EXTRACT256(array,6)] += 1; /* 108 */
-  counts[EXTRACT256(array,7)] += 1; /* 124 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 13 */
-  counts[EXTRACT256(array,1)] += 1; /* 29 */
-  counts[EXTRACT256(array,2)] += 1; /* 45 */
-  counts[EXTRACT256(array,3)] += 1; /* 61 */
-  counts[EXTRACT256(array,4)] += 1; /* 77 */
-  counts[EXTRACT256(array,5)] += 1; /* 93 */
-  counts[EXTRACT256(array,6)] += 1; /* 109 */
-  counts[EXTRACT256(array,7)] += 1; /* 125 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 14 */
-  counts[EXTRACT256(array,1)] += 1; /* 30 */
-  counts[EXTRACT256(array,2)] += 1; /* 46 */
-  counts[EXTRACT256(array,3)] += 1; /* 62 */
-  counts[EXTRACT256(array,4)] += 1; /* 78 */
-  counts[EXTRACT256(array,5)] += 1; /* 94 */
-  counts[EXTRACT256(array,6)] += 1; /* 110 */
-  counts[EXTRACT256(array,7)] += 1; /* 126 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask7);
-  counts[EXTRACT256(array,0)] += 1; /* 15 */
-  counts[EXTRACT256(array,1)] += 1; /* 31 */
-  counts[EXTRACT256(array,2)] += 1; /* 47 */
-  counts[EXTRACT256(array,3)] += 1; /* 63 */
-  counts[EXTRACT256(array,4)] += 1; /* 79 */
-  counts[EXTRACT256(array,5)] += 1; /* 95 */
-  counts[EXTRACT256(array,6)] += 1; /* 111 */
-  counts[EXTRACT256(array,7)] += 1; /* 127 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
+  masked = (low_rev >> 8) & MASK6;	/* 22 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  return;
-}
-#endif
-
-
-#if !defined(HAVE_AVX2)
-
-static int
-store_7mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
-#endif
-
-  
-  oligo = nexthigh_rev >> 20;	/* For 31..26 */
-  oligo |= low_rev << 12;
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK7; /* 31 */
+  masked = (low_rev >> 10) & MASK6;	/* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
-  masked = (oligo >> 2) & MASK7; /* 30 */
+  masked = (low_rev >> 12) & MASK6;	/* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
-  masked = (oligo >> 4) & MASK7; /* 29 */
+  masked = (low_rev >> 14) & MASK6; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  masked = (oligo >> 6) & MASK7; /* 28 */
+  masked = (low_rev >> 16) & MASK6; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  masked = (oligo >> 8) & MASK7; /* 27 */
+  masked = (low_rev >> 18) & MASK6; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  masked = (oligo >> 10) & MASK7; /* 26 */
+  masked = low_rev >> 20;	/* 16, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
-
+  
 #else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
-#endif
-
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK7;	/* 25 */
+  masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
-  masked = (low_rev >> 2) & MASK7;	/* 24 */
+  masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  masked = (low_rev >> 4) & MASK7;	/* 23 */
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+#else
+  _masked = _mm_and_si128(_oligo, mask6);
+#endif
+
+  masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  masked = (low_rev >> 6) & MASK7;	/* 22 */
+  masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  masked = (low_rev >> 8) & MASK7;	/* 21 */
+  masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
+#endif
 
-  masked = (low_rev >> 10) & MASK7;	/* 20 */
+
+  oligo = low_rev >> 22;	/* For 15..11 */
+  oligo |= high_rev << 10;
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK6; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
-  masked = (low_rev >> 12) & MASK7; /* 19 */
+  masked = (oligo >> 2) & MASK6; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
-  masked = (low_rev >> 14) & MASK7; /* 18 */
+  masked = (oligo >> 4) & MASK6; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
-  masked = (low_rev >> 16) & MASK7; /* 17 */
+  masked = (oligo >> 6) & MASK6; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
-  masked = low_rev >> 18;		/* 16, No mask necessary */
+  masked = (oligo >> 8) & MASK6; /* 11 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
 #else
-  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
-#else
-  _masked = _mm_and_si128(_oligo, mask7);
+  masked = (oligo >> 8) & MASK6; /* 11 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
 #endif
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
-#else
-  _masked = _mm_and_si128(_oligo, mask7);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-#endif
-
-
-  oligo = low_rev >> 20;	/* For 15..10 */
-  oligo |= high_rev << 12;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK7; /* 15 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  masked = (oligo >> 2) & MASK7; /* 14 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  masked = (oligo >> 4) & MASK7; /* 13 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  masked = (oligo >> 6) & MASK7; /* 12 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-  masked = (oligo >> 8) & MASK7; /* 11 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-  masked = (oligo >> 10) & MASK7; /* 10 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
-#else
-  _masked = _mm_and_si128(_oligo, mask7);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
-#else
-  _masked = _mm_and_si128(_oligo, mask7);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-  masked = EXTRACT(_masked,1);
+  masked = high_rev & MASK6;	/* 10 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
-#endif
 
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK7;	/* 9 */
+  masked = (high_rev >> 2) & MASK6; /* 9 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
-  masked = (high_rev >> 2) & MASK7; /* 8 */
+  masked = (high_rev >> 4) & MASK6; /* 8 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
-  masked = (high_rev >> 4) & MASK7;	/* 7 */
+  masked = (high_rev >> 6) & MASK6;	/* 7 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
-  masked = (high_rev >> 6) & MASK7;	/* 6 */
+  masked = (high_rev >> 8) & MASK6;	/* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
-  masked = (high_rev >> 8) & MASK7;	/* 5 */
+  masked = (high_rev >> 10) & MASK6;	/* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
-  masked = (high_rev >> 10) & MASK7;	/* 4 */
+  masked = (high_rev >> 12) & MASK6;	/* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
-  masked = (high_rev >> 12) & MASK7;	/* 3 */
+  masked = (high_rev >> 14) & MASK6;	/* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
-  masked = (high_rev >> 14) & MASK7;	/* 2 */
+  masked = (high_rev >> 16) & MASK6;	/* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
-  masked = (high_rev >> 16) & MASK7;	/* 1 */
+  masked = (high_rev >> 18) & MASK6;	/* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
-  masked = high_rev >> 18;		/* 0, No mask necessary */
+  masked = high_rev >> 20;		/* 0, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
   
 #else
   _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
 #else
-  _masked = _mm_and_si128(_oligo, mask7);
+  _masked = _mm_and_si128(_oligo, mask6);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
+
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 #endif
 
+
   return chrpos - 32;
 }
 
-#else	/* HAVE_AVX2 */
+#else  /* HAVE_AVX2 */
 
 static int
-store_7mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+store_6mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
 
 
-  oligo = nexthigh_rev >> 20;	/* For 31..26 */
-  oligo |= low_rev << 12;
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  oligo = nexthigh_rev >> 22;	/* For 27..31 */
+  oligo |= low_rev << 10;
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
   }
 
+
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
 
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
   }
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
   }
 
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
+  }
 
-  oligo = low_rev >> 20;	/* For 15..10 */
-  oligo |= high_rev << 12;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  oligo = low_rev >> 22;	/* For 15..11 */
+  oligo |= high_rev << 10;
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
   }
 
+
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
   }
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+  _masked = _mm256_and_si256(_oligo, bigmask6);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
+  }
+
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
   }
 
   return chrpos - 32;
 }
 
-#endif  /* HAVE_AVX2 */
-
+#endif	/* HAVE_AVX2 */
 
 
 #if !defined(HAVE_AVX2)
 
 static void
-count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+count_5mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -21389,342 +17531,340 @@ count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #endif
 
 
-  oligo = nexthigh_rev >> 22;	/* For 31..27 */
-  oligo |= low_rev << 10;
+  oligo = nexthigh_rev >> 24;	/* For 31..28 */
+  oligo |= low_rev << 8;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK6; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = oligo & MASK5; /* 31 */
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 2) & MASK6; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 2) & MASK5; /* 30 */
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 4) & MASK6; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 4) & MASK5; /* 29 */
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 6) & MASK6; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 6) & MASK5; /* 28 */
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 8) & MASK6; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-
-  masked = (oligo >> 8) & MASK6; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK6;	/* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = low_rev & MASK5;	/* 27 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
+
+  masked = (low_rev >> 2) & MASK5;	/* 26 */
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 2) & MASK6;	/* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 4) & MASK5;	/* 25 */
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 4) & MASK6;	/* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 6) & MASK5;	/* 24 */
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 6) & MASK6;	/* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 8) & MASK5;	/* 23 */
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 8) & MASK6;	/* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 10) & MASK5;	/* 22 */
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 10) & MASK6;	/* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 12) & MASK5;	/* 21 */
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 12) & MASK6;	/* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 14) & MASK5;	/* 20 */
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 14) & MASK6; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 16) & MASK5; /* 19 */
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 16) & MASK6; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 18) & MASK5; /* 18 */
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rev >> 18) & MASK6; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (low_rev >> 20) & MASK5; /* 17 */
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = low_rev >> 20;	/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = low_rev >> 22;		/* 16, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
 #else
   _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
-  oligo = low_rev >> 22;	/* For 15..11 */
-  oligo |= high_rev << 10;
+  oligo = low_rev >> 24;	/* For 15..12 */
+  oligo |= high_rev << 8;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK6; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = oligo & MASK5; /* 15 */
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 2) & MASK6; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 2) & MASK5; /* 14 */
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 4) & MASK6; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 4) & MASK5; /* 13 */
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 6) & MASK6; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (oligo >> 6) & MASK5; /* 12 */
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = (oligo >> 8) & MASK6; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-
-  masked = (oligo >> 8) & MASK6; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK6;	/* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = high_rev & MASK5;	/* 11 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
+
+  masked = (high_rev >> 2) & MASK5; /* 10 */
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 2) & MASK6; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 4) & MASK5; /* 9 */
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 4) & MASK6; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 6) & MASK5; /* 8 */
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 6) & MASK6;	/* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 8) & MASK5;	/* 7 */
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 8) & MASK6;	/* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 10) & MASK5;	/* 6 */
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 10) & MASK6;	/* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 12) & MASK5;	/* 5 */
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 12) & MASK6;	/* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 14) & MASK5;	/* 4 */
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 14) & MASK6;	/* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 16) & MASK5;	/* 3 */
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 16) & MASK6;	/* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 18) & MASK5;	/* 2 */
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-  masked = (high_rev >> 18) & MASK6;	/* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = (high_rev >> 20) & MASK5;	/* 1 */
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-  masked = high_rev >> 20;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  masked = high_rev >> 22;		/* 0, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
+  
 #else
   _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
 
   _oligo = _mm_srli_epi32(_oligo, 8);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -21734,280 +17874,166 @@ count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 #else	/* HAVE_AVX2 */
 
 static void
-count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+count_5mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
 
-  oligo = nexthigh_rev >> 22;	/* For 31..27 */
-  oligo |= low_rev << 10;
+  oligo = nexthigh_rev >> 24;	/* For 31..28 */
+  oligo |= low_rev << 8;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low5);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
-  oligo = low_rev >> 22;	/* For 15..11 */
-  oligo |= high_rev << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
+  oligo = low_rev >> 24;	/* For 15..12 */
+  oligo |= high_rev << 8;
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low5);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   return;
@@ -22017,7017 +18043,1259 @@ count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Gen
 
 
 
-/* Expecting current to have {high0_rev, low0_rev, high1_rev,
-   low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and
-   high2_rev} */
-#ifdef USE_SIMD_FOR_COUNTS
+#ifdef HAVE_SSE2
 static void
-extract_6mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
+extract_5mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { /* Writes 16 vectors through out with aligned stores: 12 windows taken from current, then 4 that also use bits of next */
   __m128i oligo;
 
-  _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */;
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
-  _mm_store_si128(out++, _mm_and_si128( current, mask6));
+  _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary: the logical shift by 22 leaves only the top 10 bits of each 32-bit lane */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); /* mask5 is defined elsewhere; presumably it keeps the low 10 bits of each lane -- confirm */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); /* Each following store moves the window 2 bits lower */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
+  _mm_store_si128(out++, _mm_and_si128( current, mask5)); /* Shift 0: last window read from current alone */
 
-  oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask6));
+  oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8)); /* Each 32-bit lane: current shifted up by 8, with the top 8 bits of next filling the low byte */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); /* With a 10-bit mask, each of these four windows mixes bits of next and current */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask5)); /* 16th and final vector written */
 
   return;
 }
 
+#ifdef USE_UNORDERED_5
+static Chrpos_T
+store_5mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) { /* Unordered variant: extract into a scratch buffer, then delegate the storing */
+  __m128i array[16]; /* 16 vectors x 4 lanes = 64 32-bit values */
+			  
+  extract_5mers_fwd_simd_64(array,current,next); /* array[i] holds one window offset across all 4 lanes */
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); /* Scratch buffer is passed as a flat UINT4 array; the callee's result is returned unchanged */
+}
+
+#else
+/* Ordered variant; includes (inlines) extract_5mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next). NOTE(review): _mm_blend_epi16 and _mm_cvtepu16_epi32 are SSE4.1 intrinsics -- confirm the enclosing guard implies SSE4.1 */
+static Chrpos_T
+store_5mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out; /* 16 vectors x 4 lanes = 64 32-bit values; out walks through array */
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; /* _tN packs row 2N (low 16 bits) and row 2N+1 (high 16 bits) of every 32-bit lane */
+  __m128i _u0, _u1, _u2, _u3;
+
+  out = &(array[0]);
+
+  oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8)); /* Each 32-bit lane: current shifted up by 8, with the top 8 bits of next filling the low byte */
+  /* _row0 = _mm_and_si128( oligo, mask5); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5); */
+  _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask5_epi16); /* Blend 0x55 takes even 16-bit elements from the 2nd operand and odd ones from the 1st; mask5_epi16 is defined elsewhere, presumably the 10-bit mask in every 16-bit element -- confirm */
+
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5); */
+  _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask5_epi16); /* The 1st operand's shift is chosen so the odd row lands at bit 16 of the lane */
+
+
+  /* _row4 = _mm_and_si128( current, mask5); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(current,2), mask5); */
+  _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask5_epi16);
+
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(current,4), mask5); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,6), mask5); */
+  _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask5_epi16);
+
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,8), mask5); */
+  /* _row9 = _mm_and_si128( _mm_srli_epi32(current,10), mask5); */
+  _t4 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask5_epi16);
+
+  /* _row10 = _mm_and_si128( _mm_srli_epi32(current,12), mask5); */
+  /* _row11 = _mm_and_si128( _mm_srli_epi32(current,14), mask5); */
+  _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask5_epi16);
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(current,16), mask5); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(current,18), mask5); */
+  _t6 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask5_epi16); /* From row 13 on, the odd row sits above bit 16, so the 1st operand is shifted right instead of left */
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(current,20), mask5); */
+  /* _row15 = _mm_srli_epi32(current,22); */ /* No mask necessary */
+  _t7 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,6), _mm_srli_epi32(current, 20), 0x55), mask5_epi16);
+
+
+  /* Split: top half */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1); /* rows 0-3 of lane 2 (low 64 bits) and of lane 3 (high 64 bits) */
+  _u1 = _mm_unpackhi_epi32(_t2,_t3); /* rows 4-7 of lanes 2 and 3 */
+  _u2 = _mm_unpackhi_epi32(_t4,_t5); /* rows 8-11 of lanes 2 and 3 */
+  _u3 = _mm_unpackhi_epi32(_t6,_t7); /* rows 12-15 of lanes 2 and 3 */
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); /* lane 3, rows 0-3, zero-extended from 16 to 32 bits */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); /* lane 3, rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); /* lane 3, rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); /* lane 3, rows 12-15 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); /* lane 2, rows 0-3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); /* lane 2, rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); /* lane 2, rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); /* lane 2, rows 12-15 */
+
+  /* Split: bottom half */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1); /* rows 0-3 of lane 0 (low 64 bits) and of lane 1 (high 64 bits) */
+  _u1 = _mm_unpacklo_epi32(_t2,_t3); /* rows 4-7 of lanes 0 and 1 */
+  _u2 = _mm_unpacklo_epi32(_t4,_t5); /* rows 8-11 of lanes 0 and 1 */
+  _u3 = _mm_unpacklo_epi32(_t6,_t7); /* rows 12-15 of lanes 0 and 1 */
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); /* lane 1, rows 0-3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); /* lane 1, rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); /* lane 1, rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); /* lane 1, rows 12-15 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); /* lane 0, rows 0-3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); /* lane 0, rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); /* lane 0, rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); /* lane 0, rows 12-15 */
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* array now holds rows 0-15 of lane 3, then lanes 2, 1, 0; the callee's result is returned unchanged */
+}
+#endif
+#endif
+
 #ifdef HAVE_AVX2
 static void
-extract_6mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
+extract_5mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { /* Writes 16 vectors through out with aligned stores: 12 windows taken from current, then 4 that also use bits of next */
   __m256i oligo;
 
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,20)); /* No mask necessary */;
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask6));
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); /* No mask necessary: the logical shift by 22 leaves only the top 10 bits of each 32-bit lane */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5)); /* bigmask5 is defined elsewhere; presumably it keeps the low 10 bits of each lane -- confirm */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5)); /* Each following store moves the window 2 bits lower */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5)); /* Shift 0: last window read from current alone */
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask6));
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8)); /* Each 32-bit lane: current shifted up by 8, with the top 8 bits of next filling the low byte */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5)); /* With a 10-bit mask, each of these four windows mixes bits of next and current */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5)); /* 16th and final vector written */
 
   return;
 }
-#endif
 
+#ifdef USE_UNORDERED_5
+static Chrpos_T
+store_5mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) { /* Unordered variant: extract into a scratch buffer, then delegate the storing */
+  __m256i array[16]; /* 16 vectors x 8 lanes = 128 32-bit values */
+			  
+  extract_5mers_fwd_simd_128(array,current,next); /* array[i] holds one window offset across all 8 lanes */
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); /* Scratch buffer is passed as a flat UINT4 array; the callee's result is returned unchanged */
+}
 
-static void
-count_6mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
 #else
-  Genomecomp_T array[4];
+/* Ordered variant; includes (inlines) extract_5mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */
+static Chrpos_T
+store_5mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out; /* 16 vectors x 8 lanes = 128 32-bit values; out walks through array */
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; /* First use: _tN packs row 2N (low 16 bits) and row 2N+1 (high 16 bits) of every 32-bit lane; reused below after the 64-bit unpack */
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8)); /* Each 32-bit lane: current shifted up by 8, with the top 8 bits of next filling the low byte */
+  /* _row0 = _mm256_and_si256( oligo, bigmask5); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5); */
+  _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask5_epi16); /* Blend 0x55 (applied to each 128-bit half) takes even 16-bit elements from the 2nd operand and odd ones from the 1st; bigmask5_epi16 is defined elsewhere, presumably the 10-bit mask in every 16-bit element -- confirm */
+
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5); */
+  _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask5_epi16); /* The 1st operand's shift is chosen so the odd row lands at bit 16 of the lane */
+
+
+  /* _row4 = _mm256_and_si256( current, bigmask5); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5); */
+  _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask5_epi16);
+
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5); */
+  _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask5_epi16);
+
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5); */
+  /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5); */
+  _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask5_epi16);
+
+  /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5); */
+  /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5); */
+  _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask5_epi16);
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5); */
+  _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask5_epi16); /* From row 13 on, the odd row sits above bit 16, so the 1st operand is shifted right instead of left */
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5); */
+  /* _row15 = _mm256_srli_epi32(current,22); */ /* No mask necessary */
+  _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,6), _mm256_srli_epi32(current,20), 0x55), bigmask5_epi16);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1); /* Within each 128-bit half: rows 0-3 of its lanes 2 and 3 */
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3); /* rows 4-7 of lanes 2 and 3 of each half */
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5); /* rows 8-11 of lanes 2 and 3 of each half */
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7); /* rows 12-15 of lanes 2 and 3 of each half */
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1); /* rows 0-3 of lanes 0 and 1 of each half */
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3); /* rows 4-7 of lanes 0 and 1 of each half */
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5); /* rows 8-11 of lanes 0 and 1 of each half */
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7); /* rows 12-15 of lanes 0 and 1 of each half */
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1); /* rows 0-7 of lane 3 of each half */
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3); /* rows 8-15 of lane 3 of each half */
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1); /* rows 0-7 of lane 2 of each half */
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3); /* rows 8-15 of lane 2 of each half */
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5); /* rows 0-7 of lane 1 of each half */
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7); /* rows 8-15 of lane 1 of each half */
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5); /* rows 0-7 of lane 0 of each half */
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7); /* rows 8-15 of lane 0 of each half */
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); /* upper half: lane 7, rows 0-7, zero-extended from 16 to 32 bits */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); /* lane 7, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); /* lane 6, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); /* lane 6, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); /* lane 5, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); /* lane 5, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); /* lane 4, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); /* lane 4, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); /* lower half: lane 3, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); /* lane 3, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); /* lane 2, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); /* lane 2, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); /* lane 1, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); /* lane 1, rows 8-15 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); /* lane 0, rows 0-7 */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); /* lane 0, rows 8-15 */
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* array now holds rows 0-15 of lane 7, then lanes 6 down to 0; the callee's result is returned unchanged */
+}
 #endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
 #endif
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,20); /* No mask necessary */;
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,20)); /* No mask necessary */;
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("16 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("32 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("48 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef HAVE_AVX512
+static void
+extract_5mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { /* Writes 16 vectors through out with aligned stores: 12 windows taken from current, then 4 that also use bits of next */
+  __m512i oligo;
+
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,22)); /* No mask necessary: the logical shift by 22 leaves only the top 10 bits of each 32-bit lane */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5)); /* hugemask5 is defined elsewhere; presumably it keeps the low 10 bits of each lane -- confirm */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5)); /* Each following store moves the window 2 bits lower */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask5)); /* Shift 0: last window read from current alone */
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,24), _mm512_slli_epi32(current,8)); /* Each 32-bit lane: current shifted up by 8, with the top 8 bits of next filling the low byte */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5)); /* With a 10-bit mask, each of these four windows mixes bits of next and current */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask5)); /* 16th and final vector written */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */
-#endif
-  debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,18), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("17 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("33 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("49 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef USE_UNORDERED_5
+static Chrpos_T
+store_5mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) { /* Unordered variant: extract into a scratch buffer, then delegate the storing */
+  __m512i array[16]; /* 16 vectors x 16 lanes = 256 32-bit values */
+			  
+  extract_5mers_fwd_simd_256(array,current,next); /* array[i] holds one window offset across all 16 lanes */
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); /* Scratch buffer is passed as a flat UINT4 array; the callee's result is returned unchanged */
+}
 
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */
-#endif
-  debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Ordered variant; includes (inlines) extract_5mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */
+static Chrpos_T
+store_5mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out; /* 16 vectors x 16 lanes = 256 32-bit values; out walks through array */
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2; /* _shuffleN are 64-bit element index vectors for _mm512_permutex2var_epi64 */
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; /* First use: _tN packs row 2N (low 16 bits) and row 2N+1 (high 16 bits) of every 32-bit lane; reused below as permute results */
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(next,24), _mm512_slli_epi32(current,8)); /* Each 32-bit lane: current shifted up by 8, with the top 8 bits of next filling the low byte */
+  _u0 = _mm512_and_si512( oligo, hugemask5); /* row 0 in the low bits; hugemask5 is defined elsewhere, presumably the low-10-bit mask per lane -- confirm */
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask5); /* row 1 moved to start at bit 16; highmask5 is defined elsewhere, presumably the same mask shifted up by 16 -- confirm */
+  _t0 = _mm512_or_si512(_u0, _u1); /* mask-and-OR plays the role of the 16-bit blend used in the narrower variants */
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5); /* row 2 */
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask5); /* row 3 in the upper 16 bits */
+  _t1 = _mm512_or_si512(_u0, _u1);
+
+
+  _u0 = _mm512_and_si512( current, hugemask5); /* row 4 */
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask5); /* row 5 in the upper 16 bits */
+  _t2 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5); /* row 6 */
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask5); /* row 7 in the upper 16 bits */
+  _t3 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5); /* row 8 */
+  /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask5); /* row 9 in the upper 16 bits */
+  _t4 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5); /* row 10 */
+  /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask5); /* row 11 in the upper 16 bits */
+  _t5 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5); /* row 12 */
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask5); /* row 13 in the upper 16 bits; it sits above bit 16, hence a right shift */
+  _t6 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5); /* row 14 */
+  /* _row15 = _mm512_srli_epi32(current,22); */ /* No mask necessary */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,6), highmask5); /* row 15 in the upper 16 bits */
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1); /* Within each 128-bit quarter: rows 0-3 of its lanes 2 and 3 */
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3); /* rows 4-7 of lanes 2 and 3 of each quarter */
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5); /* rows 8-11 of lanes 2 and 3 of each quarter */
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7); /* rows 12-15 of lanes 2 and 3 of each quarter */
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1); /* rows 0-3 of lanes 0 and 1 of each quarter */
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3); /* rows 4-7 of lanes 0 and 1 of each quarter */
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5); /* rows 8-11 of lanes 0 and 1 of each quarter */
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7); /* rows 12-15 of lanes 0 and 1 of each quarter */
+
+
+  /* Split: top half */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); /* Indices 0-7 pick from the 1st source, 8-15 from the 2nd: interleave 64-bit elements 7..4 of both */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); /* rows 0-7 of lanes 15, 14, 11, 10 */
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); /* rows 8-15 of lanes 15, 14, 11, 10 */
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); /* rows 0-7 of lanes 13, 12, 9, 8 */
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); /* rows 8-15 of lanes 13, 12, 9, 8 */
+
+
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); /* Joins rows 0-7 and rows 8-15 for the first two lanes held in each source */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 15, rows 0-15, zero-extended from 16 to 32 bits */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 14 */
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 13 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 12 */
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); /* Same joining for the last two lanes held in each source */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 11 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 10 */
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 9 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 8 */
+
+
+  /* Split: bottom half */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); /* Same interleave as above, now for 64-bit elements 3..0 */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); /* rows 0-7 of lanes 7, 6, 3, 2 */
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); /* rows 8-15 of lanes 7, 6, 3, 2 */
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); /* rows 0-7 of lanes 5, 4, 1, 0 */
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); /* rows 8-15 of lanes 5, 4, 1, 0 */
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 7 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 6 */
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 5 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 4 */
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,16), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("18 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("34 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("50 %04X => %d\n",array[3],counts[array[3]]));
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 3 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 2 */
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* lane 1 */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); /* lane 0 */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* array[i] now holds rows 0-15 of lane 15-i; the callee's result is returned unchanged */
+}
 #endif
-  debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("19 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("35 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("51 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */
-#endif
-  debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask6);
+#if !defined(HAVE_AVX2)
+
+static int
+store_5mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
+  __m128i _oligo, _masked;
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+
+
+  oligo = nexthigh_rev >> 24;	/* For 31..28 */
+  oligo |= low_rev << 8;
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK5; /* 31 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("20 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("36 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("52 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */
-#endif
-  debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (oligo >> 2) & MASK5; /* 30 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
+  }
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
+  masked = (oligo >> 4) & MASK5; /* 29 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("21 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("37 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("53 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */
-#endif
-  debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("22 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("38 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("54 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */
-#endif
-  debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("23 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("39 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("55 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */
-#endif
-  debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("24 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("40 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("56 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */
-#endif
-  debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("25 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("41 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("57 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */
-#endif
-  debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("26 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("42 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("58 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */
-#endif
-  debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-
-  oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("27 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("43 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("59 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */
-#endif
-  debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("28 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("44 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("60 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */
-#endif
-  debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("29 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("45 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("61 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */
-#endif
-  debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("30 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("46 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("62 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */
-#endif
-  debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("31 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("47 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("63 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */
-#endif
-  debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-  return;
-}
-
-#endif
-
-
-#ifdef HAVE_AVX2
-static void
-count_6mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-  array = _mm256_srli_epi32(current,20); /* No mask necessary */;
-  counts[EXTRACT256(array,0)] += 1;	 /* 0 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 16 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 32 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 48 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 64 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 80 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 96 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 112 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;	 /* 1 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 17 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 33 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 49 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 65 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 81 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 97 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 113 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 2 */
-  counts[EXTRACT256(array,1)] += 1; /* 18 */
-  counts[EXTRACT256(array,2)] += 1; /* 34 */
-  counts[EXTRACT256(array,3)] += 1; /* 50 */
-  counts[EXTRACT256(array,4)] += 1; /* 66 */
-  counts[EXTRACT256(array,5)] += 1; /* 82 */
-  counts[EXTRACT256(array,6)] += 1; /* 98 */
-  counts[EXTRACT256(array,7)] += 1; /* 114 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 3 */
-  counts[EXTRACT256(array,1)] += 1; /* 19 */
-  counts[EXTRACT256(array,2)] += 1; /* 35 */
-  counts[EXTRACT256(array,3)] += 1; /* 51 */
-  counts[EXTRACT256(array,4)] += 1; /* 67 */
-  counts[EXTRACT256(array,5)] += 1; /* 83 */
-  counts[EXTRACT256(array,6)] += 1; /* 99 */
-  counts[EXTRACT256(array,7)] += 1; /* 115 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 4 */
-  counts[EXTRACT256(array,1)] += 1; /* 20 */
-  counts[EXTRACT256(array,2)] += 1; /* 36 */
-  counts[EXTRACT256(array,3)] += 1; /* 52 */
-  counts[EXTRACT256(array,4)] += 1; /* 68 */
-  counts[EXTRACT256(array,5)] += 1; /* 84 */
-  counts[EXTRACT256(array,6)] += 1; /* 100 */
-  counts[EXTRACT256(array,7)] += 1; /* 116 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 5 */
-  counts[EXTRACT256(array,1)] += 1; /* 21 */
-  counts[EXTRACT256(array,2)] += 1; /* 37 */
-  counts[EXTRACT256(array,3)] += 1; /* 53 */
-  counts[EXTRACT256(array,4)] += 1; /* 69 */
-  counts[EXTRACT256(array,5)] += 1; /* 85 */
-  counts[EXTRACT256(array,6)] += 1; /* 101 */
-  counts[EXTRACT256(array,7)] += 1; /* 117 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 6 */
-  counts[EXTRACT256(array,1)] += 1; /* 22 */
-  counts[EXTRACT256(array,2)] += 1; /* 38 */
-  counts[EXTRACT256(array,3)] += 1; /* 54 */
-  counts[EXTRACT256(array,4)] += 1; /* 70 */
-  counts[EXTRACT256(array,5)] += 1; /* 86 */
-  counts[EXTRACT256(array,6)] += 1; /* 102 */
-  counts[EXTRACT256(array,7)] += 1; /* 118 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 7 */
-  counts[EXTRACT256(array,1)] += 1; /* 23 */
-  counts[EXTRACT256(array,2)] += 1; /* 39 */
-  counts[EXTRACT256(array,3)] += 1; /* 55 */
-  counts[EXTRACT256(array,4)] += 1; /* 71 */
-  counts[EXTRACT256(array,5)] += 1; /* 87 */
-  counts[EXTRACT256(array,6)] += 1; /* 103 */
-  counts[EXTRACT256(array,7)] += 1; /* 119 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 8 */
-  counts[EXTRACT256(array,1)] += 1; /* 24 */
-  counts[EXTRACT256(array,2)] += 1; /* 40 */
-  counts[EXTRACT256(array,3)] += 1; /* 56 */
-  counts[EXTRACT256(array,4)] += 1; /* 72 */
-  counts[EXTRACT256(array,5)] += 1; /* 88 */
-  counts[EXTRACT256(array,6)] += 1; /* 104 */
-  counts[EXTRACT256(array,7)] += 1; /* 120 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 9 */
-  counts[EXTRACT256(array,1)] += 1; /* 25 */
-  counts[EXTRACT256(array,2)] += 1; /* 41 */
-  counts[EXTRACT256(array,3)] += 1; /* 57 */
-  counts[EXTRACT256(array,4)] += 1; /* 73 */
-  counts[EXTRACT256(array,5)] += 1; /* 89 */
-  counts[EXTRACT256(array,6)] += 1; /* 105 */
-  counts[EXTRACT256(array,7)] += 1; /* 121 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 10 */
-  counts[EXTRACT256(array,1)] += 1; /* 26 */
-  counts[EXTRACT256(array,2)] += 1; /* 42 */
-  counts[EXTRACT256(array,3)] += 1; /* 58 */
-  counts[EXTRACT256(array,4)] += 1; /* 74 */
-  counts[EXTRACT256(array,5)] += 1; /* 90 */
-  counts[EXTRACT256(array,6)] += 1; /* 106 */
-  counts[EXTRACT256(array,7)] += 1; /* 122 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 11 */
-  counts[EXTRACT256(array,1)] += 1; /* 27 */
-  counts[EXTRACT256(array,2)] += 1; /* 43 */
-  counts[EXTRACT256(array,3)] += 1; /* 59 */
-  counts[EXTRACT256(array,4)] += 1; /* 75 */
-  counts[EXTRACT256(array,5)] += 1; /* 91 */
-  counts[EXTRACT256(array,6)] += 1; /* 107 */
-  counts[EXTRACT256(array,7)] += 1; /* 123 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 12 */
-  counts[EXTRACT256(array,1)] += 1; /* 28 */
-  counts[EXTRACT256(array,2)] += 1; /* 44 */
-  counts[EXTRACT256(array,3)] += 1; /* 60 */
-  counts[EXTRACT256(array,4)] += 1; /* 76 */
-  counts[EXTRACT256(array,5)] += 1; /* 92 */
-  counts[EXTRACT256(array,6)] += 1; /* 108 */
-  counts[EXTRACT256(array,7)] += 1; /* 124 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 13 */
-  counts[EXTRACT256(array,1)] += 1; /* 29 */
-  counts[EXTRACT256(array,2)] += 1; /* 45 */
-  counts[EXTRACT256(array,3)] += 1; /* 61 */
-  counts[EXTRACT256(array,4)] += 1; /* 77 */
-  counts[EXTRACT256(array,5)] += 1; /* 93 */
-  counts[EXTRACT256(array,6)] += 1; /* 109 */
-  counts[EXTRACT256(array,7)] += 1; /* 125 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 14 */
-  counts[EXTRACT256(array,1)] += 1; /* 30 */
-  counts[EXTRACT256(array,2)] += 1; /* 46 */
-  counts[EXTRACT256(array,3)] += 1; /* 62 */
-  counts[EXTRACT256(array,4)] += 1; /* 78 */
-  counts[EXTRACT256(array,5)] += 1; /* 94 */
-  counts[EXTRACT256(array,6)] += 1; /* 110 */
-  counts[EXTRACT256(array,7)] += 1; /* 126 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask6);
-  counts[EXTRACT256(array,0)] += 1; /* 15 */
-  counts[EXTRACT256(array,1)] += 1; /* 31 */
-  counts[EXTRACT256(array,2)] += 1; /* 47 */
-  counts[EXTRACT256(array,3)] += 1; /* 63 */
-  counts[EXTRACT256(array,4)] += 1; /* 79 */
-  counts[EXTRACT256(array,5)] += 1; /* 95 */
-  counts[EXTRACT256(array,6)] += 1; /* 111 */
-  counts[EXTRACT256(array,7)] += 1; /* 127 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  return;
-}
-#endif
-
-
-#if !defined(HAVE_AVX2)
-
-static int
-store_6mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
-#endif
-
-
-  oligo = nexthigh_rev >> 22;	/* For 27..31 */
-  oligo |= low_rev << 10;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK6; /* 31 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  masked = (oligo >> 2) & MASK6; /* 30 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
-
-  masked = (oligo >> 4) & MASK6; /* 29 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
-
-  masked = (oligo >> 6) & MASK6; /* 28 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
-
-  masked = (oligo >> 8) & MASK6; /* 27 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+  masked = (oligo >> 6) & MASK5; /* 28 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
+  }
 
 #else
   _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask6);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
+#endif
 
 
-  masked = (oligo >> 8) & MASK6; /* 27 */
+#ifdef INDIVIDUAL_SHIFTS
+  masked = low_rev & MASK5;	/* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
-#endif
 
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK6;	/* 26 */
+  masked = (low_rev >> 2) & MASK5;	/* 26 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
-  masked = (low_rev >> 2) & MASK6;	/* 25 */
+  masked = (low_rev >> 4) & MASK5;	/* 25 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
-  masked = (low_rev >> 4) & MASK6;	/* 24 */
+  masked = (low_rev >> 6) & MASK5;	/* 24 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
-  masked = (low_rev >> 6) & MASK6;	/* 23 */
+  masked = (low_rev >> 8) & MASK5;	/* 23 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
-  masked = (low_rev >> 8) & MASK6;	/* 22 */
+  masked = (low_rev >> 10) & MASK5;	/* 22 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
-  masked = (low_rev >> 10) & MASK6;	/* 21 */
+  masked = (low_rev >> 12) & MASK5;	/* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
-  masked = (low_rev >> 12) & MASK6;	/* 20 */
+  masked = (low_rev >> 14) & MASK5;	/* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
-  masked = (low_rev >> 14) & MASK6; /* 19 */
+  masked = (low_rev >> 16) & MASK5; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  masked = (low_rev >> 16) & MASK6; /* 18 */
+  masked = (low_rev >> 18) & MASK5; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  masked = (low_rev >> 18) & MASK6; /* 17 */
+  masked = (low_rev >> 20) & MASK5; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  masked = low_rev >> 20;	/* 16, No mask necessary */
+  masked = low_rev >> 22;		/* 16, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
   
 #else
   _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
 #ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-#endif
-
-
-  oligo = low_rev >> 22;	/* For 15..11 */
-  oligo |= high_rev << 10;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK6; /* 15 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  masked = (oligo >> 2) & MASK6; /* 14 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  masked = (oligo >> 4) & MASK6; /* 13 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  masked = (oligo >> 6) & MASK6; /* 12 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-  masked = (oligo >> 8) & MASK6; /* 11 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-
-  masked = (oligo >> 8) & MASK6; /* 11 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK6;	/* 10 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  masked = (high_rev >> 2) & MASK6; /* 9 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
-
-  masked = (high_rev >> 4) & MASK6; /* 8 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-
-  masked = (high_rev >> 6) & MASK6;	/* 7 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
-
-  masked = (high_rev >> 8) & MASK6;	/* 6 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
-
-  masked = (high_rev >> 10) & MASK6;	/* 5 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
-
-  masked = (high_rev >> 12) & MASK6;	/* 4 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-  masked = (high_rev >> 14) & MASK6;	/* 3 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-  masked = (high_rev >> 16) & MASK6;	/* 2 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  masked = (high_rev >> 18) & MASK6;	/* 1 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  masked = high_rev >> 20;		/* 0, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-  
-#else
-  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6));
-#else
-  _masked = _mm_and_si128(_oligo, mask6);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-#endif
-
-
-  return chrpos - 32;
-}
-
-#else  /* HAVE_AVX2 */
-
-static int
-store_6mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked, _counts;
-
-
-  oligo = nexthigh_rev >> 22;	/* For 27..31 */
-  oligo |= low_rev << 10;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
-
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
-
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-
-
-  oligo = low_rev >> 22;	/* For 15..11 */
-  oligo |= high_rev << 10;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
-
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
-
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-
-  return chrpos - 32;
-}
-
-#endif	/* HAVE_AVX2 */
-
-
-#if !defined(HAVE_AVX2)
-
-static void
-count_5mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
-#endif
-
-
-  oligo = nexthigh_rev >> 24;	/* For 31..28 */
-  oligo |= low_rev << 8;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 2) & MASK5; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 4) & MASK5; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 6) & MASK5; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK5;	/* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 2) & MASK5;	/* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 4) & MASK5;	/* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 6) & MASK5;	/* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 8) & MASK5;	/* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 10) & MASK5;	/* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 12) & MASK5;	/* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 14) & MASK5;	/* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 16) & MASK5; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 18) & MASK5; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
-  masked = (low_rev >> 20) & MASK5; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
-  masked = low_rev >> 22;		/* 16, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-  oligo = low_rev >> 24;	/* For 15..12 */
-  oligo |= high_rev << 8;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 2) & MASK5; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 4) & MASK5; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 6) & MASK5; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK5;	/* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 2) & MASK5; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 4) & MASK5; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 6) & MASK5; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 8) & MASK5;	/* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 10) & MASK5;	/* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 12) & MASK5;	/* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 14) & MASK5;	/* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 16) & MASK5;	/* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 18) & MASK5;	/* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rev >> 20) & MASK5;	/* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
-
-  masked = high_rev >> 22;		/* 0, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-  
-#else
-  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-#endif
-
-  return;
-}
-
-#else	/* HAVE_AVX2 */
-
-static void
-count_5mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-
-  oligo = nexthigh_rev >> 24;	/* For 31..28 */
-  oligo |= low_rev << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  oligo = low_rev >> 24;	/* For 15..12 */
-  oligo |= high_rev << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  return;
-}
-
-#endif  /* HAVE_AVX2 */
-
-
-
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_5mers_fwd_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
-
-  _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary */
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
-  _mm_store_si128(out++, _mm_and_si128( current, mask5));
-
-  oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask5));
-
-  return;
-}
-
-#ifdef HAVE_AVX2
-static void
-extract_5mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
-
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); /* No mask necessary */
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5));
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5));
-
-  return;
-}
-#endif
-
-
-static void
-count_5mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
-
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,22); /* No mask necessary */
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,22)); /* No mask necessary */
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("16 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("32 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("48 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */
-#endif
-  debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,20), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("17 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("33 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("49 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */
-#endif
-  debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,18), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("18 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("34 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("50 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */
-#endif
-  debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,16), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("19 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("35 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("51 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */
-#endif
-  debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("20 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("36 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("52 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */
-#endif
-  debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("21 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("37 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("53 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */
-#endif
-  debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("22 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("38 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("54 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */
-#endif
-  debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("23 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("39 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("55 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */
-#endif
-  debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("24 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("40 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("56 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */
-#endif
-  debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("25 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("41 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("57 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */
-#endif
-  debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("26 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("42 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("58 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */
-#endif
-  debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("27 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("43 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("59 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */
-#endif
-  debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-
-  oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("28 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("44 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("60 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */
-#endif
-  debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("29 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("45 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("61 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */
-#endif
-  debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("30 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("46 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("62 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */
-#endif
-  debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("31 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("47 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("63 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */
-#endif
-  debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-  return;
-}
-
-#endif
-
-
-#ifdef HAVE_AVX2
-static void
-count_5mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-
-  array = _mm256_srli_epi32(current,22); /* No mask necessary */
-  counts[EXTRACT256(array,0)] += 1;	 /* 0 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 16 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 32 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 48 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 64 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 80 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 96 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 112 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;	 /* 1 */
-  counts[EXTRACT256(array,1)] += 1;	 /* 17 */
-  counts[EXTRACT256(array,2)] += 1;	 /* 33 */
-  counts[EXTRACT256(array,3)] += 1;	 /* 49 */
-  counts[EXTRACT256(array,4)] += 1;	 /* 65 */
-  counts[EXTRACT256(array,5)] += 1;	 /* 81 */
-  counts[EXTRACT256(array,6)] += 1;	 /* 97 */
-  counts[EXTRACT256(array,7)] += 1;	 /* 113 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 2 */
-  counts[EXTRACT256(array,1)] += 1; /* 18 */
-  counts[EXTRACT256(array,2)] += 1; /* 34 */
-  counts[EXTRACT256(array,3)] += 1; /* 50 */
-  counts[EXTRACT256(array,4)] += 1; /* 66 */
-  counts[EXTRACT256(array,5)] += 1; /* 82 */
-  counts[EXTRACT256(array,6)] += 1; /* 98 */
-  counts[EXTRACT256(array,7)] += 1; /* 114 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 3 */
-  counts[EXTRACT256(array,1)] += 1; /* 19 */
-  counts[EXTRACT256(array,2)] += 1; /* 35 */
-  counts[EXTRACT256(array,3)] += 1; /* 51 */
-  counts[EXTRACT256(array,4)] += 1; /* 67 */
-  counts[EXTRACT256(array,5)] += 1; /* 83 */
-  counts[EXTRACT256(array,6)] += 1; /* 99 */
-  counts[EXTRACT256(array,7)] += 1; /* 115 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 4 */
-  counts[EXTRACT256(array,1)] += 1; /* 20 */
-  counts[EXTRACT256(array,2)] += 1; /* 36 */
-  counts[EXTRACT256(array,3)] += 1; /* 52 */
-  counts[EXTRACT256(array,4)] += 1; /* 68 */
-  counts[EXTRACT256(array,5)] += 1; /* 84 */
-  counts[EXTRACT256(array,6)] += 1; /* 100 */
-  counts[EXTRACT256(array,7)] += 1; /* 116 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 5 */
-  counts[EXTRACT256(array,1)] += 1; /* 21 */
-  counts[EXTRACT256(array,2)] += 1; /* 37 */
-  counts[EXTRACT256(array,3)] += 1; /* 53 */
-  counts[EXTRACT256(array,4)] += 1; /* 69 */
-  counts[EXTRACT256(array,5)] += 1; /* 85 */
-  counts[EXTRACT256(array,6)] += 1; /* 101 */
-  counts[EXTRACT256(array,7)] += 1; /* 117 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 6 */
-  counts[EXTRACT256(array,1)] += 1; /* 22 */
-  counts[EXTRACT256(array,2)] += 1; /* 38 */
-  counts[EXTRACT256(array,3)] += 1; /* 54 */
-  counts[EXTRACT256(array,4)] += 1; /* 70 */
-  counts[EXTRACT256(array,5)] += 1; /* 86 */
-  counts[EXTRACT256(array,6)] += 1; /* 102 */
-  counts[EXTRACT256(array,7)] += 1; /* 118 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 7 */
-  counts[EXTRACT256(array,1)] += 1; /* 23 */
-  counts[EXTRACT256(array,2)] += 1; /* 39 */
-  counts[EXTRACT256(array,3)] += 1; /* 55 */
-  counts[EXTRACT256(array,4)] += 1; /* 71 */
-  counts[EXTRACT256(array,5)] += 1; /* 87 */
-  counts[EXTRACT256(array,6)] += 1; /* 103 */
-  counts[EXTRACT256(array,7)] += 1; /* 119 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 8 */
-  counts[EXTRACT256(array,1)] += 1; /* 24 */
-  counts[EXTRACT256(array,2)] += 1; /* 40 */
-  counts[EXTRACT256(array,3)] += 1; /* 56 */
-  counts[EXTRACT256(array,4)] += 1; /* 72 */
-  counts[EXTRACT256(array,5)] += 1; /* 88 */
-  counts[EXTRACT256(array,6)] += 1; /* 104 */
-  counts[EXTRACT256(array,7)] += 1; /* 120 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 9 */
-  counts[EXTRACT256(array,1)] += 1; /* 25 */
-  counts[EXTRACT256(array,2)] += 1; /* 41 */
-  counts[EXTRACT256(array,3)] += 1; /* 57 */
-  counts[EXTRACT256(array,4)] += 1; /* 73 */
-  counts[EXTRACT256(array,5)] += 1; /* 89 */
-  counts[EXTRACT256(array,6)] += 1; /* 105 */
-  counts[EXTRACT256(array,7)] += 1; /* 121 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 10 */
-  counts[EXTRACT256(array,1)] += 1; /* 26 */
-  counts[EXTRACT256(array,2)] += 1; /* 42 */
-  counts[EXTRACT256(array,3)] += 1; /* 58 */
-  counts[EXTRACT256(array,4)] += 1; /* 74 */
-  counts[EXTRACT256(array,5)] += 1; /* 90 */
-  counts[EXTRACT256(array,6)] += 1; /* 106 */
-  counts[EXTRACT256(array,7)] += 1; /* 122 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 11 */
-  counts[EXTRACT256(array,1)] += 1; /* 27 */
-  counts[EXTRACT256(array,2)] += 1; /* 43 */
-  counts[EXTRACT256(array,3)] += 1; /* 59 */
-  counts[EXTRACT256(array,4)] += 1; /* 75 */
-  counts[EXTRACT256(array,5)] += 1; /* 91 */
-  counts[EXTRACT256(array,6)] += 1; /* 107 */
-  counts[EXTRACT256(array,7)] += 1; /* 123 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 12 */
-  counts[EXTRACT256(array,1)] += 1; /* 28 */
-  counts[EXTRACT256(array,2)] += 1; /* 44 */
-  counts[EXTRACT256(array,3)] += 1; /* 60 */
-  counts[EXTRACT256(array,4)] += 1; /* 76 */
-  counts[EXTRACT256(array,5)] += 1; /* 92 */
-  counts[EXTRACT256(array,6)] += 1; /* 108 */
-  counts[EXTRACT256(array,7)] += 1; /* 124 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 13 */
-  counts[EXTRACT256(array,1)] += 1; /* 29 */
-  counts[EXTRACT256(array,2)] += 1; /* 45 */
-  counts[EXTRACT256(array,3)] += 1; /* 61 */
-  counts[EXTRACT256(array,4)] += 1; /* 77 */
-  counts[EXTRACT256(array,5)] += 1; /* 93 */
-  counts[EXTRACT256(array,6)] += 1; /* 109 */
-  counts[EXTRACT256(array,7)] += 1; /* 125 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 14 */
-  counts[EXTRACT256(array,1)] += 1; /* 30 */
-  counts[EXTRACT256(array,2)] += 1; /* 46 */
-  counts[EXTRACT256(array,3)] += 1; /* 62 */
-  counts[EXTRACT256(array,4)] += 1; /* 78 */
-  counts[EXTRACT256(array,5)] += 1; /* 94 */
-  counts[EXTRACT256(array,6)] += 1; /* 110 */
-  counts[EXTRACT256(array,7)] += 1; /* 126 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask5);
-  counts[EXTRACT256(array,0)] += 1; /* 15 */
-  counts[EXTRACT256(array,1)] += 1; /* 31 */
-  counts[EXTRACT256(array,2)] += 1; /* 47 */
-  counts[EXTRACT256(array,3)] += 1; /* 63 */
-  counts[EXTRACT256(array,4)] += 1; /* 79 */
-  counts[EXTRACT256(array,5)] += 1; /* 95 */
-  counts[EXTRACT256(array,6)] += 1; /* 111 */
-  counts[EXTRACT256(array,7)] += 1; /* 127 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  return;
-}
-#endif
-
-
-#if !defined(HAVE_AVX2)
-
-static int
-store_5mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
-#endif
-
-
-  oligo = nexthigh_rev >> 24;	/* For 31..28 */
-  oligo |= low_rev << 8;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 31 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  masked = (oligo >> 2) & MASK5; /* 30 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
-
-  masked = (oligo >> 4) & MASK5; /* 29 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
-
-  masked = (oligo >> 6) & MASK5; /* 28 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rev & MASK5;	/* 27 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
-
-  masked = (low_rev >> 2) & MASK5;	/* 26 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  masked = (low_rev >> 4) & MASK5;	/* 25 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  masked = (low_rev >> 6) & MASK5;	/* 24 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
-
-  masked = (low_rev >> 8) & MASK5;	/* 23 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-  masked = (low_rev >> 10) & MASK5;	/* 22 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
-
-  masked = (low_rev >> 12) & MASK5;	/* 21 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  masked = (low_rev >> 14) & MASK5;	/* 20 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-  masked = (low_rev >> 16) & MASK5; /* 19 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-  masked = (low_rev >> 18) & MASK5; /* 18 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
-
-  masked = (low_rev >> 20) & MASK5; /* 17 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
-
-  masked = low_rev >> 22;		/* 16, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-  
-#else
-  _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-#endif
-
-
-  oligo = low_rev >> 24;	/* For 15..12 */
-  oligo |= high_rev << 8;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 15 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  masked = (oligo >> 2) & MASK5; /* 14 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  masked = (oligo >> 4) & MASK5; /* 13 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  masked = (oligo >> 6) & MASK5; /* 12 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rev & MASK5;	/* 11 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-  masked = (high_rev >> 2) & MASK5; /* 10 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  masked = (high_rev >> 4) & MASK5; /* 9 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
-
-  masked = (high_rev >> 6) & MASK5; /* 8 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-
-  masked = (high_rev >> 8) & MASK5;	/* 7 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
-
-  masked = (high_rev >> 10) & MASK5;	/* 6 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
-
-  masked = (high_rev >> 12) & MASK5;	/* 5 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
-
-  masked = (high_rev >> 14) & MASK5;	/* 4 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-  masked = (high_rev >> 16) & MASK5;	/* 3 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-  masked = (high_rev >> 18) & MASK5;	/* 2 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  masked = (high_rev >> 20) & MASK5;	/* 1 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  masked = high_rev >> 22;		/* 0, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-  
-#else
-  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-#endif
-
-  return chrpos - 32;
-}
-
-#else	/* HAVE_AVX2 */
-
-static int
-store_5mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
-  Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked, _counts;
-
-
-  oligo = nexthigh_rev >> 24;	/* For 31..28 */
-  oligo |= low_rev << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
-
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
-
-
-  oligo = low_rev >> 24;	/* For 15..12 */
-  oligo |= high_rev << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
-  }
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
-  }
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
-  }
-
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
-  }
-
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
-  }
-
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-
-  return chrpos - 32;
-}
-
-#endif  /* HAVE_AVX2 */
-
-
-
-#ifndef USE_SIMD_FOR_COUNTS
-
-static void
-count_positions_fwd_std (Count_T *counts, Inquery_T *inquery, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
-			 int genestrand) {
-  int startdiscard, enddiscard;
-  Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
-    low, high, nextlow;
-
-  debug(printf("Starting count_positions_fwd_std\n"));
-
-  if (left_plus_length < (Univcoord_T) indexsize) {
-    left_plus_length = 0;
-  } else {
-    left_plus_length -= indexsize;
-  }
-
-  startptr = left/32U*3;
-  ptr = endptr = left_plus_length/32U*3;
-  startdiscard = left % 32; /* (left+pos5) % 32 */
-  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-  
-  if (left_plus_length <= left) {
-    /* Skip */
-
-  } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    high_rev = reverse_nt[low >> 16];
-    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-    low_rev = reverse_nt[high >> 16];
-    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-    nexthigh_rev = reverse_nt[nextlow >> 16];
-    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-    if (indexsize == 9) {
-      count_9mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
-    }
-
-  } else {
-    /* Genome_print_blocks(ref_blocks,left,left+16); */
-
-    /* End block */
-#ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    high_rev = reverse_nt[low >> 16];
-    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-    low_rev = reverse_nt[high >> 16];
-    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-    nexthigh_rev = reverse_nt[nextlow >> 16];
-    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-    if (indexsize == 9) {
-      count_9mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else {
-      abort();
-    }
-
-    /* Middle blocks */
-    if (indexsize == 9) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	count_9mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 8) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	count_8mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 7) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	count_7mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 6) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	count_6mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 5) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	count_5mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else {
-      abort();
-    }
-
-    ptr -= 3;
-
-    /* Start block */
-    assert(ptr == startptr);
-
-#ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    high_rev = reverse_nt[low >> 16];
-    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-    low_rev = reverse_nt[high >> 16];
-    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-    nexthigh_rev = reverse_nt[nextlow >> 16];
-    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-    if (indexsize == 9) {
-      count_9mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 8) {
-      count_8mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 7) {
-      count_7mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 6) {
-      count_6mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 5) {
-      count_5mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
-    }
-
-  }
-
-  return;
-}
-#endif
-
-
-
-#ifdef USE_SIMD_FOR_COUNTS
-
-static void
-count_positions_fwd_simd (Count_T *counts, Inquery_T *inquery, int indexsize,
-			  Univcoord_T left, Univcoord_T left_plus_length, int genestrand) {
-  int startdiscard, enddiscard;
-  Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow;
-  Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1;
-  __m128i current, next, mask2, mask4;
-  /* __m128i array[16]; */
-#ifdef HAVE_SSSE3
-  __m128i reverse8;
-#else
-  __m128i mask8;
-#endif
-#ifdef HAVE_SSE4_1
-  __m128i temp;
-#else
-  Genomecomp_T high1_rev, low1_rev;
-#endif
-#ifdef HAVE_AVX2
-  Genomecomp_T low2, high2, low3, high3;
-  __m256i current256, next256, temp256, bigmask2, bigmask4, bigreverse8;
-  __m256i shift256;
-#endif
-
-
-  debug(printf("Starting count_positions_fwd_simd\n"));
-
-  if (left_plus_length < (Univcoord_T) indexsize) {
-    left_plus_length = 0;
-  } else {
-    left_plus_length -= indexsize;
-  }
-
-  startptr = left/32U*3;
-  ptr = endptr = left_plus_length/32U*3;
-  startdiscard = left % 32; /* (left+pos5) % 32 */
-  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-  
-  mask2 = _mm_set1_epi32(0x33333333);
-  mask4 = _mm_set1_epi32(0x0F0F0F0F);
-#ifdef HAVE_SSSE3
-  reverse8 = _mm_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
-#else
-  mask8 = _mm_set1_epi32(0x00FF00FF);
-#endif
-#ifdef HAVE_AVX2
-  bigmask2 = _mm256_set1_epi32(0x33333333);
-  bigmask4 = _mm256_set1_epi32(0x0F0F0F0F);
-  bigreverse8 = _mm256_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03,
-                                0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
-  shift256 = _mm256_setr_epi32(1,2,3,4,5,6,7,0);
-#endif
-
-  if (left_plus_length <= left) {
-    /* Skip */
-
-  } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
-    high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high0 = ref_blocks[ptr];
-    low0 = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    current = _mm_set_epi32(0,nextlow,high0,low0);
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-    current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-    current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
-
-#ifdef HAVE_SSE4_1
-    high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-    low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-    nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
-#else
-    high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-    low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-    nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
-#endif
-
-    if (indexsize == 9) {
-      count_9mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
-    }
-
-  } else {
-    /* Genome_print_blocks(ref_blocks,left,left+16); */
-
-    /* End block */
-#ifdef WORDS_BIGENDIAN
-    high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high0 = ref_blocks[ptr];
-    low0 = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    current = _mm_set_epi32(0,nextlow,high0,low0);
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-    current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-    current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
-
-#ifdef HAVE_SSE4_1
-    high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-    low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-    nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
-#else
-    high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-    low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-    nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
-#endif
-
-    if (indexsize == 9) {
-      count_9mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else {
-      abort();
-    }
-
-    /* Middle blocks */
-    if (indexsize == 9) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
-
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
-
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes*/
-
-	nexthigh_rev = high0_rev;
-
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
-
-	count_9mers_fwd_simd_128(counts,inquery,current256,next256);
-      }
-#endif
-
-      while (ptr > startptr + 6) {
-	ptr -= 6;
-
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
-
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
-	nexthigh_rev = high0_rev;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
+  }
 
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
+  }
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
+  }
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
+  }
 
-#if 0
-	extract_9mers_fwd_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	count_9mers_fwd_simd(counts,inquery,current,next);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-      }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
+  }
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
+  }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
+  }
+
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
-	count_9mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev);
-      }
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
+  }
 
-    } else if (indexsize == 8) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
+  }
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
+  }
+#endif
 
-	nexthigh_rev = high0_rev;
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  oligo = low_rev >> 24;	/* For 15..12 */
+  oligo |= high_rev << 8;
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK5; /* 15 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
+  }
 
-	count_8mers_fwd_simd_128(counts,inquery,current256,next256);
-      }
-#endif
+  masked = (oligo >> 2) & MASK5; /* 14 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
+  }
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+  masked = (oligo >> 4) & MASK5; /* 13 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  masked = (oligo >> 6) & MASK5; /* 12 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
+  }
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
 
-	nexthigh_rev = high0_rev;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
+  }
 
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2);*/
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
+  }
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
+  }
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
+  }
 #endif
 
-#if 0
-	extract_8mers_fwd_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_8mers_fwd_simd(counts,inquery,current,next);
-#endif
-      }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rev & MASK5;	/* 11 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  masked = (high_rev >> 2) & MASK5; /* 10 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
+  }
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  masked = (high_rev >> 4) & MASK5; /* 9 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
+  }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-#endif
+  masked = (high_rev >> 6) & MASK5; /* 8 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
 
-	count_8mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev);
-      }
+  masked = (high_rev >> 8) & MASK5;	/* 7 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
+  }
 
-    } else if (indexsize == 7) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
+  masked = (high_rev >> 10) & MASK5;	/* 6 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  masked = (high_rev >> 12) & MASK5;	/* 5 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
+  }
+
+  masked = (high_rev >> 14) & MASK5;	/* 4 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
+  }
+
+  masked = (high_rev >> 16) & MASK5;	/* 3 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
+  }
+
+  masked = (high_rev >> 18) & MASK5;	/* 2 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
+  masked = (high_rev >> 20) & MASK5;	/* 1 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
+
+  masked = high_rev >> 22;		/* 0, No mask necessary */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
+  
+#else
+  _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
+#endif
 
-	nexthigh_rev = high0_rev;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
+  }
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
+  }
 
-	count_7mers_fwd_simd_128(counts,inquery,current256,next256);
-      }
-#endif
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
+  }
 
-	nexthigh_rev = high0_rev;
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
+  }
 
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
+  }
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
+  }
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
 
-#if 0
-	extract_7mers_fwd_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	count_7mers_fwd_simd(counts,inquery,current,next);
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-      }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
 #endif
 
-	count_7mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev);
-      }
+  return chrpos - 32;
+}
 
-    } else if (indexsize == 6) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
+#else	/* HAVE_AVX2 */
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+static int
+store_5mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
 
-	nexthigh_rev = high0_rev;
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  oligo = nexthigh_rev >> 24;	/* For 31..28 */
+  oligo |= low_rev << 8;
 
-	count_6mers_fwd_simd_128(counts,inquery,current256,next256);
-      }
-#endif
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
+  }
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
+  }
 
-	nexthigh_rev = high0_rev;
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
+  }
 
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
+  }
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-#if 0
-	extract_6mers_fwd_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_6mers_fwd_simd(counts,inquery,current,next);
-#endif
-      }
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
+  }
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
+  }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-#endif
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
+  }
 
-	count_6mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev);
-      }
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
+  }
 
-    } else if (indexsize == 5) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
+  }
+
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
+  }
+
+
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
+
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
+  }
+
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
+  }
+
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
+  }
+
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
+  }
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
 
-	nexthigh_rev = high0_rev;
+  oligo = low_rev >> 24;	/* For 15..12 */
+  oligo |= high_rev << 8;
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-	count_5mers_fwd_simd_128(counts,inquery,current256,next256);
-      }
-#endif
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
+  }
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
+  }
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
+  }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
-#endif
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-#if 0
-	extract_5mers_fwd_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_5mers_fwd_simd(counts,inquery,current,next);
-#endif
-      }
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
+  }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
+  }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
+  }
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
+  }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-#endif
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
+  }
 
-	count_5mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev);
-      }
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
+  }
 
-    } else {
-      abort();
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
     }
+  }
 
-    ptr -= 3;
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
+  }
 
-    /* Start block */
-    assert(ptr == startptr);
 
-#ifdef WORDS_BIGENDIAN
-    high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-    high0 = ref_blocks[ptr];
-    low0 = ref_blocks[ptr+1];
-    /* nextlow = ref_blocks[ptr+4]; */
-#endif
-    if (mode == CMET_STRANDED) {
-      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-      } else {
-	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-      }
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
+
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
     }
+  }
 
-    current = _mm_set_epi32(0,0,high0,low0);
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-    current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-    current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
+  }
 
-    nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-    high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-    low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-    high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-    low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-#endif
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
+  }
 
-    if (indexsize == 9) {
-      count_9mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 8) {
-      count_8mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 7) {
-      count_7mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 6) {
-      count_6mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 5) {
-      count_5mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
     }
   }
-  
-  return;
+
+  return chrpos - 32;
 }
 
-#endif
+#endif  /* HAVE_AVX2 */
+
 
 
-#ifndef USE_SIMD_FOR_COUNTS
+#ifndef HAVE_SSE2
+
 static void
-store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize,
-			 Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+count_positions_fwd_std (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
 			 int genestrand) {
   int startdiscard, enddiscard;
   Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
     low, high, nextlow;
 
+  debug(printf("Starting count_positions_fwd_std\n"));
 
   if (left_plus_length < (Univcoord_T) indexsize) {
     left_plus_length = 0;
   } else {
     left_plus_length -= indexsize;
   }
-  chrpos += (left_plus_length - left); /* We are starting from the right */
 
   startptr = left/32U*3;
   ptr = endptr = left_plus_length/32U*3;
@@ -29047,7 +19315,10 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     low = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
       high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
@@ -29055,6 +19326,22 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
       } else {
 	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
       }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
     }
 
     high_rev = reverse_nt[low >> 16];
@@ -29065,15 +19352,15 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
 
     if (indexsize == 9) {
-      chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -29092,7 +19379,10 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     low = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
       high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
@@ -29100,195 +19390,107 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
       } else {
 	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
       }
-    }
-
-    high_rev = reverse_nt[low >> 16];
-    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-    low_rev = reverse_nt[high >> 16];
-    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-    nexthigh_rev = reverse_nt[nextlow >> 16];
-    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-    if (indexsize == 9) {
-      chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 8) {
-      chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 7) {
-      chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 6) {
-      chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 5) {
-      chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
-    } else {
-      abort();
-    }
-
-    if (indexsize == 9) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	chrpos = store_9mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 8) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	chrpos = store_8mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 7) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	chrpos = store_7mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 6) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
-
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	chrpos = store_6mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
       }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
+
+    high_rev = reverse_nt[low >> 16];
+    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+    low_rev = reverse_nt[high >> 16];
+    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+    nexthigh_rev = reverse_nt[nextlow >> 16];
+    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
 
+    if (indexsize == 9) {
+      count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 8) {
+      count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 7) {
+      count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 6) {
+      count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 5) {
-      while (ptr > startptr + 3) {
-	ptr -= 3;
+      count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else {
+      abort();
+    }
+
+    /* Middle blocks */
+    while (ptr > startptr + 3) {
+      ptr -= 3;
 
 #ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      high = Bigendian_convert_uint(ref_blocks[ptr]);
+      low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+      high = ref_blocks[ptr];
+      low = ref_blocks[ptr+1];
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
+
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
 	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
+	} else {
+	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
 	}
+      } else if (mode == ATOI_STRANDED) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	}
+      }
 
-	high_rev = reverse_nt[low >> 16];
-	high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
-	low_rev = reverse_nt[high >> 16];
-	low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
-	nexthigh_rev = reverse_nt[nextlow >> 16];
-	nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
-
-	chrpos = store_5mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev);
+      high_rev = reverse_nt[low >> 16];
+      high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+      low_rev = reverse_nt[high >> 16];
+      low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+      nexthigh_rev = reverse_nt[nextlow >> 16];
+      nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+
+      if (indexsize == 9) {
+	count_9mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 8) {
+	count_8mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 7) {
+	count_7mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 6) {
+	count_6mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 5) {
+	count_5mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev);
+      } else {
+	abort();
       }
-    } else {
-      abort();
     }
 
     ptr -= 3;
@@ -29305,7 +19507,10 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     low = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
       high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
@@ -29313,6 +19518,22 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
       } else {
 	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
       }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
     }
 
     high_rev = reverse_nt[low >> 16];
@@ -29323,36 +19544,239 @@ store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
 
     if (indexsize == 9) {
-      chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
     }
 
   }
-  
+
   return;
 }
 #endif
 
 
-#ifdef USE_SIMD_FOR_COUNTS
+#ifdef HAVE_AVX2
+static __m256i
+apply_mode_fwd_256 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand) {
+  Genomecomp_T low0, high0, low1, high1, low2, high2, low3, high3, nextlow;
+  /* Reads the high/low words of four consecutive 3-word groups at block_ptr, reduces them per mode, packs them */
+  high0 = block_ptr[0]; low0 = block_ptr[1];
+  high1 = block_ptr[3]; low1 = block_ptr[4];
+  high2 = block_ptr[6]; low2 = block_ptr[7];
+  high3 = block_ptr[9]; low3 = block_ptr[10];
+  nextlow = block_ptr[13];	/* NOTE(review): reduced below but not part of the returned vector */
+  /* A mode matching none of the tests below leaves all words unchanged */
+  if (mode == CMET_STRANDED) {
+    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+    nextlow = Cmet_reduce_ct(nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+      high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+      high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+      high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+      nextlow = Cmet_reduce_ct(nextlow);
+    } else {
+      high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+      high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+      high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+      nextlow = Cmet_reduce_ga(nextlow);
+    }
+
+  } else if (mode == ATOI_STRANDED) {
+    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+    high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+    high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+    nextlow = Atoi_reduce_tc(nextlow);
+  } else if (mode == ATOI_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      nextlow = Atoi_reduce_tc(nextlow);
+    } else {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      nextlow = Atoi_reduce_ag(nextlow);
+    }
+
+  } else if (mode == TTOC_STRANDED) {
+    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+    high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+    high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+    nextlow = Atoi_reduce_ag(nextlow);
+  } else if (mode == TTOC_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      nextlow = Atoi_reduce_ag(nextlow);
+    } else {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      nextlow = Atoi_reduce_tc(nextlow);
+    }
+  }
+  /* _mm256_set_epi32 lists elements from highest to lowest, so low0 lands in element 0 and high3 in element 7 */
+  return _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
+}
+#endif
+
+/* Reads the high and low words of eight consecutive 3-word groups starting at block_ptr,
+   applies the Cmet/Atoi reduction selected by mode (and genestrand), and packs the 16 words. */
+#ifdef HAVE_AVX512
+static __m512i
+apply_mode_fwd_512 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand) {
+  Genomecomp_T low0, high0, low1, high1, low2, high2, low3, high3,
+    low4, high4, low5, high5, low6, high6, low7, high7, nextlow;
+
+  high0 = block_ptr[0]; low0 = block_ptr[1];
+  high1 = block_ptr[3]; low1 = block_ptr[4];
+  high2 = block_ptr[6]; low2 = block_ptr[7];
+  high3 = block_ptr[9]; low3 = block_ptr[10];
+  /* Groups 4 through 7 */
+  high4 = block_ptr[12]; low4 = block_ptr[13];
+  high5 = block_ptr[15]; low5 = block_ptr[16];
+  high6 = block_ptr[18]; low6 = block_ptr[19];
+  high7 = block_ptr[21]; low7 = block_ptr[22];
+  nextlow = block_ptr[25];	/* NOTE(review): reduced below but not part of the returned vector */
+  /* A mode matching none of the tests below leaves all words unchanged */
+  if (mode == CMET_STRANDED) {
+    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+    high4 = Cmet_reduce_ct(high4); low4 = Cmet_reduce_ct(low4);
+    high5 = Cmet_reduce_ct(high5); low5 = Cmet_reduce_ct(low5);
+    high6 = Cmet_reduce_ct(high6); low6 = Cmet_reduce_ct(low6);
+    high7 = Cmet_reduce_ct(high7); low7 = Cmet_reduce_ct(low7);
+    nextlow = Cmet_reduce_ct(nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+      high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+      high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+      high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+      high4 = Cmet_reduce_ct(high4); low4 = Cmet_reduce_ct(low4);
+      high5 = Cmet_reduce_ct(high5); low5 = Cmet_reduce_ct(low5);
+      high6 = Cmet_reduce_ct(high6); low6 = Cmet_reduce_ct(low6);
+      high7 = Cmet_reduce_ct(high7); low7 = Cmet_reduce_ct(low7);
+      nextlow = Cmet_reduce_ct(nextlow);
+    } else {
+      high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+      high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+      high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+      high4 = Cmet_reduce_ga(high4); low4 = Cmet_reduce_ga(low4);
+      high5 = Cmet_reduce_ga(high5); low5 = Cmet_reduce_ga(low5);
+      high6 = Cmet_reduce_ga(high6); low6 = Cmet_reduce_ga(low6);
+      high7 = Cmet_reduce_ga(high7); low7 = Cmet_reduce_ga(low7);
+      nextlow = Cmet_reduce_ga(nextlow);
+    }
+
+  } else if (mode == ATOI_STRANDED) {
+    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+    high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+    high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+    high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+    high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+    high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+    high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+    nextlow = Atoi_reduce_tc(nextlow);
+  } else if (mode == ATOI_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+      high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+      high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+      high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+      nextlow = Atoi_reduce_tc(nextlow);
+    } else {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+      high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+      high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+      high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+      nextlow = Atoi_reduce_ag(nextlow);
+    }
+
+  } else if (mode == TTOC_STRANDED) {
+    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+    high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+    high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+    high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+    high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+    high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+    high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+    nextlow = Atoi_reduce_ag(nextlow);
+  } else if (mode == TTOC_NONSTRANDED) {	/* Same reductions as ATOI_NONSTRANDED with the strands swapped */
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+      high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+      high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+      high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+      nextlow = Atoi_reduce_ag(nextlow);
+    } else {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+      high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+      high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+      high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+      nextlow = Atoi_reduce_tc(nextlow);
+    }
+  }
+  /* _mm512_set_epi32 lists elements from highest to lowest, so low0 lands in element 0 and high7 in element 15 */
+  return _mm512_set_epi32(high7,low7,high6,low6,high5,low5,high4,low4,
+			  high3,low3,high2,low2,high1,low1,high0,low0);
+}
+#endif
+
+
+#ifdef HAVE_SSE2
 static void
-store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize,
-			  Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
-			  int genestrand) {
+count_positions_fwd_simd (Count_T *counts, int indexsize,
+			  Univcoord_T left, Univcoord_T left_plus_length, int genestrand) {
   int startdiscard, enddiscard;
   Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow;
   Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1;
-  __m128i current, next, mask2, mask4;
+  __m128i current, a, b, next, mask2, mask4;
   __m128i array[16];
 #ifdef HAVE_SSSE3
   __m128i reverse8;
@@ -29367,19 +19791,24 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
 #ifdef HAVE_AVX2
   __m256i array256[16];
   Genomecomp_T low2, high2, low3, high3;
-  __m256i current256, next256, temp256, bigmask2, bigmask4, bigreverse8;
+  __m256i current256, a256, b256, c256, d256, next256, temp256, bigmask2, bigmask4, bigreverse8;
   __m256i shift256;
 #endif
+#ifdef HAVE_AVX512
+  __m512i array512[16];
+  Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7;
+  __m512i current512, a512, b512, next512, temp512, hugemask2, hugemask4;
+  __m512i shift512;
+#endif
 
 
-  debug(printf("Starting store_positions_fwd_simd\n"));
+  debug(printf("Starting count_positions_fwd_simd\n"));
 
   if (left_plus_length < (Univcoord_T) indexsize) {
     left_plus_length = 0;
   } else {
     left_plus_length -= indexsize;
   }
-  chrpos += (left_plus_length - left); /* We are starting from the right */
 
   startptr = left/32U*3;
   ptr = endptr = left_plus_length/32U*3;
@@ -29400,6 +19829,12 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
                                 0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
   shift256 = _mm256_setr_epi32(1,2,3,4,5,6,7,0);
 #endif
+#ifdef HAVE_AVX512
+  hugemask2 = _mm512_set1_epi32(0x33333333);
+  hugemask4 = _mm512_set1_epi32(0x0F0F0F0F);
+  /* hugereverse8 = _mm512_broadcast_i64x4(bigreverse8); */
+  shift512 = _mm512_setr_epi32(1,2,3,4,5,6,7,8, 9,10,11,12,13,14,15,0);
+#endif
 
   if (left_plus_length <= left) {
     /* Skip */
@@ -29414,7 +19849,10 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     low0 = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
       high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
@@ -29422,6 +19860,22 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
       } else {
 	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
       }
+    } else if (mode == ATOI_STRANDED) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      }
     }
 
     current = _mm_set_epi32(0,nextlow,high0,low0);
@@ -29434,6 +19888,7 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
+    /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
     high0_rev = (unsigned int) _mm_extract_epi32(current,0);
     low0_rev = (unsigned int) _mm_extract_epi32(current,1);
@@ -29445,15 +19900,15 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
 #endif
 
     if (indexsize == 9) {
-      chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+      count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -29472,7 +19927,10 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     low0 = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
       high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
@@ -29480,6 +19938,22 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
       } else {
 	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
       }
+    } else if (mode == ATOI_STRANDED) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      }
     }
 
     current = _mm_set_epi32(0,nextlow,high0,low0);
@@ -29492,6 +19966,7 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
+    /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
     high0_rev = (unsigned int) _mm_extract_epi32(current,0);
     low0_rev = (unsigned int) _mm_extract_epi32(current,1);
@@ -29503,902 +19978,1322 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
 #endif
 
     if (indexsize == 9) {
-      chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+      count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+      count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+      count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+      count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+      count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else {
       abort();
     }
 
     /* Middle blocks */
-    if (indexsize == 9) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
+#ifdef HAVE_AVX512
+    while (ptr > startptr + 24) {
+      ptr -= 24;
+
+      if (mode == STANDARD) {
+	a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr]));
+	b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7]));
+	current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(1, 0, 4, 3, 7, 6, 10, 9, 13, 12, 16+9, 16+8, 16+12, 16+11, 16+15, 16+14), b512);
+      } else {
+	current512 = apply_mode_fwd_512(&(ref_blocks[ptr]),mode,genestrand);
+      }
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+      current512 = _mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,2),hugemask2),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask2),2)); /* Swap pairs */
+      current512 = _mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,4),hugemask4),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask4),4)); /* Swap nibbles */
+#ifdef HAVE_AVX512BW
+      current512 = _mm512_shuffle_epi8(current512,hugereverse8); /* Reverse bytes within 128-bit lanes*/
+#else
+      /* Reverse bytes within 128-bit lanes*/
+      current256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x0),bigreverse8);
+      next256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x1),bigreverse8);
+      current512 = _mm512_broadcast_i64x4(next256);
+      current512 = _mm512_inserti64x4(current512,current256,0x0);
+#endif
+
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
+
+      current = _mm512_extracti32x4_epi32(current512,0);
+      high0_rev = (unsigned int) _mm_extract_epi32(current, 0); /* Generate for next loop */
+      
+      temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+      temp512 = _mm512_inserti32x4(current512,temp,0x00);
+      next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! */
+
+      if (indexsize == 9) {
+	extract_9mers_fwd_simd_256(array512,current512,next512);
+      } else if (indexsize == 8) {
+	extract_8mers_fwd_simd_256(array512,current512,next512);
+      } else if (indexsize == 7) {
+	extract_7mers_fwd_simd_256(array512,current512,next512);
+      } else if (indexsize == 6) {
+	extract_6mers_fwd_simd_256(array512,current512,next512);
+      } else if (indexsize == 5) {
+	extract_5mers_fwd_simd_256(array512,current512,next512);
+      } else {
+	abort();
+      }
+      count_fwdrev_simd_n(counts,(Genomecomp_T *) array512,256);
+    }
+#endif
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
 
-	nexthigh_rev = high0_rev;
+#ifdef HAVE_AVX2
+    while (ptr > startptr + 12) {
+      ptr -= 12;
+
+      if (mode == STANDARD) {
+	a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr]));
+	b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3]));
+	c256 = _mm256_unpacklo_epi64(a256,b256);
+	d256 = _mm256_unpackhi_epi64(a256,b256);
+	current256 = _mm256_permute2x128_si256(c256, d256, 0x30);
+	current256 = _mm256_shuffle_epi32(current256, 0xB1); /* 0b10110001 */
+      } else {
+	current256 = apply_mode_fwd_256(&(ref_blocks[ptr]),mode,genestrand);
+      }
+
+      current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
+      current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
+      current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes*/
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
+      high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); /* Generate for next loop */
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+#if 0
+      /* Doesn't work, because performs shift within 128-bit lanes */
+      next256 = _mm256_alignr_epi8(_mm256_set1_epi32(nexthigh_rev),current256,4);
+#else
+      temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
+      next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+#endif
 
+      if (indexsize == 9) {
 	extract_9mers_fwd_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
+      } else if (indexsize == 8) {
+	extract_8mers_fwd_simd_128(array256,current256,next256);
+      } else if (indexsize == 7) {
+	extract_7mers_fwd_simd_128(array256,current256,next256);
+      } else if (indexsize == 6) {
+	extract_6mers_fwd_simd_128(array256,current256,next256);
+      } else if (indexsize == 5) {
+	extract_5mers_fwd_simd_128(array256,current256,next256);
+      } else {
+	abort();
       }
+      count_fwdrev_simd_n(counts,(Genomecomp_T *) array256,128);
+    }
 #endif
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+    while (ptr > startptr + 6) {
+      ptr -= 6;
 
+#ifdef HAVE_SSSE3
+      if (mode == STANDARD) {
+	a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr]));
+	b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3]));
+	current = _mm_unpacklo_epi64(a,b);
+	current = _mm_shuffle_epi32(current, 0xB1); /* 0b10110001 */
+#ifndef HAVE_SSE4_1
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
 	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
 #else
-	high0 = ref_blocks[ptr];
 	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
 #endif
-	if (mode == CMET_STRANDED) {
+#endif
+	
+      } else {
+#ifdef WORDS_BIGENDIAN
+	high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+#else
+	high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1];
+	high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+	nextlow = ref_blocks[ptr+7];
+#endif
+
+	if (mode == STANDARD) {
+	  /* Skip */
+	} else if (mode == CMET_STRANDED) {
 	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
 	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
+	  nextlow = Cmet_reduce_ct(nextlow);
 	} else if (mode == CMET_NONSTRANDED) {
 	  if (genestrand > 0) {
 	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
 	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
+	    nextlow = Cmet_reduce_ct(nextlow);
 	  } else {
 	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
 	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
+	    nextlow = Cmet_reduce_ga(nextlow);
+	  }
+
+	} else if (mode == ATOI_STRANDED) {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	} else if (mode == ATOI_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
+	  } else {
+	    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  }
+
+	} else if (mode == TTOC_STRANDED) {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	} else if (mode == TTOC_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  } else {
+	    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
 	  }
 	}
 
 	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+      }
+
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+      current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
+#ifdef HAVE_SSE4_1
+      high0_rev = (unsigned int) _mm_extract_epi32(current,0); /* Generate for next loop (SSE4.1 and higher) */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+      high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop (SSSE3) */
+#endif
+
+#if 1
+      next = _mm_alignr_epi8(_mm_set1_epi32(nexthigh_rev),current,4);
+#else
+      /* Previous solution for SSE4.1 */
+      temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+      next = _mm_shuffle_epi32(temp,0x39);
 #endif
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+      /* Non-SSSE3 */
+#ifdef WORDS_BIGENDIAN
+      high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+#else
+      high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1];
+      high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+      nextlow = ref_blocks[ptr+7];
+#endif
+
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+	nextlow = Cmet_reduce_ct(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+	  nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+	  nextlow = Cmet_reduce_ga(nextlow);
+	}
+
+      } else if (mode == ATOI_STRANDED) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	}
+
+      } else if (mode == TTOC_STRANDED) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	}
+      }
+
+      current = _mm_set_epi32(high1,low1,high0,low0);
+
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+      current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+      high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop */
+      low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+      high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+      low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+
+      next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
 #endif
 
-	extract_9mers_fwd_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
+      if (indexsize == 9) {
+	extract_9mers_fwd_simd_64(array,current,next);
+      } else if (indexsize == 8) {
+	extract_8mers_fwd_simd_64(array,current,next);
+      } else if (indexsize == 7) {
+	extract_7mers_fwd_simd_64(array,current,next);
+      } else if (indexsize == 6) {
+	extract_6mers_fwd_simd_64(array,current,next);
+      } else if (indexsize == 5) {
+	extract_5mers_fwd_simd_64(array,current,next);
+      } else {
+	abort();
       }
+      count_fwdrev_simd_n(counts,(Genomecomp_T *) array,64);
+    }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+    if (ptr > startptr + 3) {
+      ptr -= 3;
 
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+      high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+      low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
+      high0 = ref_blocks[ptr];
+      low0 = ref_blocks[ptr+1];
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
+
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
 	}
+      }
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+      current = _mm_set_epi32(0,nextlow,high0,low0);
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
 #ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+      current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+      current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
-	nexthigh_rev = high0_rev;
+      /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-#endif
-
-	chrpos = store_9mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+      low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+      nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
+#else
+      high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+      low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+      nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
+      
+      if (indexsize == 9) {
+	count_9mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 8) {
+	count_8mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 7) {
+	count_7mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 6) {
+	count_6mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 5) {
+	count_5mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev);
+      } else {
+	abort();
       }
+    }
 
-    } else if (indexsize == 8) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
-
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
-
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
-
-	nexthigh_rev = high0_rev;
-
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
-
-	extract_8mers_fwd_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-      }
-#endif
+    ptr -= 3;
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+    /* Start block */
+    assert(ptr == startptr);
 
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+    high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
+    high0 = ref_blocks[ptr];
+    low0 = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
+
+    current = _mm_set_epi32(0,nextlow,high0,low0);
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
 #ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+    current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+    current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
-	nexthigh_rev = high0_rev;
+    /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
+    high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+    low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+    nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+    high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+    low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+    nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
+
+    if (indexsize == 9) {
+      count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 8) {
+      count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 7) {
+      count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 6) {
+      count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 5) {
+      count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
+  }
+  
+  return;
+}
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
 #endif
 
-	extract_8mers_fwd_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-      }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+#ifndef HAVE_SSE2
+static void /* Scalar variant (compiled only when HAVE_SSE2 is undefined): walks the 32-position blocks of [left, left_plus_length) from right to left and stores positions through the store_Nmers_fwd_* helpers */
+store_positions_fwd_std (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize,
+			 Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+			 int genestrand) { /* genestrand is only consulted in the *_NONSTRANDED modes */
+  int startdiscard, enddiscard;
+  Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev,
+    low, high, nextlow;
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+  if (left_plus_length < (Univcoord_T) indexsize) { /* avoid subtracting below zero */
+    left_plus_length = 0;
+  } else {
+    left_plus_length -= indexsize; /* presumably the last position where a full indexsize-mer can start -- TODO confirm */
+  }
+  chrpos += (left_plus_length - left); /* We are starting from the right */
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+  startptr = left/32U*3; /* ref_blocks index of the first block: 3 words per 32 positions */
+  ptr = endptr = left_plus_length/32U*3; /* index of the last block; ptr walks down from here */
+  startdiscard = left % 32; /* (left+pos5) % 32 */
+  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+  
+  if (left_plus_length <= left) {
+    /* Skip: the adjusted end is not beyond left */
+
+  } else if (startptr == endptr) { /* whole region lies within a single block */
+#ifdef WORDS_BIGENDIAN
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4]; /* word 1 of the following block (ptr+3) */
 #endif
 
-	chrpos = store_8mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+    if (mode == STANDARD) { /* mode is not a parameter of this function; it is defined outside it */
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
       }
+    }
 
-    } else if (indexsize == 7) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
+    high_rev = reverse_nt[low >> 16]; /* reverse_nt is looked up with each 16-bit half; the result built from the low word goes into high_rev */
+    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+    low_rev = reverse_nt[high >> 16];
+    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+    nexthigh_rev = reverse_nt[nextlow >> 16];
+    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+    if (indexsize == 9) {
+      chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 8) {
+      chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 7) {
+      chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 6) {
+      chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 5) {
+      chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
+  } else {
+    /* Genome_print_blocks(ref_blocks,left,left+16); */
 
-	nexthigh_rev = high0_rev;
+    /* End block: the rightmost block, handled first */
+#ifdef WORDS_BIGENDIAN
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
+#endif
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+    high_rev = reverse_nt[low >> 16];
+    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+    low_rev = reverse_nt[high >> 16];
+    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+    nexthigh_rev = reverse_nt[nextlow >> 16];
+    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
 
-	extract_7mers_fwd_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-      }
-#endif
+    if (indexsize == 9) {
+      chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 8) {
+      chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 7) {
+      chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 6) {
+      chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 5) {
+      chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else {
+      abort(); /* NOTE(review): unlike the single-block and start-block chains, no "not supported" message is printed before aborting */
+    }
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+    while (ptr > startptr + 3) { /* full blocks strictly between the end block and the start block */
+      ptr -= 3;
 
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+      high = Bigendian_convert_uint(ref_blocks[ptr]);
+      low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
+      high = ref_blocks[ptr];
+      low = ref_blocks[ptr+1];
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
+
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
 	}
+      }
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+      high_rev = reverse_nt[low >> 16];
+      high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+      low_rev = reverse_nt[high >> 16];
+      low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+      nexthigh_rev = reverse_nt[nextlow >> 16];
+      nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev);
+      } else {
+	abort(); /* NOTE(review): unlike the single-block and start-block chains, no "not supported" message is printed before aborting */
+      }
+    }
 
-	nexthigh_rev = high0_rev;
-#ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+    ptr -= 3; /* step onto the start block */
 
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+    /* Start block: the leftmost block, handled last */
+    assert(ptr == startptr);
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#ifdef WORDS_BIGENDIAN
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
 
-	extract_7mers_fwd_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
       }
+    }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+    high_rev = reverse_nt[low >> 16];
+    high_rev |= (reverse_nt[low & 0x0000FFFF] << 16);
+    low_rev = reverse_nt[high >> 16];
+    low_rev |= (reverse_nt[high & 0x0000FFFF] << 16);
+    nexthigh_rev = reverse_nt[nextlow >> 16];
+    nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16);
 
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+    if (indexsize == 9) {
+      chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 8) {
+      chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 7) {
+      chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 6) {
+      chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 5) {
+      chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
+
+  }
+  
+  return;
+}
+#endif
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+
+#ifdef HAVE_SSE2
+static void
+store_positions_fwd_simd (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize,
+			  Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+			  int genestrand) {
+  int startdiscard, enddiscard;
+  Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow;
+  Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1;
+  __m128i current, a, b, next, mask2, mask4;
 #ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+  __m128i reverse8;
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+  __m128i mask8;
 #endif
-
-	nexthigh_rev = high0_rev;
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+  __m128i temp;
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+  Genomecomp_T high1_rev, low1_rev;
 #endif
-
-	chrpos = store_7mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
-      }
-
-    } else if (indexsize == 6) {
 #ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
-
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
-
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
+  Genomecomp_T low2, high2, low3, high3;
+  __m256i current256, a256, b256, c256, d256, next256, temp256, bigmask2, bigmask4, bigreverse8;
+  __m256i shift256;
+#endif
+#ifdef HAVE_AVX512
+  Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7;
+  __m512i current512, a512, b512, next512, temp512, hugemask2, hugemask4;
+  __m512i shift512;
+#endif
 
-	nexthigh_rev = high0_rev;
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
+  debug(printf("Starting store_positions_fwd_simd\n"));
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  if (left_plus_length < (Univcoord_T) indexsize) {
+    left_plus_length = 0;
+  } else {
+    left_plus_length -= indexsize;
+  }
+  chrpos += (left_plus_length - left); /* We are starting from the right */
 
-	extract_6mers_fwd_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-      }
+  startptr = left/32U*3;
+  ptr = endptr = left_plus_length/32U*3;
+  startdiscard = left % 32; /* (left+pos5) % 32 */
+  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+  
+  mask2 = _mm_set1_epi32(0x33333333);
+  mask4 = _mm_set1_epi32(0x0F0F0F0F);
+#ifdef HAVE_SSSE3
+  reverse8 = _mm_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
+#else
+  mask8 = _mm_set1_epi32(0x00FF00FF);
+#endif
+#ifdef HAVE_AVX2
+  bigmask2 = _mm256_set1_epi32(0x33333333);
+  bigmask4 = _mm256_set1_epi32(0x0F0F0F0F);
+  bigreverse8 = _mm256_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03,
+                                0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03);
+  shift256 = _mm256_setr_epi32(1,2,3,4,5,6,7,0);
+#endif
+#ifdef HAVE_AVX512
+  hugemask2 = _mm512_set1_epi32(0x33333333);
+  hugemask4 = _mm512_set1_epi32(0x0F0F0F0F);
+  shift512 = _mm512_setr_epi32(1,2,3,4,5,6,7,8, 9,10,11,12,13,14,15,0);
 #endif
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+  if (left_plus_length <= left) {
+    /* Skip */
 
+  } else if (startptr == endptr) {
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
+    high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
+    high0 = ref_blocks[ptr];
+    low0 = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
 
-	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
+
+    current = _mm_set_epi32(0,nextlow,high0,low0);
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
 #ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+    current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+    current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
-	nexthigh_rev = high0_rev;
+    /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
+    high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+    low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+    nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
-
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+    high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+    low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+    nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
 #endif
 
-	extract_6mers_fwd_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-      }
+    if (indexsize == 9) {
+      /* chrpos = */ store_9mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 8) {
+      /* chrpos = */ store_8mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 7) {
+      /* chrpos = */ store_7mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 6) {
+      /* chrpos = */ store_6mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else if (indexsize == 5) {
+      /* chrpos = */ store_5mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+  } else {
+    /* Genome_print_blocks(ref_blocks,left,left+16); */
 
+    /* End block */
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+    high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
+    high0 = ref_blocks[ptr];
+    low0 = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
+
+    current = _mm_set_epi32(0,nextlow,high0,low0);
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
 #ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+    current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+    current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+    current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
-	nexthigh_rev = high0_rev;
+    /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+    high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+    low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+    nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+    high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+    low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+    nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
 #endif
 
-	chrpos = store_6mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
-      }
-
+    if (indexsize == 9) {
+      chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 8) {
+      chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 7) {
+      chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 6) {
+      chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 5) {
-#ifdef HAVE_AVX2
-      while (ptr > startptr + 12) {
-	ptr -= 12;
-
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */
-#else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	/* nextlow = ref_blocks[ptr+13]; */
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	  high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
+      chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard);
+    } else {
+      abort();
+    }
 
-	current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
-	current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
-	current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
+    /* Middle blocks */
+#ifdef HAVE_AVX512
+    while (ptr > startptr + 24) {
+      ptr -= 24;
+
+      if (mode == STANDARD) {
+	a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr]));
+	b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7]));
+	current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(1, 0, 4, 3, 7, 6, 10, 9, 13, 12, 16+9, 16+8, 16+12, 16+11, 16+15, 16+14), b512);
+      } else {
+	current512 = apply_mode_fwd_512(&(ref_blocks[ptr]),mode,genestrand);
+      }
 
-	nexthigh_rev = high0_rev;
+      current512 = _mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,2),hugemask2),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask2),2)); /* Swap pairs */
+      current512 = _mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,4),hugemask4),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask4),4)); /* Swap nibbles */
+#ifdef HAVE_AVX512BW
+      current512 = _mm512_shuffle_epi8(current512,hugereverse8); /* Reverse bytes within 128-bit lanes*/
+#else
+      /* Reverse bytes within 128-bit lanes*/
+      current256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x0),bigreverse8);
+      next256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x1),bigreverse8);
+      current512 = _mm512_broadcast_i64x4(next256);
+      current512 = _mm512_inserti64x4(current512,current256,0x0);
+#endif
+
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
+
+      current = _mm512_extracti32x4_epi32(current512,0);
+      high0_rev = (unsigned int) _mm_extract_epi32(current, 0); /* Generate for next loop */
+      
+      temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+      temp512 = _mm512_inserti32x4(current512,temp,0x00);
+      next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! */
+
+      if (indexsize == 9) {
+	chrpos = store_9mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else {
+	abort();
+      }
+    }
+#endif
 
-	high0_rev = (unsigned int) _mm256_extract_epi32(current256,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
 
-	temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+#ifdef HAVE_AVX2
+    while (ptr > startptr + 12) {
+      ptr -= 12;
+
+      if (mode == STANDARD) {
+	a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr]));
+	b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3]));
+	c256 = _mm256_unpacklo_epi64(a256,b256);
+	d256 = _mm256_unpackhi_epi64(a256,b256);
+	current256 = _mm256_permute2x128_si256(c256, d256, 0x30);
+	current256 = _mm256_shuffle_epi32(current256, 0xB1); /* 0b10110001 */
+      } else {
+	current256 = apply_mode_fwd_256(&(ref_blocks[ptr]),mode,genestrand);
+      }
 
-	extract_5mers_fwd_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
+      current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */
+      current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */
+      current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */
+      
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
+      high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); /* Generate for next loop */
+      
+#if 0
+      /* Doesn't work, because performs shift within 128-bit lanes */
+      next256 = _mm256_alignr_epi8(_mm256_set1_epi32(nexthigh_rev),current256,4);
+#else
+      temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00);
+      next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+#endif
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else {
+	abort();
       }
+    }
 #endif
 
-      while (ptr > startptr + 6) {
-	ptr -= 6;
+    while (ptr > startptr + 6) {
+      ptr -= 6;
 
+#ifdef HAVE_SSSE3
+      if (mode == STANDARD) {
+	a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr]));
+	b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3]));
+	current = _mm_unpacklo_epi64(a,b);
+	current = _mm_shuffle_epi32(current, 0xB1); /* 0b10110001 */
+#ifndef HAVE_SSE4_1
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
 	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */
 #else
-	high0 = ref_blocks[ptr];
 	low0 = ref_blocks[ptr+1];
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	/* nextlow = ref_blocks[ptr+7]; */
 #endif
-	if (mode == CMET_STRANDED) {
+#endif
+
+      } else {
+#ifdef WORDS_BIGENDIAN
+	high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+#else
+	high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1];
+	high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+	nextlow = ref_blocks[ptr+7];
+#endif
+
+	if (mode == STANDARD) {
+	  /* Skip */
+	} else if (mode == CMET_STRANDED) {
 	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
 	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	  /* nextlow = Cmet_reduce_ct(nextlow); */
+	  nextlow = Cmet_reduce_ct(nextlow);
 	} else if (mode == CMET_NONSTRANDED) {
 	  if (genestrand > 0) {
 	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
 	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    /* nextlow = Cmet_reduce_ct(nextlow); */
+	    nextlow = Cmet_reduce_ct(nextlow);
 	  } else {
 	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
 	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    /* nextlow = Cmet_reduce_ga(nextlow); */
+	    nextlow = Cmet_reduce_ga(nextlow);
+	  }
+	} else if (mode == ATOI_STRANDED) {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	} else if (mode == ATOI_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
+	  } else {
+	    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  }
+	} else if (mode == TTOC_STRANDED) {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	} else if (mode == TTOC_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  } else {
+	    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
 	  }
 	}
 
 	current = _mm_set_epi32(high1,low1,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
-#ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
-#else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
-#endif
+      }
 
-	nexthigh_rev = high0_rev;
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+      current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+      
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	/* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */
-	/* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */
-	/* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */
-
-	temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
-	next = _mm_shuffle_epi32(temp,0x39);
+      high0_rev = (unsigned int) _mm_extract_epi32(current,0); /* Generate for next loop (SSE4.1 and higher) */
 #else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-	high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
-	low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+      high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop (SSSE3) */
+#endif
 
-	next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#if 1
+      next = _mm_alignr_epi8(_mm_set1_epi32(nexthigh_rev),current,4);
+#else
+      /* Previous solution for SSE4.1 */
+      temp = _mm_insert_epi32(current,nexthigh_rev,0x00);
+      next = _mm_shuffle_epi32(temp,0x39);
 #endif
 
-	extract_5mers_fwd_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
+
+#else
+      /* Non-SSSE3: load high/low words of the blocks at ptr and ptr+3, plus ref_blocks[ptr+7] as nextlow */
+#ifdef WORDS_BIGENDIAN
+      high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+#else
+      high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1];
+      high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+      nextlow = ref_blocks[ptr+7];
+#endif
+
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+	nextlow = Cmet_reduce_ct(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+	  high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+	  nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+	  nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	}
+      }
+
+      current = _mm_set_epi32(high1,low1,high0,low0);
+
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+      current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+      
+      nexthigh_rev = high0_rev;	/* Take from previous loop */
+
+      high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop */
+      low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+      high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16);
+      low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16);
+      
+      next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev);
+#endif
+
+      if (indexsize == 9) {
+	chrpos = store_9mers_fwd_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_fwd_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_fwd_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_fwd_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_fwd_simd_64(chrpos,table,positions,counts,current,next);
+      } else {
+	abort();
       }
+    }
 
-      if (ptr > startptr + 3) {
-	ptr -= 3;
+    if (ptr > startptr + 3) {
+      ptr -= 3;
 
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	/* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+      high0 = Bigendian_convert_uint(ref_blocks[ptr]);
+      low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	low0 = ref_blocks[ptr+1];
-	/* nextlow = ref_blocks[ptr+4]; */
+      high0 = ref_blocks[ptr];
+      low0 = ref_blocks[ptr+1];
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
-	  }
-	}
 
-	current = _mm_set_epi32(0,0,high0,low0);
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+	}
+      }
+      
+      current = _mm_set_epi32(0,nextlow,high0,low0);
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
 #ifdef HAVE_SSSE3
-	current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
+      current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */
 #else
-	current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
-	current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
+      current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */
+      current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
-
-	nexthigh_rev = high0_rev;
+      
+      /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
-	high0_rev = (unsigned int) _mm_extract_epi32(current,0);
-	low0_rev = (unsigned int) _mm_extract_epi32(current,1);
-#else
-	high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
-	low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
-#endif
-
-	chrpos = store_5mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      high0_rev = (unsigned int) _mm_extract_epi32(current,0);
+      low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+      nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
+#else
+      high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
+      low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+      nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
+#endif
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev);
+      } else {
+	abort();
       }
-
-    } else {
-      abort();
     }
 
     ptr -= 3;
@@ -30409,23 +21304,42 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
 #ifdef WORDS_BIGENDIAN
     high0 = Bigendian_convert_uint(ref_blocks[ptr]);
     low0 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
     high0 = ref_blocks[ptr];
     low0 = ref_blocks[ptr+1];
-    /* nextlow = ref_blocks[ptr+4]; */
+    nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */
+	high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow);
       } else {
-	high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */
+	high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
-    current = _mm_set_epi32(0,0,high0,low0);
+    current = _mm_set_epi32(0,nextlow,high0,low0);
     current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */
     current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */
 #ifdef HAVE_SSSE3
@@ -30435,25 +21349,27 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */
 #endif
 
-    nexthigh_rev = high0_rev;
+    /* nexthigh_rev = high0_rev; */
 #ifdef HAVE_SSE4_1
     high0_rev = (unsigned int) _mm_extract_epi32(current,0);
     low0_rev = (unsigned int) _mm_extract_epi32(current,1);
+    nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2);
 #else
     high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16);
     low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16);
+    nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16);
 #endif
 
     if (indexsize == 9) {
-      chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
+      chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -30470,7 +21386,7 @@ store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
  ************************************************************************/
 
 static void
-count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc,
+count_9mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc,
 			 Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -30480,7 +21396,7 @@ count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 7) {
     masked = low_rc >> 2*pos;
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30489,7 +21405,7 @@ count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = low_rc >> 2*pos;
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30497,7 +21413,7 @@ count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 23) {
     masked = high_rc >> (2*pos - 32);
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30506,7 +21422,7 @@ count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = high_rc >> (2*pos - 32);
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK9;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30515,7 +21431,7 @@ count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
 }
 
 static int
-store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -30527,9 +21443,8 @@ store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rc >> 2*pos;
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30540,9 +21455,8 @@ store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30552,9 +21466,8 @@ store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rc >> (2*pos - 32);
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30565,9 +21478,8 @@ store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK9;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30578,7 +21490,7 @@ store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 
 
 static void
-count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc,
+count_8mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc,
 			 Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -30588,7 +21500,7 @@ count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 8) {
     masked = low_rc >> 2*pos;
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30597,7 +21509,7 @@ count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = low_rc >> 2*pos;
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30605,7 +21517,7 @@ count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 24) {
     masked = high_rc >> (2*pos - 32);
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30614,7 +21526,7 @@ count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = high_rc >> (2*pos - 32);
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK8;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30623,7 +21535,7 @@ count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
 }
 
 static int
-store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -30635,9 +21547,8 @@ store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rc >> 2*pos;
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30648,9 +21559,8 @@ store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30660,9 +21570,8 @@ store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rc >> (2*pos - 32);
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30673,9 +21582,8 @@ store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK8;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30685,7 +21593,7 @@ store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 }
 
 static void
-count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc,
+count_7mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc,
 			 Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -30695,7 +21603,7 @@ count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 9) {
     masked = low_rc >> 2*pos;
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30704,7 +21612,7 @@ count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = low_rc >> 2*pos;
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30712,7 +21620,7 @@ count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 25) {
     masked = high_rc >> (2*pos - 32);
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30721,7 +21629,7 @@ count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = high_rc >> (2*pos - 32);
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK7;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30730,7 +21638,7 @@ count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
 }
 
 static int
-store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -30742,9 +21650,8 @@ store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rc >> 2*pos;
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30755,9 +21662,8 @@ store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30767,9 +21673,8 @@ store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rc >> (2*pos - 32);
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30780,9 +21685,8 @@ store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK7;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30793,7 +21697,7 @@ store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 
 
 static void
-count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc,
+count_6mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc,
 			 Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -30803,7 +21707,7 @@ count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 10) {
     masked = low_rc >> 2*pos;
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30812,7 +21716,7 @@ count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = low_rc >> 2*pos;
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30820,7 +21724,7 @@ count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 26) {
     masked = high_rc >> (2*pos - 32);
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30829,7 +21733,7 @@ count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = high_rc >> (2*pos - 32);
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK6;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30838,7 +21742,7 @@ count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
 }
 
 static int
-store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -30850,9 +21754,8 @@ store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rc >> 2*pos;
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30863,9 +21766,8 @@ store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30875,9 +21777,8 @@ store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rc >> (2*pos - 32);
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30888,9 +21789,8 @@ store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK6;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30900,7 +21800,7 @@ store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 }
 
 static void
-count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc,
+count_5mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc,
 			 Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) {
   Genomecomp_T masked;
   int pos;
@@ -30910,7 +21810,7 @@ count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 11) {
     masked = low_rc >> 2*pos;
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30919,7 +21819,7 @@ count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = low_rc >> 2*pos;
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30927,7 +21827,7 @@ count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
   while (pos <= enddiscard && pos <= 27) {
     masked = high_rc >> (2*pos - 32);
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30936,7 +21836,7 @@ count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
     masked = high_rc >> (2*pos - 32);
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK5;
-    INCR_COUNT(counts[masked],inquery[masked]);
+    INCR_COUNT(counts[masked]);
     debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked]));
     pos++;
   }
@@ -30945,7 +21845,7 @@ count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_r
 }
 
 static int
-store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
+store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
 			 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc,
 			 int startdiscard, int enddiscard) {
   Genomecomp_T masked;
@@ -30957,9 +21857,8 @@ store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = low_rc >> 2*pos;
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30970,9 +21869,8 @@ store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= high_rc << (32 - 2*pos);
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30982,9 +21880,8 @@ store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked = high_rc >> (2*pos - 32);
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -30995,9 +21892,8 @@ store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
     masked |= nextlow_rc << (64 - 2*pos);
     masked &= MASK5;
     if (counts[masked]) {
-      assert(pointers[masked] > positions[masked]);
       debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos));
-      table[--pointers[masked]] = chrpos;
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
     chrpos--;
     pos++;
@@ -31011,7 +21907,7 @@ store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT
 #if !defined(HAVE_AVX2)
 
 static void
-count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_9mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -31024,35 +21920,35 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK9;	/* 0 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 2) & MASK9; /* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 4) & MASK9; /* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 6) & MASK9; /* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 8) & MASK9; /* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 10) & MASK9; /* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 12) & MASK9; /* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = low_rc >> 14;	/* 7, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -31064,19 +21960,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
 
@@ -31088,19 +21984,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -31110,35 +22006,35 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK9; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK9; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK9; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK9; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK9; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 10) & MASK9; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 12) & MASK9; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 14) & MASK9; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -31150,19 +22046,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
 
@@ -31174,54 +22070,54 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK9;	/* 16 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 2) & MASK9; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 4) & MASK9; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 6) & MASK9; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 8) & MASK9; /* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 10) & MASK9; /* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 12) & MASK9; /* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = high_rc >> 14;	/* 23, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -31233,19 +22129,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
 
@@ -31257,19 +22153,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -31279,35 +22175,35 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK9; /* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK9; /* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK9; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK9; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK9; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 10) & MASK9; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 12) & MASK9; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 14) & MASK9; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -31319,19 +22215,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
 
@@ -31343,19 +22239,19 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -31365,1904 +22261,582 @@ count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #else	/* HAVE_AVX2 */
 
 static void
-count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_9mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
   oligo = low_rc >> 16;		/* For 15..8 */
   oligo |= high_rc << 16;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  oligo = high_rc >> 16;	/* For 31..24 */
-  oligo |= nextlow_rc << 16;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask9);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  return;
-}
-
-#endif  /* HAVE_AVX2 */
-
-
-
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
-   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_9mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask9));
-
-  _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary */;
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
-  _mm_store_si128(out++, _mm_and_si128( current, mask9));
-
-  return;
-}
-
-#ifdef HAVE_AVX2
-static void
-extract_9mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,16), _mm256_slli_epi32(next,16));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask9));
-
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,14)); /* No mask necessary */;
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask9));
-
-  return;
-}
-#endif
-
-
-static void
-count_9mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if  defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("47 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("31 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("15 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */
-#endif
-  debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("46 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("30 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("14 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */
-#endif
-  debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("45 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("29 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("13 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */
-#endif
-  debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("44 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("28 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("12 %04X => %d\n",array[3],counts[array[3]]));
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */
-#endif
-  debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("43 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("27 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("11 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */
-#endif
-  debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("42 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("26 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("10 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */
-#endif
-  debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("41 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("25 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("9 %04X => %d\n",array[3],counts[array[3]]));
+  oligo = high_rc >> 16;	/* For 31..24 */
+  oligo |= nextlow_rc << 16;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */
-#endif
-  debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask9);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("40 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("24 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("8 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */
-#endif
-  debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,14); /* No mask necessary */;
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,14)); /* No mask necessary */;
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("39 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("23 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("7 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */
-#endif
-  debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("38 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("22 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("6 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */
-#endif
-  debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("37 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("21 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("5 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */
-#endif
-  debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("36 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("20 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("4 %04X => %d\n",array[3],counts[array[3]]));
+#endif  /* HAVE_AVX2 */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */
-#endif
-  debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("35 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("19 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("3 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */
-#endif
-  debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef HAVE_SSE2
+static void
+extract_9mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) {
+  __m128i oligo;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("34 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("18 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("2 %04X => %d\n",array[3],counts[array[3]]));
+  oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask9));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */
-#endif
-  debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary */;
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
+  _mm_store_si128(out++, _mm_and_si128( current, mask9));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask9);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("33 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("17 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("1 %04X => %d\n",array[3],counts[array[3]]));
+  return;
+}
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */
-#endif
-  debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+#ifdef USE_UNORDERED_9
+static Chrpos_T
+store_9mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];
+			  
+  extract_9mers_rev_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask9);
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask9));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("32 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("16 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("0 %04X => %d\n",array[3],counts[array[3]]));
+/* Includes extract_9mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */
+static Chrpos_T
+store_9mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7,
+    _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */
-#endif
-  debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  out = &(array[0]);
 
-  return;
+  _row0 = _mm_and_si128( current, mask9);
+  _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask9);
+  _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask9);
+  _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask9);
+  _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask9);
+  _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask9);
+  _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask9);
+  _row7 = _mm_srli_epi32(current,14); /* No mask necessary */;
+
+  oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16));
+  _row8 = _mm_and_si128( oligo, mask9);
+  _row9 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9);
+  _row10 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9);
+  _row11 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9);
+  _row12 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9);
+  _row13 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9);
+  _row14 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9);
+  _row15 = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9);
+
+
+  /* Split: top half */
+  _t0 = _mm_unpackhi_epi32(_row0,_row1);
+  _t1 = _mm_unpackhi_epi32(_row2,_row3);
+  _t2 = _mm_unpackhi_epi32(_row4,_row5);
+  _t3 = _mm_unpackhi_epi32(_row6,_row7);
+  _t4 = _mm_unpackhi_epi32(_row8,_row9);
+  _t5 = _mm_unpackhi_epi32(_row10,_row11);
+  _t6 = _mm_unpackhi_epi32(_row12,_row13);
+  _t7 = _mm_unpackhi_epi32(_row14,_row15);
+
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7));
+
+
+  /* Split: bottom half */
+  _t0 = _mm_unpacklo_epi32(_row0,_row1);
+  _t1 = _mm_unpacklo_epi32(_row2,_row3);
+  _t2 = _mm_unpacklo_epi32(_row4,_row5);
+  _t3 = _mm_unpacklo_epi32(_row6,_row7);
+  _t4 = _mm_unpacklo_epi32(_row8,_row9);
+  _t5 = _mm_unpacklo_epi32(_row10,_row11);
+  _t6 = _mm_unpacklo_epi32(_row12,_row13);
+  _t7 = _mm_unpacklo_epi32(_row14,_row15);
+
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5));
+  _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7));
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
 }
 #endif
-
+#endif
 
 #ifdef HAVE_AVX2
 static void
-count_9mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
+extract_9mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
   __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
 
   oligo = _mm256_or_si256( _mm256_srli_epi32(current,16), _mm256_slli_epi32(next,16));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 127 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 111 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 95 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 79 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 63 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 47 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 31 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 15 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 126 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 110 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 94 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 78 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 62 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 46 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 30 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 14 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 125 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 109 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 93 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 77 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 61 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 45 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 29 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 13 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 124 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 108 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 92 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 76 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 60 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 44 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 28 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 12 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 123 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 107 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 91 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 75 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 59 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 43 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 27 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 11 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 122 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 106 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 90 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 74 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 58 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 42 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 26 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 10 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 121 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 105 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 89 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 73 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 57 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 41 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 25 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 9 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 120 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 104 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 88 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 72 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 56 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 40 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 24 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 8 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_srli_epi32(current,14); /* No mask necessary */;
-  counts[EXTRACT256(array,0)] += 1;		       /* 119 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 103 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 87 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 71 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 55 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 39 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 23 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 7 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 118 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 102 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 86 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 70 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 54 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 38 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 22 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 6 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 117 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 101 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 85 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 69 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 53 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 37 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 21 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 5 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 116 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 100 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 84 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 68 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 52 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 36 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 20 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 4 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 115 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 99 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 83 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 67 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 51 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 35 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 19 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 3 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 114 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 98 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 82 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 66 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 50 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 34 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 18 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 2 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 113 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 97 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 81 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 65 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 49 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 33 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 17 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 1 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask9);
-  counts[EXTRACT256(array,0)] += 1;		       /* 112 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 96 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 80 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 64 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 48 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 32 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 16 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 0 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask9));
+
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,14)); /* No mask necessary */;
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask9));
 
   return;
 }
+
+#ifdef USE_UNORDERED_9
+static Chrpos_T
+store_9mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			  __m256i current, __m256i next) {
+  __m256i array[16];		/* 16 vectors x 8 32-bit lanes = 128 extracted values */
+  /* USE_UNORDERED_9 variant: fill array via extract_9mers_rev_simd_128, then hand it to the unordered store routine */
+  extract_9mers_rev_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); /* array is passed as a flat UINT4 buffer; the callee's result is returned unchanged */
+}
+
+#else
+/* Ordered variant (compiled when USE_UNORDERED_9 is not defined).  Includes extract_9mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next): the 16 shifted/masked rows are transposed in registers so that the 16 values taken from one 32-bit lane are contiguous in array, highest lane first, and the result is passed to store_fwdrev_simd_128_ordered. */
+static Chrpos_T
+store_9mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			  __m256i current, __m256i next) {
+  __m256i array[16], *out;	/* 16 vectors x 8 lanes = 128 values; out advances through array as it is filled */
+  __m256i oligo;
+  __m256i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7,
+    _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+  /* Rows 0-7: every 32-bit lane of current shifted right by 0,2,...,14 bits and masked with bigmask9 (defined elsewhere; presumably the 18-bit 9-mer mask -- confirm) */
+  _row0 = _mm256_and_si256( current, bigmask9);
+  _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9);
+  _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9);
+  _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9);
+  _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9);
+  _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9);
+  _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9);
+  _row7 = _mm256_srli_epi32(current,14); /* No mask necessary: a logical shift by 14 leaves only 18 bits */;
+  /* Rows 8-15: the same shifts applied to oligo, which per lane holds the high 16 bits of current (low half) and the low 16 bits of next (high half) */
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,16), _mm256_slli_epi32(next,16));
+  _row8 = _mm256_and_si256( oligo, bigmask9);
+  _row9 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9);
+  _row10 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9);
+  _row11 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9);
+  _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9);
+  _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9);
+  _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9);
+  _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9);
+
+  /* Transpose rows into per-lane order: the unpack steps work inside each 128-bit half, the final permute2x128 combines halves */
+  /* Split: top half (unpackhi: lanes 2,3,6,7 of each row) */
+  _t0 = _mm256_unpackhi_epi32(_row0,_row1);
+  _t1 = _mm256_unpackhi_epi32(_row2,_row3);
+  _t2 = _mm256_unpackhi_epi32(_row4,_row5);
+  _t3 = _mm256_unpackhi_epi32(_row6,_row7);
+  _t4 = _mm256_unpackhi_epi32(_row8,_row9);
+  _t5 = _mm256_unpackhi_epi32(_row10,_row11);
+  _t6 = _mm256_unpackhi_epi32(_row12,_row13);
+  _t7 = _mm256_unpackhi_epi32(_row14,_row15);
+  /* _u0.._u3: lanes 3 and 7 of rows 0-3, 4-7, 8-11, 12-15; _u4.._u7: lanes 2 and 6 of the same row groups */
+  _u0 = _mm256_unpackhi_epi64(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi64(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi64(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi64(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi64(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi64(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi64(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi64(_t6,_t7);
+
+
+  /* Split: bottom half (unpacklo: lanes 0,1,4,5 of each row) */
+  _t0 = _mm256_unpacklo_epi32(_row0,_row1);
+  _t1 = _mm256_unpacklo_epi32(_row2,_row3);
+  _t2 = _mm256_unpacklo_epi32(_row4,_row5);
+  _t3 = _mm256_unpacklo_epi32(_row6,_row7);
+  _t4 = _mm256_unpacklo_epi32(_row8,_row9);
+  _t5 = _mm256_unpacklo_epi32(_row10,_row11);
+  _t6 = _mm256_unpacklo_epi32(_row12,_row13);
+  _t7 = _mm256_unpacklo_epi32(_row14,_row15);
+  /* _row8.._row15 are reused as temporaries (their original values are already in _t4.._t7): _row8.._row11 get lanes 1 and 5, _row12.._row15 lanes 0 and 4 */
+  _row8 = _mm256_unpackhi_epi64(_t0,_t1);
+  _row9 = _mm256_unpackhi_epi64(_t2,_t3);
+  _row10 = _mm256_unpackhi_epi64(_t4,_t5);
+  _row11 = _mm256_unpackhi_epi64(_t6,_t7);
+  _row12 = _mm256_unpacklo_epi64(_t0,_t1);
+  _row13 = _mm256_unpacklo_epi64(_t2,_t3);
+  _row14 = _mm256_unpacklo_epi64(_t4,_t5);
+  _row15 = _mm256_unpacklo_epi64(_t6,_t7);
+
+  /* 0x31 joins the high 128-bit halves of both operands: lanes 7,6,5,4, each written as rows 0-7 followed by rows 8-15 */
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x31));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x31));
+  /* 0x20 joins the low 128-bit halves: lanes 3,2,1,0 in the same two-vectors-per-lane layout */
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x20));
+  _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x20));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX512
+static void
+extract_9mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) {
+  __m512i oligo;
+  /* Writes 16 vectors to out (aligned stores): 8 shifts of oligo, 14 bits down to 0, then 8 shifts of current; hugemask9 is defined elsewhere (presumably the 18-bit 9-mer mask -- confirm) */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,16), _mm512_slli_epi32(next,16)); /* per 32-bit lane: high 16 bits of current below the low 16 bits of next */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask9));
+  /* Remaining 8 vectors use current alone, again from the largest shift to the smallest */
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,14)); /* No mask necessary: a logical shift by 14 leaves only 18 bits */;
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask9));
+
+  return;
+}
+
+#ifdef USE_UNORDERED_9
+static Chrpos_T
+store_9mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16];		/* 16 vectors x 16 32-bit lanes = 256 extracted values */
+  /* USE_UNORDERED_9 variant: extract in the helper's native order (one vector per shift, no transpose), then store */
+  extract_9mers_rev_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); /* array is passed as a flat UINT4 buffer; the callee's result is returned unchanged */
+}
+
+#else
+/* Ordered variant (compiled when USE_UNORDERED_9 is not defined).  Includes extract_9mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next): the 16 shifted/masked rows are transposed in registers so that each stored vector holds the 16 values taken from one 32-bit lane, highest lane first, and the result is passed to store_fwdrev_simd_256_ordered. */
+static Chrpos_T
+store_9mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;	/* 16 vectors x 16 lanes = 256 values; out advances through array as it is filled */
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7,
+    _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+  /* Rows 0-7: every 32-bit lane of current shifted right by 0,2,...,14 bits and masked with hugemask9 (defined elsewhere; presumably the 18-bit 9-mer mask -- confirm) */
+  _row0 = _mm512_and_si512( current, hugemask9);
+  _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9);
+  _row2 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9);
+  _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9);
+  _row4 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9);
+  _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9);
+  _row6 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9);
+  _row7 = _mm512_srli_epi32(current,14); /* No mask necessary: a logical shift by 14 leaves only 18 bits */;
+  /* Rows 8-15: the same shifts applied to oligo, which per lane holds the high 16 bits of current (low half) and the low 16 bits of next (high half) */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,16), _mm512_slli_epi32(next,16));
+  _row8 = _mm512_and_si512( oligo, hugemask9);
+  _row9 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9);
+  _row10 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9);
+  _row11 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9);
+  _row12 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9);
+  _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9);
+  _row14 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9);
+  _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9);
+
+  /* Transpose rows into per-lane order: the unpack steps work inside each 128-bit block, the permutex2var steps then combine blocks */
+  /* Split: top half (unpackhi: lanes 2,3 of every 128-bit block of each row) */
+  _t0 = _mm512_unpackhi_epi32(_row0,_row1);
+  _t1 = _mm512_unpackhi_epi32(_row2,_row3);
+  _t2 = _mm512_unpackhi_epi32(_row4,_row5);
+  _t3 = _mm512_unpackhi_epi32(_row6,_row7);
+  _t4 = _mm512_unpackhi_epi32(_row8,_row9);
+  _t5 = _mm512_unpackhi_epi32(_row10,_row11);
+  _t6 = _mm512_unpackhi_epi32(_row12,_row13);
+  _t7 = _mm512_unpackhi_epi32(_row14,_row15);
+  /* _u0.._u3: lanes 3,7,11,15 of rows 0-3, 4-7, 8-11, 12-15 (one lane per 128-bit block); _u4.._u7: lanes 2,6,10,14 of the same row groups */
+  _u0 = _mm512_unpackhi_epi64(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi64(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi64(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi64(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi64(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi64(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi64(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi64(_t6,_t7);
+
+  /* Split: bottom half (unpacklo: lanes 0,1 of every 128-bit block of each row) */
+  _t0 = _mm512_unpacklo_epi32(_row0,_row1);
+  _t1 = _mm512_unpacklo_epi32(_row2,_row3);
+  _t2 = _mm512_unpacklo_epi32(_row4,_row5);
+  _t3 = _mm512_unpacklo_epi32(_row6,_row7);
+  _t4 = _mm512_unpacklo_epi32(_row8,_row9);
+  _t5 = _mm512_unpacklo_epi32(_row10,_row11);
+  _t6 = _mm512_unpacklo_epi32(_row12,_row13);
+  _t7 = _mm512_unpacklo_epi32(_row14,_row15);
+  /* _row8.._row15 are reused as temporaries (their original values are already in _t4.._t7): _row8.._row11 get lanes 1,5,9,13 and _row12.._row15 lanes 0,4,8,12 */
+  _row8 = _mm512_unpackhi_epi64(_t0,_t1);
+  _row9 = _mm512_unpackhi_epi64(_t2,_t3);
+  _row10 = _mm512_unpackhi_epi64(_t4,_t5);
+  _row11 = _mm512_unpackhi_epi64(_t6,_t7);
+  _row12 = _mm512_unpacklo_epi64(_t0,_t1);
+  _row13 = _mm512_unpacklo_epi64(_t2,_t3);
+  _row14 = _mm512_unpacklo_epi64(_t4,_t5);
+  _row15 = _mm512_unpacklo_epi64(_t6,_t7);
+
+
+  /* Split: top half (128-bit blocks 3 and 2 of each register, i.e. lanes 8-15) */
+  _shuffle0 = _mm512_setr_epi64(6, 7, 8+6, 8+7, 4, 5, 8+4, 8+5); /* block 3 of first operand, block 3 of second, block 2 of first, block 2 of second (indices 8+ select the second operand) */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+  _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9);
+  _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11);
+  _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13);
+  _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15);
+  /* _shuffle1 joins the low 256 bits of both operands, _shuffle2 the high 256 bits: the stores below emit lanes 15,14,13,12 then 11,10,9,8, each vector holding rows 0-15 */
+  _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3);
+  _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7);
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7));
+
+  /* Split: bottom half (128-bit blocks 1 and 0 of each register, i.e. lanes 0-7) */
+  _shuffle0 = _mm512_setr_epi64(2, 3, 8+2, 8+3, 0, 1, 8+0, 8+1); /* block 1 of first operand, block 1 of second, block 0 of first, block 0 of second */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+  _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9);
+  _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11);
+  _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13);
+  _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15);
+  /* The stores below emit lanes 7,6,5,4 then 3,2,1,0, reusing the index vectors set for the top half: */
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); */
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); */
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5));
+  _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7));
+
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
 #endif
 
 
 #if !defined(HAVE_AVX2)
 
 static int
-store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_9mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -33276,58 +22850,50 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK9;	/* 0 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = (low_rc >> 2) & MASK9; /* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = (low_rc >> 4) & MASK9; /* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = (low_rc >> 6) & MASK9; /* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
   masked = (low_rc >> 8) & MASK9; /* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = (low_rc >> 10) & MASK9; /* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = (low_rc >> 12) & MASK9; /* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = low_rc >> 14;	/* 7, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
 #else
@@ -33340,30 +22906,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
@@ -33376,30 +22938,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 #endif
 
@@ -33410,58 +22968,50 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK9; /* 8 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = (oligo >> 2) & MASK9; /* 9 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = (oligo >> 4) & MASK9; /* 10 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = (oligo >> 6) & MASK9; /* 11 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = (oligo >> 8) & MASK9; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = (oligo >> 10) & MASK9; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = (oligo >> 12) & MASK9; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = (oligo >> 14) & MASK9; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #else
@@ -33474,30 +23024,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
 
@@ -33510,30 +23056,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 #endif
 
@@ -33541,58 +23083,50 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK9;	/* 16 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = (high_rc >> 2) & MASK9; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = (high_rc >> 4) & MASK9; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = (high_rc >> 6) & MASK9; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
   masked = (high_rc >> 8) & MASK9; /* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = (high_rc >> 10) & MASK9; /* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = (high_rc >> 12) & MASK9; /* 22 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = high_rc >> 14;	/* 23, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
 #else
@@ -33605,30 +23139,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
 
@@ -33641,30 +23171,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 #endif
 
@@ -33675,58 +23201,50 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK9; /* 24 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = (oligo >> 2) & MASK9; /* 25 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = (oligo >> 4) & MASK9; /* 26 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = (oligo >> 6) & MASK9; /* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = (oligo >> 8) & MASK9; /* 28 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = (oligo >> 10) & MASK9; /* 29 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = (oligo >> 12) & MASK9; /* 30 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = (oligo >> 14) & MASK9; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 
 #else
@@ -33739,30 +23257,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
 
@@ -33775,30 +23289,26 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 #endif
 
@@ -33808,259 +23318,320 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #else	/* HAVE_AVX2 */
 
 static int
-store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_9mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+
 
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
   }
 
 
   oligo = low_rc >> 16;		/* For 15..8 */
   oligo |= high_rc << 16;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
   }
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
   }
 
 
   oligo = high_rc >> 16;	/* For 31..24 */
   oligo |= nextlow_rc << 16;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask9);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
   }
 
   return chrpos - 32;
@@ -34074,7 +23645,7 @@ store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #if !defined(HAVE_AVX2)
 
 static void
-count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_8mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -34087,39 +23658,39 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK8;	/* 0 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 2) & MASK8; /* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 4) & MASK8; /* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 6) & MASK8; /* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 8) & MASK8; /* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 10) & MASK8; /* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 12) & MASK8; /* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 14) & MASK8; /* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = low_rc >> 16;	/* 8, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -34132,22 +23703,22 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == (low_rc & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((low_rc >> 2) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((low_rc >> 4) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((low_rc >> 6) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
 
@@ -34160,27 +23731,27 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == ((low_rc >> 8) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((low_rc >> 10) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((low_rc >> 12) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((low_rc >> 14) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
 
   masked = low_rc >> 16;	/* 8, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -34190,31 +23761,31 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK8; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK8; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK8; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK8; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK8; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 10) & MASK8; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 12) & MASK8; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -34227,22 +23798,22 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == (oligo & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 2) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 4) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((oligo >> 6) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
 
@@ -34255,56 +23826,56 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == ((oligo >> 8) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 10) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 12) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK8;	/* 16 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 2) & MASK8; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 4) & MASK8; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 6) & MASK8; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 8) & MASK8; /* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 10) & MASK8; /* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 12) & MASK8; /* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 14) & MASK8; /* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
   masked = high_rc >> 16;	/* 24, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -34317,22 +23888,22 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == (high_rc & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((high_rc >> 2) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((high_rc >> 4) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((high_rc >> 6) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
 
@@ -34345,27 +23916,27 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == ((high_rc >> 8) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((high_rc >> 10) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((high_rc >> 12) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((high_rc >> 14) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
 
   masked = high_rc >> 16;	/* 24, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -34375,31 +23946,31 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK8; /* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK8; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK8; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK8; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK8; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 10) & MASK8; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 12) & MASK8; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -34412,22 +23983,22 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == (oligo & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 2) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 4) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((oligo >> 6) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
 
@@ -34440,17 +24011,17 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
   masked = EXTRACT(_masked,0);
   assert(masked == ((oligo >> 8) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 10) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 12) & MASK8));
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -34460,1898 +24031,586 @@ count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #else	/* HAVE_AVX2 */
 
 static void
-count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_8mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-   /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
   masked = low_rc >> 16;	/* 8, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
 
   oligo = low_rc >> 18;		/* For 15..9 */
   oligo |= high_rc << 14;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low7);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
   masked = high_rc >> 16;	/* 24, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
 
   oligo = high_rc >> 18;	/* For 31..25 */
   oligo |= nextlow_rc << 14;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low7);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  return;
-}
-
-#endif  /* HAVE_AVX2 */
-
-
-
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
-   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_8mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask8));
-
-  _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */;
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
-  _mm_store_si128(out++, _mm_and_si128( current, mask8));
-
-  return;
-}
-
-#ifdef HAVE_AVX2
-static void
-extract_8mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,18), _mm256_slli_epi32(next,14));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask8));
-
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,16)); /* No mask necessary */;
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask8));
-
-  return;
-}
-#endif
-
-
-static void
-count_8mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("47 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("31 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("15 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */
-#endif
-  debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("46 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("30 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("14 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */
-#endif
-  debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("45 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("29 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("13 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */
-#endif
-  debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("44 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("28 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("12 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */
-#endif
-  debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("43 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("27 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("11 %04X => %d\n",array[3],counts[array[3]]));
+  return;
+}
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */
-#endif
-  debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+#endif  /* HAVE_AVX2 */
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("42 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("26 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("10 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */
-#endif
-  debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("41 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("25 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("9 %04X => %d\n",array[3],counts[array[3]]));
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef HAVE_SSE2
+static void
+extract_8mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) {
+  __m128i oligo;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */
-#endif
-  debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask8));
 
+  _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */;
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
+  _mm_store_si128(out++, _mm_and_si128( current, mask8));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,16); /* No mask necessary */;
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,16)); /* No mask necessary */;
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("40 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("24 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("8 %04X => %d\n",array[3],counts[array[3]]));
+  return;
+}
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */
-#endif
-  debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+#ifdef USE_UNORDERED_8
+static Chrpos_T
+store_8mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];
+			  
+  extract_8mers_rev_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask8);
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("39 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("23 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("7 %04X => %d\n",array[3],counts[array[3]]));
+/* Includes extract_8mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */
+static Chrpos_T
+store_8mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m128i _u0, _u1, _u2, _u3;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */
-#endif
-  debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  out = &(array[0]);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("38 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("22 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("6 %04X => %d\n",array[3],counts[array[3]]));
+  /* As a special case, 8-mers don't need to be masked, since each one exactly fills a 16-bit lane */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */
-#endif
-  debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row0 = _mm_and_si128( current, mask8); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask8); */
+  _t0 = _mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("37 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("21 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("5 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask8); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask8); */
+  _t1 = _mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current,4), 0x55);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */
-#endif
-  debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask8); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask8); */
+  _t2 = _mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current,8), 0x55);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("36 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("20 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("4 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask8); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask8); */
+  _t3 = _mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current,12), 0x55);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */
-#endif
-  debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask8));
+  oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14));
+  /* _row8 = _mm_srli_epi32(current,16); */ /* No mask necessary */;
+  /* _row9 = _mm_and_si128( oligo, mask8); */
+  _t4 = _mm_blend_epi16(_mm_slli_epi32(oligo,16), _mm_srli_epi32(current,16), 0x55);
+
+  /* _row10 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8); */
+  /* _row11 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8); */
+  _t5 = _mm_blend_epi16(_mm_slli_epi32(oligo,12), _mm_srli_epi32(oligo,2), 0x55);
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8); */
+  _t6 = _mm_blend_epi16(_mm_slli_epi32(oligo,8), _mm_srli_epi32(oligo,6), 0x55);
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8); */
+  /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8); */
+  _t7 = _mm_blend_epi16(_mm_slli_epi32(oligo,4), _mm_srli_epi32(oligo,10), 0x55);
+
+
+  /* Split: top half */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  /* Split: bottom half */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1);
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("35 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("19 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("3 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */
 #endif
-  debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask8));
-#endif
 #ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("34 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("18 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("2 %04X => %d\n",array[3],counts[array[3]]));
+static void
+extract_8mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
+  __m256i oligo;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */
-#endif
-  debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,18), _mm256_slli_epi32(next,14));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask8));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("33 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("17 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("1 %04X => %d\n",array[3],counts[array[3]]));
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,16)); /* No mask necessary */;
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask8));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */
-#endif
-  debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask8);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask8));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("32 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("16 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("0 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef USE_UNORDERED_8
+static Chrpos_T
+store_8mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16];
+			  
+  extract_8mers_rev_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */
-#endif
-  debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Includes extract_8mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */
+static Chrpos_T
+store_8mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out;
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
 
-  return;
-}
+  out = &(array[0]);
 
-#endif
+  /* As a special case, 8-mers don't need to be masked, since each one exactly fills a 16-bit lane */
 
+  /* _row0 = _mm256_and_si256( current, bigmask8); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8); */
+  _t0 = _mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55);
+
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8); */
+  _t1 = _mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55);
+
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8); */
+  _t2 = _mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55);
+
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8); */
+  _t3 = _mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55);
 
-#ifdef HAVE_AVX2
-static void
-count_8mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
   oligo = _mm256_or_si256( _mm256_srli_epi32(current,18), _mm256_slli_epi32(next,14));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 127 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 111 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 95 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 79 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 63 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 47 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 31 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 15 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 126 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 110 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 94 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 78 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 62 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 46 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 30 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 14 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 125 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 109 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 93 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 77 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 61 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 45 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 29 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 13 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 124 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 108 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 92 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 76 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 60 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 44 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 28 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 12 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 123 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 107 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 91 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 75 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 59 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 43 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 27 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 11 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 122 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 106 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 90 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 74 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 58 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 42 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 26 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 10 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 121 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 105 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 89 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 73 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 57 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 41 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 25 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 9 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_srli_epi32(current,16); /* No mask necessary */;
-  counts[EXTRACT256(array,0)] += 1;		       /* 120 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 104 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 88 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 72 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 56 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 40 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 24 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 8 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 119 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 103 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 87 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 71 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 55 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 39 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 23 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 7 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 118 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 102 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 86 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 70 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 54 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 38 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 22 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 6 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 117 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 101 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 85 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 69 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 53 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 37 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 21 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 5 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 116 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 100 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 84 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 68 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 52 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 36 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 20 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 4 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 115 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 99 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 83 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 67 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 51 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 35 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 19 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 3 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 114 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 98 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 82 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 66 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 50 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 34 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 18 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 2 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 113 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 97 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 81 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 65 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 49 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 33 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 17 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 1 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask8);
-  counts[EXTRACT256(array,0)] += 1;		       /* 112 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 96 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 80 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 64 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 48 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 32 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 16 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 0 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
+  /* _row8 = _mm256_srli_epi32(current,16); */ /* No mask necessary */;
+  /* _row9 = _mm256_and_si256( oligo, bigmask8); */
+  _t4 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,16), _mm256_srli_epi32(current,16), 0x55);
+
+  /* _row10 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8); */
+  /* _row11 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8); */
+  _t5 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,12), _mm256_srli_epi32(oligo,2), 0x55);
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8); */
+  _t6 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,8), _mm256_srli_epi32(oligo,6), 0x55);
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8); */
+  /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8); */
+  _t7 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,4), _mm256_srli_epi32(oligo,10), 0x55);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1);
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX512
+static void			/* Expands one current/next register pair into 16 shifted-and-masked vectors written to out[0..15] */
+extract_8mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) {
+  __m512i oligo;
+  /* out is advanced only locally; stores use _mm512_store_si512, so out must be 64-byte aligned */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,18), _mm512_slli_epi32(next,14)); /* Per 32-bit lane: top 14 bits of current in bits 0..13, low 18 bits of next above them */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8)); /* Seven windows that straddle current and next, shifts 12 down to 0; hugemask8 is defined outside this chunk (presumably the low 16 bits of each lane -- TODO confirm) */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask8));
+  /* Remaining nine windows are taken from current alone, shifts 16 down to 0 */
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,16)); /* No mask necessary: a logical right shift by 16 leaves only 16 bits in each 32-bit lane */;
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask8));
 
   return;
 }
+
+#ifdef USE_UNORDERED_8
+static Chrpos_T			/* USE_UNORDERED_8 variant: extract into a scratch array, then hand it to store_fwdrev_simd_256 */
+store_8mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16];		/* Scratch for the 16 vectors written by extract_8mers_rev_simd_256 */
+  /* The vectors stay in the order extract_8mers_rev_simd_256 writes them; no reordering is done here */
+  extract_8mers_rev_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); /* array viewed as a flat UINT4 buffer (16 vectors x 16 lanes, assuming 32-bit UINT4); the result comes straight from the helper, which is defined outside this chunk */
+}
+
+#else
+/* Ordered variant (used when USE_UNORDERED_8 is not defined).  Includes extract_8mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next), i.e. the extraction is done in this function's body: rows are packed in pairs, rearranged, widened to 32 bits, stored to a 16-vector scratch array, and passed to store_fwdrev_simd_256_ordered */
+static Chrpos_T
+store_8mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+  /* Pack two rows per vector: each _tN ORs an even row (masked by hugemask8) with the following odd row shifted up and masked by highmask8.  Both masks are defined outside this chunk; presumably low / high 16 bits of each 32-bit lane -- TODO confirm */
+  _u0 = _mm512_and_si512( current, hugemask8);
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask8);
+  _t0 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask8);
+  _t1 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8);
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask8);
+  _t2 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8);
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask8);
+  _t3 = _mm512_or_si512(_u0, _u1);
+
+  /* Rows 8 and up: row 8 is current >> 16, the rest are windows of oligo, which joins current and next */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,18), _mm512_slli_epi32(next,14)); /* Per lane: top 14 bits of current in bits 0..13, low 18 bits of next above them */
+  _u0 = _mm512_srli_epi32(current,16); /* No mask necessary: a logical right shift by 16 leaves only 16 bits per lane */;
+  /* _row9 = _mm512_and_si512( oligo, hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,16), highmask8);
+  _t4 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8);
+  /* _row11 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,12), highmask8);
+  _t5 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,8), highmask8);
+  _t6 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8);
+  /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,4), highmask8);
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+  /* Interleave 32-bit lanes of adjacent packed vectors (_t0/_t1, _t2/_t3, ...): high lanes into _u0.._u3, low lanes into _u4.._u7 */
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+
+
+  /* Split: top half -- 64-bit elements 7..4 of each _u pair, taken in descending order and interleaved */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); /* In permutex2var, index k selects element k of the first source, 8+k element k of the second */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+  /* Each permuted vector is written as two stores: cvtepu16_epi32 zero-extends the sixteen 16-bit values of one 256-bit half to 32-bit lanes */
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half -- same sequence using 64-bit elements 3..0; _shuffle1 and _shuffle2 are reused unchanged */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+  /* All 16 slots of array are now filled (8 stores per half); the result comes straight from the helper, which is defined outside this chunk */
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
 #endif
 
 
 #if !defined(HAVE_AVX2)
 
 static int
-store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_8mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -36365,65 +24624,56 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK8;	/* 0 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = (low_rc >> 2) & MASK8; /* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = (low_rc >> 4) & MASK8; /* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = (low_rc >> 6) & MASK8; /* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
   masked = (low_rc >> 8) & MASK8; /* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = (low_rc >> 10) & MASK8; /* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = (low_rc >> 12) & MASK8; /* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = (low_rc >> 14) & MASK8; /* 7 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
   masked = low_rc >> 16;	/* 8, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
 #else
@@ -36437,33 +24687,29 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == (low_rc & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((low_rc >> 2) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((low_rc >> 4) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((low_rc >> 6) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
@@ -36477,41 +24723,36 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == ((low_rc >> 8) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((low_rc >> 10) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((low_rc >> 12) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((low_rc >> 14) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
 
   masked = low_rc >> 16;	/* 8, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 #endif
 
@@ -36522,51 +24763,44 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK8; /* 9 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = (oligo >> 2) & MASK8; /* 10 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = (oligo >> 4) & MASK8; /* 11 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = (oligo >> 6) & MASK8; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = (oligo >> 8) & MASK8; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = (oligo >> 10) & MASK8; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = (oligo >> 12) & MASK8; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #else
@@ -36580,33 +24814,29 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == (oligo & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 2) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 4) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((oligo >> 6) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
 
@@ -36620,25 +24850,22 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == ((oligo >> 8) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 10) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 12) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 #endif
 
@@ -36646,65 +24873,56 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK8;	/* 16 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = (high_rc >> 2) & MASK8; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = (high_rc >> 4) & MASK8; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = (high_rc >> 6) & MASK8; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
   masked = (high_rc >> 8) & MASK8; /* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = (high_rc >> 10) & MASK8; /* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = (high_rc >> 12) & MASK8; /* 22 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = (high_rc >> 14) & MASK8; /* 23 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
   masked = high_rc >> 16;	/* 24, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
 #else
@@ -36718,33 +24936,29 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == (high_rc & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((high_rc >> 2) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((high_rc >> 4) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((high_rc >> 6) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
 
@@ -36758,41 +24972,36 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == ((high_rc >> 8) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((high_rc >> 10) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((high_rc >> 12) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((high_rc >> 14) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
 
   masked = high_rc >> 16;	/* 24, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 #endif
 
@@ -36803,51 +25012,44 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK8; /* 25 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = (oligo >> 2) & MASK8; /* 26 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = (oligo >> 4) & MASK8; /* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = (oligo >> 6) & MASK8; /* 28 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = (oligo >> 8) & MASK8; /* 29 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = (oligo >> 10) & MASK8; /* 30 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = (oligo >> 12) & MASK8; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 
 #else
@@ -36861,33 +25063,29 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == (oligo & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 2) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 4) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = EXTRACT(_masked,3);
   assert(masked == ((oligo >> 6) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
 
@@ -36901,25 +25099,22 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
   masked = EXTRACT(_masked,0);
   assert(masked == ((oligo >> 8) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = EXTRACT(_masked,1);
   assert(masked == ((oligo >> 10) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = EXTRACT(_masked,2);
   assert(masked == ((oligo >> 12) & MASK8));
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 #endif
 
@@ -36929,261 +25124,319 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #else	/* HAVE_AVX2 */
 
 static int
-store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_8mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+
+
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    }
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
   }
 
 
   masked = low_rc >> 16;	/* 8, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
 
   oligo = low_rc >> 18;		/* For 15..9 */
   oligo |= high_rc << 14;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }}
+
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
   }
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
-  }
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }}
+
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
   }
 
 
   masked = high_rc >> 16;	/* 24, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
 
   oligo = high_rc >> 18;	/* For 31..25 */
   oligo |= nextlow_rc << 14;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask8);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }}
+
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
   }
 
   return chrpos - 32;
@@ -37196,7 +25449,7 @@ store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #if !defined(HAVE_AVX2)
 
 static void
-count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_7mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -37209,43 +25462,43 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK7;	/* 0 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 2) & MASK7; /* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 4) & MASK7; /* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 6) & MASK7; /* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 8) & MASK7; /* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 10) & MASK7; /* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 12) & MASK7; /* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 14) & MASK7; /* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 16) & MASK7; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = low_rc >> 18;	/* 9, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -37257,19 +25510,19 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
 
@@ -37281,19 +25534,19 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
 
@@ -37305,11 +25558,11 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -37319,27 +25572,27 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK7; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK7; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK7; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK7; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK7; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 10) & MASK7; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -37351,19 +25604,19 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
 
@@ -37375,54 +25628,54 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK7;	/* 16 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 2) & MASK7; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 4) & MASK7; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 6) & MASK7; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 8) & MASK7; /* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 10) & MASK7; /* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 12) & MASK7; /* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 14) & MASK7; /* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 16) & MASK7; /* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = high_rc >> 18;	/* 25, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -37434,19 +25687,19 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
 
@@ -37458,19 +25711,19 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
 
@@ -37482,11 +25735,11 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -37496,27 +25749,27 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK7; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK7; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK7; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK7; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK7; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 10) & MASK7; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -37528,19 +25781,19 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
 
@@ -37552,11 +25805,11 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -37566,1900 +25819,589 @@ count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #else	/* HAVE_AVX2 */
 
 static void
-count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_7mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask7);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
 
   oligo = low_rc >> 20;		/* For 15..10 */
   oligo |= high_rc << 12;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low6);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask7);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
 
   oligo = high_rc >> 20;	/* For 31..26 */
   oligo |= nextlow_rc << 12;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low6);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-  return;
-}
-
-#endif  /* HAVE_AVX2 */
-
-
-
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
-   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_7mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
-
-  _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
-  _mm_store_si128(out++, _mm_and_si128( current, mask7));
-
-  return;
-}
-
-#ifdef HAVE_AVX2
-static void
-extract_7mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,20), _mm256_slli_epi32(next,12));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask7));
-
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,18)); /* No mask necessary */
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask7));
-
-  return;
-}
-#endif
-
-
-static void
-count_7mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("47 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("31 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("15 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */
-#endif
-  debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("46 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("30 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("14 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */
-#endif
-  debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("45 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("29 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("13 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */
-#endif
-  debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("44 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("28 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("12 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */
-#endif
-  debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("43 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("27 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("11 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */
-#endif
-  debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("42 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("26 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("10 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */
-#endif
-  debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,18); /* No mask necessary */
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,18)); /* No mask necessary */
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("41 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("25 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("9 %04X => %d\n",array[3],counts[array[3]]));
+#endif  /* HAVE_AVX2 */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */
-#endif
-  debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,16), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("40 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("24 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("8 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */
-#endif
-  debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Stores 16 vectors of masked oligos to out: 6 from a word bridging current and next, then 10 from current alone.  Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc}.  mask7 is defined elsewhere; presumably it keeps the low 14 bits -- confirm. */
+#ifdef HAVE_SSE2
+static void
+extract_7mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) { /* out needs room for 16 __m128i; _mm_store_si128 requires it to be 16-byte aligned */
+  __m128i oligo; /* per-lane word that straddles current and next */
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("39 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("23 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("7 %04X => %d\n",array[3],counts[array[3]]));
+  oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); /* per 32-bit lane: upper 12 bits of current in bits 0..11, lower 20 bits of next above them */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7)); /* bridging word: shifts run from 10 down to 0 in steps of 2 */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask7));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */
-#endif
-  debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary: a 32-bit lane shifted right by 18 has only 14 bits left */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7)); /* current alone: shifts run from 16 down to 0 in steps of 2 */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+  _mm_store_si128(out++, _mm_and_si128( current, mask7)); /* 16th and last vector */
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("38 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("22 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("6 %04X => %d\n",array[3],counts[array[3]]));
+  return;
+}
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */
-#endif
-  debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+#ifdef USE_UNORDERED_7
+static Chrpos_T
+store_7mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) { /* USE_UNORDERED_7 variant: extracts into a local buffer, then delegates to store_fwdrev_simd_64 */
+  __m128i array[16]; /* receives the 16 vectors written by extract_7mers_rev_simd_64 */
+			  
+  extract_7mers_rev_simd_64(array,current,next); /* fills all 16 entries of array */
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); /* array is reinterpreted as UINT4 values; the callee (defined elsewhere) supplies the return value */
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask7);
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("37 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("21 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("5 %04X => %d\n",array[3],counts[array[3]]));
+/* Includes extract_7mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next): the 16 rows are built inline as 16-bit pairs, regrouped per 32-bit element, widened to 32 bits into array, and passed to store_fwdrev_simd_64_ordered.  NOTE(review): _mm_blend_epi16 and _mm_cvtepu16_epi32 are SSE4.1 intrinsics, but the only guard visible here is HAVE_SSE2 -- confirm an SSE4.1 check encloses this. */
+static Chrpos_T
+store_7mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out; /* array: the 16 output vectors; out: write cursor into array */
+  __m128i oligo; /* per-lane word that straddles current and next */
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; /* each _tN holds two rows: even row in the low 16 bits of every 32-bit lane, odd row in the high 16 bits */
+  __m128i _u0, _u1, _u2, _u3; /* each _uN holds four consecutive rows for two 32-bit elements */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */
-#endif
-  debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  out = &(array[0]);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("36 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("20 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("4 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row0 = _mm_and_si128( current, mask7); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask7);*/
+  _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask7_epi16); /* 0x55 takes even 16-bit elements (low half of each lane) from the second operand and odd ones (high half) from the first; mask7_epi16 is defined elsewhere, presumably mask7 in every 16-bit element -- confirm */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */
-#endif
-  debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask7); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask7); */
+  _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask7_epi16);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("35 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("19 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("3 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask7); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask7); */
+  _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask7_epi16);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */
-#endif
-  debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask7); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask7); */
+  _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask7_epi16);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask7));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("34 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("18 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("2 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,16), mask7); */
+  /* _row9 = _mm_srli_epi32(current,18); */ /* No mask necessary */
+  _t4 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask7_epi16); /* high half of (current >> 2) is bits 18..31 of current, which is row9 */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */
-#endif
-  debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask7));
+  oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); /* per 32-bit lane: upper 12 bits of current in bits 0..11, lower 20 bits of next above them */
+  /* _row10 = _mm_and_si128( oligo, mask7); */
+  /* _row11 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7); */
+  _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo, 14), oligo, 0x55), mask7_epi16);
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7); */
+  _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask7_epi16);
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7); */
+  /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7); */
+  _t7 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,6), _mm_srli_epi32(oligo, 8), 0x55), mask7_epi16);
+
+
+  /* Split: top half (32-bit elements 2 and 3 of each _tN) */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); /* element 3: rows 0-3, zero-extended to 32 bits */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); /* element 3: rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); /* element 3: rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); /* element 3: rows 12-15 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); /* element 2: rows 0-3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); /* element 2: rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); /* element 2: rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); /* element 2: rows 12-15 */
+
+  /* Split: bottom half (32-bit elements 0 and 1 of each _tN) */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1);
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); /* element 1: rows 0-3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); /* element 1: rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); /* element 1: rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); /* element 1: rows 12-15 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); /* element 0: rows 0-3 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); /* element 0: rows 4-7 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); /* element 0: rows 8-11 */
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); /* element 0: rows 12-15 */
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* array is reinterpreted as UINT4 values; the callee (defined elsewhere) supplies the return value */
+}
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("33 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("17 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("1 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */
 #endif
-  debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask7);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask7));
-#endif
 #ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("32 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("16 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("0 %04X => %d\n",array[3],counts[array[3]]));
+static void
+extract_7mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { /* 256-bit version (under HAVE_AVX2): stores 16 __m256i to out, 6 from a word bridging current and next, then 10 from current alone; _mm256_store_si256 requires out to be 32-byte aligned */
+  __m256i oligo; /* per-lane word that straddles current and next */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */
-#endif
-  debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,20), _mm256_slli_epi32(next,12)); /* per 32-bit lane: upper 12 bits of current in bits 0..11, lower 20 bits of next above them */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7)); /* bridging word: shifts run from 10 down to 0 in steps of 2; bigmask7 is defined elsewhere, presumably the low 14 bits of each lane -- confirm */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask7));
+
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,18)); /* No mask necessary: a 32-bit lane shifted right by 18 has only 14 bits left */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7)); /* current alone: shifts run from 16 down to 0 in steps of 2 */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask7)); /* 16th and last vector */
 
   return;
 }
-#endif
 
+#ifdef USE_UNORDERED_7
+static Chrpos_T
+store_7mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) { /* USE_UNORDERED_7 variant: extracts into a local buffer, then delegates to store_fwdrev_simd_128 */
+  __m256i array[16]; /* receives the 16 vectors written by extract_7mers_rev_simd_128 */
+			  
+  extract_7mers_rev_simd_128(array,current,next); /* fills all 16 entries of array */
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); /* array is reinterpreted as UINT4 values; the callee (defined elsewhere) supplies the return value */
+}
 
-#ifdef HAVE_AVX2
-static void
-count_7mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
+#else
+/* Includes extract_7mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */
+static Chrpos_T
+store_7mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out;
   __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,20), _mm256_slli_epi32(next,12));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 127 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 111 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 95 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 79 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 63 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 47 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 31 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 15 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 126 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 110 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 94 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 78 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 62 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 46 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 30 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 14 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 125 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 109 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 93 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 77 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 61 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 45 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 29 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 13 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 124 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 108 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 92 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 76 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 60 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 44 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 28 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 12 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 123 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 107 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 91 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 75 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 59 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 43 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 27 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 11 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 122 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 106 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 90 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 74 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 58 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 42 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 26 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 10 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_srli_epi32(current,18); /* No mask necessary */
-  counts[EXTRACT256(array,0)] += 1;		       /* 121 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 105 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 89 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 73 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 57 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 41 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 25 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 9 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 120 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 104 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 88 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 72 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 56 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 40 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 24 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 8 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 119 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 103 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 87 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 71 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 55 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 39 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 23 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 7 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 118 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 102 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 86 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 70 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 54 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 38 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 22 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 6 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 117 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 101 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 85 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 69 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 53 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 37 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 21 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 5 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 116 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 100 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 84 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 68 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 52 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 36 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 20 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 4 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 115 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 99 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 83 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 67 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 51 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 35 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 19 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 3 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 114 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 98 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 82 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 66 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 50 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 34 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 18 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 2 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 113 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 97 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 81 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 65 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 49 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 33 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 17 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 1 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask7);
-  counts[EXTRACT256(array,0)] += 1;		       /* 112 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 96 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 80 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 64 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 48 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 32 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 16 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 0 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
+  out = &(array[0]);
+
+  /* _row0 = _mm256_and_si256( current, bigmask7); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7); */
+  _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask7_epi16);
+
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7); */
+  _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask7_epi16);
+
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7); */
+  _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask7_epi16);
+
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7); */
+  _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask7_epi16);
+
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7); */
+  /* _row9 = _mm256_srli_epi32(current,18); */ /* No mask necessary */
+  _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask7_epi16);
+
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,20), _mm256_slli_epi32(next,12));
+  /* _row10 = _mm256_and_si256( oligo, bigmask7); */
+  /* _row11 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7); */
+  _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask7_epi16);
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7); */
+  _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask7_epi16);
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7); */
+  /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7); */
+  _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,6), _mm256_srli_epi32(oligo,8), 0x55), bigmask7_epi16);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1);
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
+#endif
+
+#ifdef HAVE_AVX512
+static void /* Derives 16 vectors of shifted, masked 32-bit lanes from current/next and stores them to out[0..15] */
+extract_7mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) { /* out is written with _mm512_store_si512, which requires 64-byte-aligned memory */
+  __m512i oligo;
+  /* Per 32-bit lane: bits 20..31 of current become the low 12 bits of oligo; the low 20 bits of next sit above them */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,20), _mm512_slli_epi32(next,12));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7)); /* out[0..5]: six windows of oligo, shifts 10 down to 0 in steps of 2 */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask7)); /* hugemask7 is defined elsewhere; presumably a 14-bit mask (7 bases x 2 bits) -- TODO confirm */
+  /* out[6..15]: ten windows taken from current alone, shifts 18 down to 0 in steps of 2 */
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,18)); /* No mask necessary: a logical right shift by 18 leaves only 14 bits in each 32-bit lane */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask7)); /* 16th and last store: out[15] */
 
   return;
 }
+
+#ifdef USE_UNORDERED_7
+static Chrpos_T /* USE_UNORDERED_7 variant: extracts into a local buffer, then hands that buffer to store_fwdrev_simd_256 */
+store_7mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16]; /* one slot for each of the 16 vectors written by extract_7mers_rev_simd_256 */
+			  
+  extract_7mers_rev_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); /* chrpos, table, positions and counts are forwarded untouched; the buffer is reinterpreted as a flat UINT4 array and the callee's result is returned as-is */
+}
+
+#else
+/* Includes extract_7mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */
+static Chrpos_T
+store_7mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+  /* Builds 16 vectors of 7-mer windows in array, rearranged by the shuffles below, then hands them to store_fwdrev_simd_256_ordered. */
+  out = &(array[0]);
+  /* Each _tN packs two adjacent windows per 32-bit lane: even shift in the low half (hugemask7), odd shift in the high half (highmask7). */
+  _u0 = _mm512_and_si512( current, hugemask7);
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask7); /* row 1 moved up 16 bits is a net left shift by 14; assumes highmask7 covers bits 16-29 -- TODO confirm */
+  _t0 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask7);
+  _t1 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7);
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask7);
+  _t2 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7);
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask7);
+  _t3 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7);
+  /* _row9 = _mm512_srli_epi32(current,18); */ /* No mask necessary */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask7); /* row 9 (current >> 18) moved up 16 bits is a net right shift by 2 */
+  _t4 = _mm512_or_si512(_u0, _u1);
+  /* Rows 10-15 are taken from oligo, which joins the top 12 bits of current with the low bits of next in each lane. */
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,20), _mm512_slli_epi32(next,12));
+  _u0 = _mm512_and_si512( oligo, hugemask7);
+  /* _row11 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask7);
+  _t5 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask7);
+  _t6 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7);
+  /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,6), highmask7);
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+  /* Interleave 32-bit elements of neighbouring _t vectors: unpackhi results go to _u0.._u3, unpacklo results to _u4.._u7. */
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+
+
+  /* Split: top half */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); /* 64-bit element indices; 8+k picks element k of the second source operand */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+  /* Each permuted vector is split into 256-bit halves; cvtepu16_epi32 zero-extends the sixteen 16-bit values of a half to 32 bits before the store. */
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); /* same pattern as the top half, now over 64-bit elements 3..0 */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+  /* All 16 slots of array are now filled; it is passed as UINT4 values to store_fwdrev_simd_256_ordered (defined outside this view), whose result is returned. */
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
 #endif
 
 
 #if !defined(HAVE_AVX2)
 
 static Chrpos_T
-store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_7mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -39473,72 +26415,62 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK7;	/* 0 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = (low_rc >> 2) & MASK7; /* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = (low_rc >> 4) & MASK7; /* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = (low_rc >> 6) & MASK7; /* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
   masked = (low_rc >> 8) & MASK7; /* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = (low_rc >> 10) & MASK7; /* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = (low_rc >> 12) & MASK7; /* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = (low_rc >> 14) & MASK7; /* 7 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
   masked = (low_rc >> 16) & MASK7; /* 8 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = low_rc >> 18;	/* 9, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
 #else
@@ -39551,30 +26483,26 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
@@ -39587,30 +26515,26 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
 
@@ -39623,16 +26547,14 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 #endif
 
@@ -39643,44 +26565,38 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK7; /* 10 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = (oligo >> 2) & MASK7; /* 11 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = (oligo >> 4) & MASK7; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = (oligo >> 6) & MASK7; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = (oligo >> 8) & MASK7; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = (oligo >> 10) & MASK7; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #else
@@ -39693,30 +26609,26 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
 
@@ -39729,16 +26641,14 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #endif
@@ -39747,72 +26657,62 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK7;	/* 16 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = (high_rc >> 2) & MASK7; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = (high_rc >> 4) & MASK7; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = (high_rc >> 6) & MASK7; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
   masked = (high_rc >> 8) & MASK7; /* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = (high_rc >> 10) & MASK7; /* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = (high_rc >> 12) & MASK7; /* 22 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = (high_rc >> 14) & MASK7; /* 23 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
   masked = (high_rc >> 16) & MASK7; /* 24 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = high_rc >> 18;	/* 25, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
 #else
@@ -39825,30 +26725,26 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
 
@@ -39861,30 +26757,26 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
 
@@ -39897,16 +26789,14 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 #endif
 
@@ -39917,44 +26807,38 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK7; /* 26 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = (oligo >> 2) & MASK7; /* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = (oligo >> 4) & MASK7; /* 28 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = (oligo >> 6) & MASK7; /* 29 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = (oligo >> 8) & MASK7; /* 30 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = (oligo >> 10) & MASK7; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 
 #else
@@ -39967,30 +26851,26 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
 
@@ -40003,16 +26883,14 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 #endif
 
@@ -40022,275 +26900,342 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #else	/* HAVE_AVX2 */
 
 static Chrpos_T
-store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_7mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+
+
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }}
+
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
   }
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
   }
 
 
   oligo = low_rc >> 20;		/* For 15..10 */
   oligo |= high_rc << 12;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }}
+
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
   }
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
   }
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
   }
 
 
   oligo = high_rc >> 20;	/* For 31..26 */
   oligo |= nextlow_rc << 12;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask7);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
 
-  assert(EXTRACT256(_counts,0) == counts[EXTRACT256(_masked,0)]);
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
+
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
   }
 
-  assert(EXTRACT256(_counts,1) == counts[EXTRACT256(_masked,1)]);
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
   }
 
-  assert(EXTRACT256(_counts,2) == counts[EXTRACT256(_masked,2)]);
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
   }
 
-  assert(EXTRACT256(_counts,3) == counts[EXTRACT256(_masked,3)]);
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
   }
 
-  assert(EXTRACT256(_counts,4) == counts[EXTRACT256(_masked,4)]);
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
   }
 
-  assert(EXTRACT256(_counts,5) == counts[EXTRACT256(_masked,5)]);
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
   }
 
   return chrpos - 32;
@@ -40303,7 +27248,7 @@ store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #if !defined(HAVE_AVX2)
 
 static void
-count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_6mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -40316,47 +27261,47 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK6;	/* 0 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 2) & MASK6; /* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 4) & MASK6; /* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 6) & MASK6; /* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 8) & MASK6; /* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 10) & MASK6; /* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 12) & MASK6; /* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 14) & MASK6; /* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 16) & MASK6; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 18) & MASK6; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = low_rc >> 20;	/* 10, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -40368,19 +27313,19 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
 
@@ -40392,19 +27337,19 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
 
@@ -40416,15 +27361,15 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -40434,23 +27379,23 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK6; /* 11 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK6; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK6; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK6; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK6; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -40462,71 +27407,71 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
 
   masked = (oligo >> 8) & MASK6; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK6;	/* 16 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 2) & MASK6; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 4) & MASK6; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 6) & MASK6; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 8) & MASK6; /* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 10) & MASK6; /* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 12) & MASK6; /* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 14) & MASK6; /* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 16) & MASK6; /* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = (high_rc >> 18) & MASK6; /* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = high_rc >> 20;	/* 26, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -40538,19 +27483,19 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
 
@@ -40562,19 +27507,19 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
 
@@ -40586,15 +27531,15 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -40604,23 +27549,23 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK6; /* 27 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 2) & MASK6; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 4) & MASK6; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 6) & MASK6; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = (oligo >> 8) & MASK6; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -40632,24 +27577,24 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
 
   masked = (oligo >> 8) & MASK6; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 #endif
 
@@ -40659,285 +27604,168 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #else	/* HAVE_AVX2 */
 
 static void
-count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_6mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
 
   oligo = low_rc >> 22;		/* For 15..11 */
   oligo |= high_rc << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low5);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
 
   oligo = high_rc >> 22;	/* For 31..27 */
   oligo |= nextlow_rc << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
 
   masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low5);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
   return;
 }
 
@@ -40946,9 +27774,9 @@ count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 /* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
    and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
+#ifdef HAVE_SSE2
 static void
-extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
+extract_6mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) {
   __m128i oligo;
 
   oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10));
@@ -40958,7 +27786,7 @@ extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
   _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
   _mm_store_si128(out++, _mm_and_si128( oligo, mask6));
 
-  _mm_store_si128(out++, _mm_srli_epi32(current,20));
+  _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */
   _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
   _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
   _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
@@ -40973,1579 +27801,391 @@ extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
   return;
 }
 
-#ifdef HAVE_AVX2
-static void
-extract_6mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
-
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,22), _mm256_slli_epi32(next,10));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask6));
-
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,20));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask6));
-
-  return;
+#ifdef USE_UNORDERED_6
+static Chrpos_T
+store_6mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];
+			  
+  extract_6mers_rev_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
 }
-#endif
-
-
-static void
-count_6mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
-
-  oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("47 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("31 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("15 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */
-#endif
-  debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("46 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("30 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("14 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */
-#endif
-  debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("45 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("29 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("13 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */
-#endif
-  debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("44 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("28 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("12 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */
-#endif
-  debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("43 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("27 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("11 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */
-#endif
-  debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,20);
 #else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,20));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("42 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("26 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("10 %04X => %d\n",array[3],counts[array[3]]));
+/* Includes extract_6mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */
+static Chrpos_T
+store_6mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m128i _u0, _u1, _u2, _u3;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */
-#endif
-  debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  out = &(array[0]);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,18), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("41 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("25 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("9 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row0 = _mm_and_si128( current, mask6); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask6); */
+  _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask6_epi16);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */
-#endif
-  debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask6); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask6); */
+  _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask6_epi16);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,16), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("40 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("24 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("8 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask6); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask6); */
+  _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask6_epi16);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */
-#endif
-  debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask6); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask6); */
+  _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask6_epi16);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("39 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("23 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("7 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,16), mask6); */
+  /* _row9 = _mm_and_si128( _mm_srli_epi32(current,18), mask6); */
+  _t4 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask6_epi16);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */
-#endif
-  debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask6));
+  oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10));
+  /* _row10 = _mm_srli_epi32(current,20); */ /* No mask necessary */
+  /* _row11 = _mm_and_si128( oligo, mask6); */
+  _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,16), _mm_srli_epi32(current, 20), 0x55), mask6_epi16);
+
+  /* _row12 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6); */
+  _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,12), _mm_srli_epi32(oligo, 2), 0x55), mask6_epi16);
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6); */
+  /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6); */
+  _t7 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,8), _mm_srli_epi32(oligo, 6), 0x55), mask6_epi16);
+
+
+  /* Split: top half */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  /* Split: bottom half */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1);
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("38 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("22 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("6 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */
 #endif
-  debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask6));
-#endif
 #ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("37 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("21 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("5 %04X => %d\n",array[3],counts[array[3]]));
+static void
+extract_6mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
+  __m256i oligo;
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */
-#endif
-  debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,22), _mm256_slli_epi32(next,10));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask6));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("36 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("20 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("4 %04X => %d\n",array[3],counts[array[3]]));
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,20));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask6));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */
-#endif
-  debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("35 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("19 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("3 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef USE_UNORDERED_6
+static Chrpos_T
+store_6mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16];
+			  
+  extract_6mers_rev_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */
-#endif
-  debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Includes extract_6mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */
+static Chrpos_T
+store_6mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out;
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("34 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("18 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("2 %04X => %d\n",array[3],counts[array[3]]));
+  out = &(array[0]);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */
-#endif
-  debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row0 = _mm256_and_si256( current, bigmask6); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6); */
+  _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask6_epi16);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("33 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("17 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("1 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6); */
+  _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask6_epi16);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */
-#endif
-  debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6); */
+  _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask6_epi16);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask6);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask6));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("32 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("16 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("0 %04X => %d\n",array[3],counts[array[3]]));
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6); */
+  _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask6_epi16);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */
-#endif
-  debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6); */
+  /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6); */
+  _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask6_epi16);
 
-  return;
-}
 
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,22), _mm256_slli_epi32(next,10));
+  /* _row10 = _mm256_srli_epi32(current,20); */ /* No mask necessary */
+  /* _row11 = _mm256_and_si256( oligo, bigmask6); */
+  _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,16), _mm256_srli_epi32(current,20), 0x55), bigmask6_epi16);
+
+  /* _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6); */
+  _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,12), _mm256_srli_epi32(oligo,2), 0x55), bigmask6_epi16);
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6); */
+  /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6); */
+  _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,8), _mm256_srli_epi32(oligo,6), 0x55), bigmask6_epi16);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1);
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
 #endif
 
-#ifdef HAVE_AVX2
+#ifdef HAVE_AVX512
 static void
-count_6mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
+extract_6mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) {
+  __m512i oligo;
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,22), _mm512_slli_epi32(next,10));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask6));
+
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,20)); /* No mask necessary */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask6));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask6));
 
+  return;
+}
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,22), _mm256_slli_epi32(next,10));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 127 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 111 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 95 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 79 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 63 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 47 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 31 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 15 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 126 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 110 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 94 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 78 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 62 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 46 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 30 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 14 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 125 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 109 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 93 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 77 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 61 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 45 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 29 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 13 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 124 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 108 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 92 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 76 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 60 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 44 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 28 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 12 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 123 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 107 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 91 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 75 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 59 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 43 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 27 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 11 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_srli_epi32(current,20);
-  counts[EXTRACT256(array,0)] += 1;		       /* 122 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 106 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 90 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 74 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 58 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 42 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 26 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 10 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 121 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 105 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 89 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 73 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 57 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 41 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 25 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 9 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 120 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 104 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 88 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 72 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 56 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 40 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 24 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 8 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 119 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 103 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 87 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 71 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 55 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 39 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 23 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 7 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 118 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 102 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 86 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 70 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 54 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 38 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 22 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 6 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 117 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 101 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 85 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 69 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 53 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 37 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 21 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 5 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 116 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 100 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 84 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 68 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 52 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 36 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 20 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 4 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 115 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 99 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 83 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 67 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 51 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 35 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 19 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 3 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 114 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 98 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 82 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 66 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 50 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 34 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 18 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 2 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 113 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 97 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 81 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 65 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 49 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 33 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 17 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 1 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask6);
-  counts[EXTRACT256(array,0)] += 1;		       /* 112 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 96 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 80 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 64 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 48 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 32 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 16 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 0 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
+#ifdef USE_UNORDERED_6
+static Chrpos_T  /* USE_UNORDERED_6 variant: returns whatever store_fwdrev_simd_256 returns */
+store_6mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16];  /* scratch: 16 x 512 bits; 256 entries if UINT4 is 32 bits -- TODO confirm */
+  /* Step 1 fills array from current/next; step 2 passes it on, viewed as a flat UINT4 buffer. */
+  extract_6mers_rev_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array);  /* NOTE(review): vector storage read through UINT4 * -- presumably relies on the compiler's vector-type aliasing rules; confirm */
+}
 
-  return;
+#else
+/* Ordered variant.  Includes extract_6mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next), written out in the body below rather than called */
+static Chrpos_T
+store_6mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out;
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+  /* out walks through array; each of the 16 stores below fills one vector and advances it */
+  out = &(array[0]);
+  /* Rows 0 and 1 (numbering follows the commented-out _rowN lines).  Each _tN ORs an even row under hugemask6 with an odd row under highmask6; presumably highmask6 is hugemask6 moved to the upper 16 bits of each lane -- confirm against the mask definitions */
+  _u0 = _mm512_and_si512( current, hugemask6);
+  /* _row1 = _mm512_and_si512(_mm512_srli_epi32(current,2), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask6);
+  _t0 = _mm512_or_si512(_u0, _u1);
+  /* Rows 2 and 3 */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask6);
+  _t1 = _mm512_or_si512(_u0, _u1);
+  /* Rows 4 and 5 */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6);
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask6);
+  _t2 = _mm512_or_si512(_u0, _u1);
+  /* Rows 6 and 7 */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6);
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask6);
+  _t3 = _mm512_or_si512(_u0, _u1);
+  /* Rows 8 and 9: the odd row now needs a right shift (by 2) instead of a left shift */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6);
+  /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask6);
+  _t4 = _mm512_or_si512(_u0, _u1);
+
+  /* Rows 10 and 11.  oligo joins the top 10 bits of each 32-bit lane of current with the low bits of the same lane of next, shifted up by 10 */
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,22), _mm512_slli_epi32(next,10));
+  _u0 = _mm512_srli_epi32(current,20); /* No mask necessary */
+  /* _row11 = _mm512_and_si512( oligo, hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,16), highmask6);
+  _t5 = _mm512_or_si512(_u0, _u1);
+  /* Rows 12 and 13, both taken from oligo */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,12), highmask6);
+  _t6 = _mm512_or_si512(_u0, _u1);
+  /* Rows 14 and 15, both taken from oligo */
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6);
+  /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,8), highmask6);
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+  /* Interleave 32-bit elements of neighbouring _t vectors: unpackhi results go to _u0.._u3, unpacklo results to _u4.._u7 */
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+  /* In the permutes below, index k selects 64-bit element k of the first source and 8+k selects element k of the second source */
+  /* The first _shuffle0 visits elements 7..4 in descending order; the second one (bottom half) visits elements 3..0 */
+  /* Split: top half */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+  /* _shuffle1 takes 64-bit elements 0..3 of each source, two at a time, alternating between the two sources */
+  /* Before each store, one 256-bit half of _t7 is widened from sixteen 16-bit values to sixteen zero-extended 32-bit values */
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+  /* _shuffle2 is the counterpart of _shuffle1 for 64-bit elements 4..7 */
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7);
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0);
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+  /* All 16 vectors of array are now filled (8 stores per half); the store routine receives the buffer cast to UINT4 * and its result is returned unchanged */
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array);
 }
 #endif
+#endif
+
 
 
 #if !defined(HAVE_AVX2)
 
 static int
-store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_6mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -42559,79 +28199,68 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK6;	/* 0 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = (low_rc >> 2) & MASK6; /* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = (low_rc >> 4) & MASK6; /* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = (low_rc >> 6) & MASK6; /* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
   masked = (low_rc >> 8) & MASK6; /* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = (low_rc >> 10) & MASK6; /* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = (low_rc >> 12) & MASK6; /* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = (low_rc >> 14) & MASK6; /* 7 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
   masked = (low_rc >> 16) & MASK6; /* 8 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = (low_rc >> 18) & MASK6; /* 9 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = low_rc >> 20;	/* 10, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
 #else
@@ -42644,30 +28273,26 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
@@ -42680,30 +28305,26 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
 
@@ -42716,23 +28337,20 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 #endif
 
@@ -42743,37 +28361,32 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK6; /* 11 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = (oligo >> 2) & MASK6; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = (oligo >> 4) & MASK6; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = (oligo >> 6) & MASK6; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
   masked = (oligo >> 8) & MASK6; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #else
@@ -42786,38 +28399,33 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
 
   masked = (oligo >> 8) & MASK6; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 #endif
 
@@ -42825,79 +28433,68 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = high_rc & MASK6;	/* 16 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = (high_rc >> 2) & MASK6; /* 17 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = (high_rc >> 4) & MASK6; /* 18 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = (high_rc >> 6) & MASK6; /* 19 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
   masked = (high_rc >> 8) & MASK6; /* 20 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = (high_rc >> 10) & MASK6; /* 21 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = (high_rc >> 12) & MASK6; /* 22 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = (high_rc >> 14) & MASK6; /* 23 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
   masked = (high_rc >> 16) & MASK6; /* 24 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = (high_rc >> 18) & MASK6; /* 25 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = high_rc >> 20;	/* 26, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
 #else
@@ -42910,30 +28507,26 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
 
@@ -42946,30 +28539,26 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
 
@@ -42982,23 +28571,20 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 #endif
 
@@ -43009,37 +28595,32 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #ifdef INDIVIDUAL_SHIFTS
   masked = oligo & MASK6; /* 27 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = (oligo >> 2) & MASK6; /* 28 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = (oligo >> 4) & MASK6; /* 29 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = (oligo >> 6) & MASK6; /* 30 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
   masked = (oligo >> 8) & MASK6; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 
 #else
@@ -43052,38 +28633,33 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
   }
 
 
   masked = (oligo >> 8) & MASK6; /* 31 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
   }
 #endif
 
@@ -43093,270 +28669,343 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #else	/* HAVE_AVX2 */
 
 static int
-store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+store_6mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
   __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
+  _address_mask = _mm256_set1_epi32(0x3);
+  _count_mask = _mm256_set1_epi32(0xFF);
+
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }}
+
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
   }
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
   }
 
 
   oligo = low_rc >> 22;		/* For 15..11 */
   oligo |= high_rc << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
   }
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
   }
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }
   }
 
   if (EXTRACT256(_counts,5)) {
     masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
   }
 
   if (EXTRACT256(_counts,6)) {
     masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
   }
 
   if (EXTRACT256(_counts,7)) {
     masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
   }
 
 
   _oligo = _mm256_srli_epi32(_oligo, 16);
   _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
   }
 
 
   oligo = high_rc >> 22;	/* For 31..27 */
   oligo |= nextlow_rc << 10;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
   _masked = _mm256_and_si256(_oligo, bigmask6);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
+
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
   if (EXTRACT256(_counts,0)) {
     masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
   }
 
   if (EXTRACT256(_counts,1)) {
     masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
   }
 
   if (EXTRACT256(_counts,2)) {
     masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
   }
 
   if (EXTRACT256(_counts,3)) {
     masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
   }
 
 
   if (EXTRACT256(_counts,4)) {
     masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
   }
 
   return chrpos - 32;
@@ -43369,7 +29018,7 @@ store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 #if !defined(HAVE_AVX2)
 
 static void
-count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+count_5mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
   Genomecomp_T masked, oligo;
 #ifdef INDIVIDUAL_SHIFTS
 #elif defined(SIMD_MASK_THEN_STORE)
@@ -43382,51 +29031,51 @@ count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 
 #ifdef INDIVIDUAL_SHIFTS
   masked = low_rc & MASK5;	/* 0 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 2) & MASK5; /* 1 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 4) & MASK5; /* 2 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 6) & MASK5; /* 3 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 8) & MASK5; /* 4 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 10) & MASK5; /* 5 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 12) & MASK5; /* 6 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 14) & MASK5; /* 7 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 16) & MASK5; /* 8 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 18) & MASK5; /* 9 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
   masked = (low_rc >> 20) & MASK5; /* 10 */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
   masked = low_rc >> 22;	/* 11, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
 #else
@@ -43438,19 +29087,19 @@ count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
 
@@ -43462,2552 +29111,933 @@ count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genom
 #endif
 
   masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
+  INCR_COUNT(counts[masked]);
   debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
   masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("7 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("8 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("9 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("10 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("11 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-  oligo = low_rc >> 24;		/* For 15..12 */
-  oligo |= high_rc << 8;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 12 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 2) & MASK5; /* 13 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 4) & MASK5; /* 14 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 6) & MASK5; /* 15 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("12 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("13 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("14 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("15 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = high_rc & MASK5;	/* 16 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 2) & MASK5; /* 17 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 4) & MASK5; /* 18 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 6) & MASK5; /* 19 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 8) & MASK5; /* 20 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 10) & MASK5; /* 21 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 12) & MASK5; /* 22 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 14) & MASK5; /* 23 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 16) & MASK5; /* 24 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 18) & MASK5; /* 25 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-  masked = (high_rc >> 20) & MASK5; /* 26 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = high_rc >> 22;	/* 27, No mask necessary */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("16 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("17 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("18 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("19 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("20 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("21 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("22 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("23 %04X => %d\n",masked,counts[masked]));
-
-
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("24 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("25 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("26 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("27 %04X => %d\n",masked,counts[masked]));
-#endif
-
-
-  oligo = high_rc >> 24;	/* For 31..28 */
-  oligo |= nextlow_rc << 8;
-
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 28 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 2) & MASK5; /* 29 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 4) & MASK5; /* 30 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = (oligo >> 6) & MASK5; /* 31 */
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-
-#else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
-
-  masked = EXTRACT(_masked,0);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,1);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,2);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT(_masked,3);
-  INCR_COUNT(counts[masked],inquery[masked]);
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
-#endif
-
-  return;
-}
-
-#else	/* HAVE_AVX2 */
-
-static void
-count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
-  Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("0 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("1 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("2 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("3 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
-  debug(printf("4 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
-  debug(printf("5 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
-  debug(printf("6 %04X => %d\n",masked,counts[masked]));
-
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  INCR_COUNT(counts[masked]);
   debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
 
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
+#endif
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
   debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
   debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
   debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
   debug(printf("11 %04X => %d\n",masked,counts[masked]));
-
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
+#endif
 
 
   oligo = low_rc >> 24;		/* For 15..12 */
   oligo |= high_rc << 8;
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK5; /* 12 */
+  INCR_COUNT(counts[masked]);
   debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = (oligo >> 2) & MASK5; /* 13 */
+  INCR_COUNT(counts[masked]);
   debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = (oligo >> 4) & MASK5; /* 14 */
+  INCR_COUNT(counts[masked]);
   debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = (oligo >> 6) & MASK5; /* 15 */
+  INCR_COUNT(counts[masked]);
   debug(printf("15 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
+#else
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
+#endif
+
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+#endif
+
+
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rc & MASK5;	/* 16 */
+  INCR_COUNT(counts[masked]);
   debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = (high_rc >> 2) & MASK5; /* 17 */
+  INCR_COUNT(counts[masked]);
   debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = (high_rc >> 4) & MASK5; /* 18 */
+  INCR_COUNT(counts[masked]);
   debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = (high_rc >> 6) & MASK5; /* 19 */
+  INCR_COUNT(counts[masked]);
   debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,4);
-  counts[masked] += 1;
+  masked = (high_rc >> 8) & MASK5; /* 20 */
+  INCR_COUNT(counts[masked]);
   debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,5);
-  counts[masked] += 1;
+  masked = (high_rc >> 10) & MASK5; /* 21 */
+  INCR_COUNT(counts[masked]);
   debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,6);
-  counts[masked] += 1;
+  masked = (high_rc >> 12) & MASK5; /* 22 */
+  INCR_COUNT(counts[masked]);
   debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,7);
-  counts[masked] += 1;
+  masked = (high_rc >> 14) & MASK5; /* 23 */
+  INCR_COUNT(counts[masked]);
   debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  /* _counts_after = _mm256_and_si256(_counts_after,low8); */
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
+  masked = (high_rc >> 16) & MASK5; /* 24 */
+  INCR_COUNT(counts[masked]);
   debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
+  masked = (high_rc >> 18) & MASK5; /* 25 */
+  INCR_COUNT(counts[masked]);
   debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
+  masked = (high_rc >> 20) & MASK5; /* 26 */
+  INCR_COUNT(counts[masked]);
   debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
+  masked = high_rc >> 22;	/* 27, No mask necessary */
+  INCR_COUNT(counts[masked]);
   debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-
-
-  oligo = high_rc >> 24;	/* For 31..28 */
-  oligo |= nextlow_rc << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-
-  masked = EXTRACT256(_masked,0);
-  counts[masked] += 1;
-  debug(printf("28 %04X => %d\n",masked,counts[masked]));
+#else
+  _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
+#endif
 
-  masked = EXTRACT256(_masked,1);
-  counts[masked] += 1;
-  debug(printf("29 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,2);
-  counts[masked] += 1;
-  debug(printf("30 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
 
-  masked = EXTRACT256(_masked,3);
-  counts[masked] += 1;
-  debug(printf("31 %04X => %d\n",masked,counts[masked]));
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
 
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4);
-  _counts_after = _mm256_and_si256(_counts_after,low4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
 
-  return;
-}
 
-#endif  /* HAVE_AVX2 */
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
+#endif
 
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
 
-/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
-   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-extract_5mers_rev_simd (__m128i *out, __m128i current, __m128i next) {
-  __m128i oligo;
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
 
-  oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
-  _mm_store_si128(out++, _mm_and_si128( oligo, mask5));
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
-  _mm_store_si128(out++, _mm_srli_epi32(current,22));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
-  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
-  _mm_store_si128(out++, _mm_and_si128( current, mask5));
 
-  return;
-}
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
+#endif
 
-#ifdef HAVE_AVX2
-static void
-extract_5mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) {
-  __m256i oligo;
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5));
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
-  _mm256_store_si256(out++, _mm256_srli_epi32(current,22));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5));
-  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5));
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-  return;
-}
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 #endif
 
 
-static void
-count_5mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) {
-  __m128i oligo;
-#ifdef HAVE_SSE4_1
-  __m128i array;
-#else
-  Genomecomp_T array[4];
-#endif
-#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW)
-  __m128i _counts_after, _counts_neg;
-#endif
+  oligo = high_rc >> 24;	/* For 31..28 */
+  oligo |= nextlow_rc << 8;
 
-  oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("47 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("31 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("15 %04X => %d\n",array[3],counts[array[3]]));
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK5; /* 28 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */
-#endif
-  debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = (oligo >> 2) & MASK5; /* 29 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("46 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("30 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("14 %04X => %d\n",array[3],counts[array[3]]));
+  masked = (oligo >> 4) & MASK5; /* 30 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+  masked = (oligo >> 6) & MASK5; /* 31 */
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
 #else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */
-#endif
-  debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
-
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5);
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("45 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("29 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("13 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */
-#endif
-  debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( oligo, mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("44 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("28 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("12 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */
+  masked = EXTRACT(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 #endif
-  debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
 
+  return;
+}
 
-#ifdef HAVE_SSE4_1
-  array = _mm_srli_epi32(current,22);
-#else
-  _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,22));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("43 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("27 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("11 %04X => %d\n",array[3],counts[array[3]]));
+#else	/* HAVE_AVX2 */
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */
-#endif
-  debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+static void
+count_5mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,20), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("42 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("26 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("10 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */
-#endif
-  debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,18), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("41 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("25 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("9 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("0 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */
-#endif
-  debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("1 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,16), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("40 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("24 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("8 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("2 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */
-#endif
-  debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("3 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,14), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("39 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("23 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("7 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("4 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */
-#endif
-  debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("5 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,12), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("38 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("22 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("6 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("6 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */
-#endif
-  debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("7 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,10), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("37 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("21 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("5 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */
-#endif
-  debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,8), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("36 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("20 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("4 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("8 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */
-#endif
-  debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("9 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,6), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("35 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("19 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("3 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("10 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */
-#endif
-  debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("11 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,4), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("34 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("18 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("2 %04X => %d\n",array[3],counts[array[3]]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */
-#endif
-  debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  oligo = low_rc >> 24;		/* For 15..12 */
+  oligo |= high_rc << 8;
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( _mm_srli_epi32(current,2), mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("33 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("17 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("1 %04X => %d\n",array[3],counts[array[3]]));
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */
-#endif
-  debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("12 %04X => %d\n",masked,counts[masked]));
 
-#ifdef HAVE_SSE4_1
-  array = _mm_and_si128( current, mask5);
-#else
-  _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask5));
-#endif
-#ifdef HAVE_AVX2
-  counts[EXTRACT(array,0)] += 1;
-  counts[EXTRACT(array,1)] += 1;
-  counts[EXTRACT(array,2)] += 1;
-  counts[EXTRACT(array,3)] += 1;
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128);
-  if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT(_counts_neg,0)) {
-      inquery[EXTRACT(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,1)) {
-      inquery[EXTRACT(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,2)) {
-      inquery[EXTRACT(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT(_counts_neg,3)) {
-      inquery[EXTRACT(array,3)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",array[0],counts[array[0]]));
-  debug(printf("32 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("16 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("0 %04X => %d\n",array[3],counts[array[3]]));
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("13 %04X => %d\n",masked,counts[masked]));
 
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */
-#endif
-  debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("14 %04X => %d\n",masked,counts[masked]));
 
-  return;
-}
-#endif
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("15 %04X => %d\n",masked,counts[masked]));
+
+
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
+
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("16 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("17 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("18 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("19 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,4);
+  INCR_COUNT(counts[masked]);
+  debug(printf("20 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,5);
+  INCR_COUNT(counts[masked]);
+  debug(printf("21 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,6);
+  INCR_COUNT(counts[masked]);
+  debug(printf("22 %04X => %d\n",masked,counts[masked]));
+
+  masked = EXTRACT256(_masked,7);
+  INCR_COUNT(counts[masked]);
+  debug(printf("23 %04X => %d\n",masked,counts[masked]));
 
 
-#ifdef HAVE_AVX2
-static void
-count_5mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
-#endif
+  _oligo = _mm256_srli_epi32(_oligo, 16);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-  oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8));
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 127 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 111 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 95 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 79 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 63 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 47 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 31 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 15 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 126 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 110 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 94 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 78 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 62 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 46 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 30 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 14 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 125 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 109 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 93 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 77 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 61 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 45 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 29 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 13 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( oligo, bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 124 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 108 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 92 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 76 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 60 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 44 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 28 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 12 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_srli_epi32(current,22);
-  counts[EXTRACT256(array,0)] += 1;		       /* 123 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 107 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 91 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 75 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 59 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 43 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 27 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 11 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 122 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 106 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 90 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 74 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 58 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 42 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 26 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 10 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 121 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 105 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 89 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 73 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 57 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 41 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 25 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 9 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 120 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 104 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 88 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 72 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 56 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 40 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 24 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 8 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 119 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 103 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 87 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 71 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 55 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 39 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 23 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 7 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 118 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 102 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 86 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 70 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 54 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 38 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 22 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 6 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 117 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 101 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 85 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 69 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 53 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 37 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 21 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 5 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 116 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 100 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 84 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 68 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 52 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 36 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 20 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 4 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 115 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 99 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 83 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 67 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 51 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 35 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 19 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 3 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 114 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 98 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 82 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 66 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 50 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 34 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 18 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 2 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 113 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 97 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 81 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 65 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 49 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 33 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 17 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 1 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
-
-  array = _mm256_and_si256( current, bigmask5);
-  counts[EXTRACT256(array,0)] += 1;		       /* 112 */
-  counts[EXTRACT256(array,1)] += 1;		       /* 96 */
-  counts[EXTRACT256(array,2)] += 1;		       /* 80 */
-  counts[EXTRACT256(array,3)] += 1;		       /* 64 */
-  counts[EXTRACT256(array,4)] += 1;		       /* 48 */
-  counts[EXTRACT256(array,5)] += 1;		       /* 32 */
-  counts[EXTRACT256(array,6)] += 1;		       /* 16 */
-  counts[EXTRACT256(array,7)] += 1;		       /* 0 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif	/* CHECK_FOR_OVERFLOW */
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)]));
-  debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)]));
-  debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)]));
-  debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)]));
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)]));
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("24 %04X => %d\n",masked,counts[masked]));
 
-  return;
-}
-#endif
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("25 %04X => %d\n",masked,counts[masked]));
 
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("26 %04X => %d\n",masked,counts[masked]));
 
-#if !defined(HAVE_AVX2)
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("27 %04X => %d\n",masked,counts[masked]));
 
-static int
-store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
-  Genomecomp_T masked, oligo;
-#ifdef INDIVIDUAL_SHIFTS
-#elif defined(SIMD_MASK_THEN_STORE)
-  UINT4 _masked[4] __attribute__ ((aligned (16)));
-  __m128i _oligo;
-#else
-  __m128i _oligo, _masked;
-#endif
 
+  oligo = high_rc >> 24;	/* For 31..28 */
+  oligo |= nextlow_rc << 8;
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = low_rc & MASK5;	/* 0 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-  masked = (low_rc >> 2) & MASK5; /* 1 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
+  masked = EXTRACT256(_masked,0);
+  INCR_COUNT(counts[masked]);
+  debug(printf("28 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rc >> 4) & MASK5; /* 2 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
+  masked = EXTRACT256(_masked,1);
+  INCR_COUNT(counts[masked]);
+  debug(printf("29 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rc >> 6) & MASK5; /* 3 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
+  masked = EXTRACT256(_masked,2);
+  INCR_COUNT(counts[masked]);
+  debug(printf("30 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rc >> 8) & MASK5; /* 4 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+  masked = EXTRACT256(_masked,3);
+  INCR_COUNT(counts[masked]);
+  debug(printf("31 %04X => %d\n",masked,counts[masked]));
 
-  masked = (low_rc >> 10) & MASK5; /* 5 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
+  return;
+}
 
-  masked = (low_rc >> 12) & MASK5; /* 6 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
+#endif  /* HAVE_AVX2 */
 
-  masked = (low_rc >> 14) & MASK5; /* 7 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
 
-  masked = (low_rc >> 16) & MASK5; /* 8 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
 
-  masked = (low_rc >> 18) & MASK5; /* 9 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
+/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc},
+   and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */
+#ifdef HAVE_SSE2
+static void
+extract_5mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) {
+  __m128i oligo;
 
-  masked = (low_rc >> 20) & MASK5; /* 10 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
+  oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5));
+  _mm_store_si128(out++, _mm_and_si128( oligo, mask5));
 
-  masked = low_rc >> 22;	/* 11, No mask necessary */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
+  _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary */
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5));
+  _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5));
+  _mm_store_si128(out++, _mm_and_si128( current, mask5));
+
+  return;
+}
+
+#ifdef USE_UNORDERED_5
+static Chrpos_T
+store_5mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16];
+			  
+  extract_5mers_rev_simd_64(array,current,next);
+  return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array);
+}
 
 #else
-  _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
+/* Includes extract_5mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */
+static Chrpos_T
+store_5mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m128i current, __m128i next) {
+  __m128i array[16], *out;
+  __m128i oligo;
+  __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m128i _u0, _u1, _u2, _u3;
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
+  out = &(array[0]);
 
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
+  /* _row0 = _mm_and_si128( current, mask5); */
+  /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask5); */
+  _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask5_epi16);
 
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
+  /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask5); */
+  /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask5); */
+  _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask5_epi16);
 
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
+  /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask5); */
+  /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask5); */
+  _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask5_epi16);
 
+  /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask5); */
+  /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask5); */
+  _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask5_epi16);
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
+  /* _row8 = _mm_and_si128( _mm_srli_epi32(current,16), mask5); */
+  /* _row9 = _mm_and_si128( _mm_srli_epi32(current,18), mask5); */
+  _t4 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask5_epi16);
+
+  /* _row10 = _mm_and_si128( _mm_srli_epi32(current,20), mask5); */
+  /* _row11 = _mm_srli_epi32(current,22); */ /* No mask necessary */ 
+  _t5 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,6), _mm_srli_epi32(current, 20), 0x55), mask5_epi16);
+
+  oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8));
+  /* _row12 = _mm_and_si128( oligo, mask5); */
+  /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5); */
+  _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask5_epi16);
+
+  /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5); */
+  /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5); */
+  _t7 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask5_epi16);
+
+
+  /* Split: top half */
+  _u0 = _mm_unpackhi_epi32(_t0,_t1);
+  _u1 = _mm_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm_unpackhi_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  /* Split: bottom half */
+  _u0 = _mm_unpacklo_epi32(_t0,_t1);
+  _u1 = _mm_unpacklo_epi32(_t2,_t3);
+  _u2 = _mm_unpacklo_epi32(_t4,_t5);
+  _u3 = _mm_unpacklo_epi32(_t6,_t7);
+
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8)));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2));
+  _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3));
+
+  return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array);
+}
+#endif
 #endif
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
+#ifdef HAVE_AVX2
+static void
+extract_5mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { /* Writes 16 vectors through out, advancing it: first the four oligos that straddle current/next (shifts 6,4,2,0), then twelve taken from current alone (shifts 22 down to 0 in steps of 2).  Uses aligned stores, so out must meet the alignment required by _mm256_store_si256. */
+  __m256i oligo;
 
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8)); /* Per 32-bit lane: top 8 bits of current land in bits 0..7, low 24 bits of next sit above them */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5));
 
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
+  _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); /* Not masked: a 32-bit lane shifted right by 22 has only 10 bits left */
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5));
+  _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5));
 
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
+  return;
+}
 
+#ifdef USE_UNORDERED_5
+static Chrpos_T
+store_5mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) { /* USE_UNORDERED_5 variant: extract into a local buffer, then delegate the table update to store_fwdrev_simd_128 and return its result */
+  __m256i array[16]; /* One vector per store issued by extract_5mers_rev_simd_128 (4 + 12) */
+			  
+  extract_5mers_rev_simd_128(array,current,next);
+  return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is handed over as a flat UINT4 array */
+}
 
-  _oligo = _mm_srli_epi32(_oligo, 8);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
+/* Ordered variant; includes extract_5mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) inline.  Packs two 5-mer rows into each 32-bit lane as 16-bit halves, rearranges them with 32-bit and 64-bit unpacks, widens each 16-bit value back to 32 bits, and passes the 16 stored vectors to store_fwdrev_simd_128_ordered. */
+static Chrpos_T
+store_5mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m256i current, __m256i next) {
+  __m256i array[16], *out; /* 16 output vectors; out is the store cursor into array */
+  __m256i oligo;
+  __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
+  out = &(array[0]);
 
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
+  /* _row0 = _mm256_and_si256( current, bigmask5); */
+  /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5); */
+  _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask5_epi16); /* Blend mask 0x55 takes the even 16-bit elements (low half of each 32-bit lane) from the second operand and the odd ones from the first; the left shift by 14 moves row1's bits into the high half.  bigmask5_epi16 is defined elsewhere -- presumably the 5-mer mask replicated per 16-bit element, TODO confirm */
 
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
+  /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5); */
+  /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5) ; */
+  _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask5_epi16);
 
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-#endif
+  /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5); */
+  /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5); */
+  _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask5_epi16);
 
+  /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5); */
+  /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5); */
+  _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask5_epi16);
 
-  oligo = low_rc >> 24;		/* For 15..12 */
-  oligo |= high_rc << 8;
+  /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5); */
+  /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5); */
+  _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask5_epi16); /* From row9 on the high-half operand needs a right shift (18 - 16 = 2) instead of a left shift */
 
-#ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 12 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+  /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5); */
+  /* _row11 = _mm256_srli_epi32(current,22); */ /* No mask necessary */
+  _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,6), _mm256_srli_epi32(current,20), 0x55), bigmask5_epi16);
+
+
+  oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8)); /* Per lane: top 8 bits of current in bits 0..7, low 24 bits of next above them */
+  /* _row12 = _mm256_and_si256( oligo, bigmask5); */
+  /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5); */
+  _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask5_epi16);
+
+  /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5); */
+  /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5); */
+  _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask5_epi16);
+
+
+  _u0 = _mm256_unpackhi_epi32(_t0,_t1); /* First rearrangement step: interleave 32-bit elements of adjacent packed-row pairs (high halves into _u0.._u3, low halves into _u4.._u7) */
+  _u1 = _mm256_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm256_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm256_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm256_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm256_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm256_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm256_unpacklo_epi32(_t6,_t7);
+
+
+  _t0 = _mm256_unpackhi_epi64(_u0,_u1); /* Second step: interleave 64-bit elements; the _t registers are reused for the results */
+  _t1 = _mm256_unpackhi_epi64(_u2,_u3);
+  _t2 = _mm256_unpacklo_epi64(_u0,_u1);
+  _t3 = _mm256_unpacklo_epi64(_u2,_u3);
+  _t4 = _mm256_unpackhi_epi64(_u4,_u5);
+  _t5 = _mm256_unpackhi_epi64(_u6,_u7);
+  _t6 = _mm256_unpacklo_epi64(_u4,_u5);
+  _t7 = _mm256_unpacklo_epi64(_u6,_u7);
+
+
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); /* Upper 128-bit halves are stored first; each 16-bit value is zero-extended to 32 bits */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); /* Then the lower 128-bit halves, in the same register order */
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0)));
+  _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0)));
+
+  return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is handed over as a flat UINT4 array */
+}
+#endif
+#endif
 
-  masked = (oligo >> 2) & MASK5; /* 13 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
+#ifdef HAVE_AVX512
+static void
+extract_5mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) { /* Writes 16 vectors through out, advancing it: first the four oligos that straddle current/next (shifts 6,4,2,0), then twelve taken from current alone (shifts 22 down to 0 in steps of 2).  Uses aligned stores, so out must meet the alignment required by _mm512_store_si512. */
+  __m512i oligo;
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,24), _mm512_slli_epi32(next,8)); /* Per 32-bit lane: top 8 bits of current land in bits 0..7, low 24 bits of next sit above them */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask5));
+
+  _mm512_store_si512(out++, _mm512_srli_epi32(current,22)); /* No mask necessary: only 10 bits remain after shifting a 32-bit lane right by 22 */
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5));
+  _mm512_store_si512(out++, _mm512_and_si512( current, hugemask5));
 
-  masked = (oligo >> 4) & MASK5; /* 14 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
+  return;
+}
 
-  masked = (oligo >> 6) & MASK5; /* 15 */
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
+#ifdef USE_UNORDERED_5
+static Chrpos_T
+store_5mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) { /* USE_UNORDERED_5 variant: extract into a local buffer, then delegate the table update to store_fwdrev_simd_256 and return its result */
+  __m512i array[16]; /* One vector per store issued by extract_5mers_rev_simd_256 (4 + 12) */
+			  
+  extract_5mers_rev_simd_256(array,current,next);
+  return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is handed over as a flat UINT4 array */
+}
 
 #else
-  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
-#ifdef SIMD_MASK_THEN_STORE
-  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
-#else
-  _masked = _mm_and_si128(_oligo, mask5);
-#endif
+/* Ordered variant; includes extract_5mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next) inline.  Packs two 5-mer rows into each 32-bit lane as 16-bit halves, rearranges them with 32-bit unpacks and 64-bit permutes, widens each 16-bit value back to 32 bits, and passes the 16 stored 512-bit vectors to store_fwdrev_simd_256_ordered. */
+static Chrpos_T
+store_5mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+			 __m512i current, __m512i next) {
+  __m512i array[16], *out; /* Must be __m512i: each of the 16 _mm512_store_si512 calls below writes 64 bytes, so out++ has to advance by 64 and array has to hold 16 * 64 bytes with 64-byte alignment */
+  __m512i oligo, _shuffle0, _shuffle1, _shuffle2;
+  __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7;
+  __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7;
+
+  out = &(array[0]);
+
+  _u0 = _mm512_and_si512( current, hugemask5); /* row0 in the low part of each 32-bit lane */
+  /* _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask5); /* row1 moved into the upper 16 bits (left shift 14 = 16 - 2).  highmask5 is defined elsewhere -- presumably the 5-mer mask positioned in the upper half of each lane, TODO confirm */
+  _t0 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5);
+  /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask5);
+  _t1 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5);
+  /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask5);
+  _t2 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5);
+  /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask5);
+  _t3 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5);
+  /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask5); /* From row9 on the high-half operand needs a right shift (18 - 16 = 2) instead of a left shift */
+  _t4 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5);
+  /* _row11 = _mm512_srli_epi32(current,22); */ /* No mask necessary */
+  _u1 = _mm512_and_si512( _mm512_srli_epi32(current,6), highmask5);
+  _t5 = _mm512_or_si512(_u0, _u1);
+
+
+  oligo = _mm512_or_si512( _mm512_srli_epi32(current,24), _mm512_slli_epi32(next,8)); /* Per lane: top 8 bits of current in bits 0..7, low 24 bits of next above them */
+  _u0 = _mm512_and_si512( oligo, hugemask5);
+  /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask5);
+  _t6 = _mm512_or_si512(_u0, _u1);
+
+  _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5);
+  /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5); */
+  _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask5);
+  _t7 = _mm512_or_si512(_u0, _u1);
+
+
+  _u0 = _mm512_unpackhi_epi32(_t0,_t1); /* Interleave 32-bit elements of adjacent packed-row pairs (high halves into _u0.._u3, low halves into _u4.._u7) */
+  _u1 = _mm512_unpackhi_epi32(_t2,_t3);
+  _u2 = _mm512_unpackhi_epi32(_t4,_t5);
+  _u3 = _mm512_unpackhi_epi32(_t6,_t7);
+  _u4 = _mm512_unpacklo_epi32(_t0,_t1);
+  _u5 = _mm512_unpacklo_epi32(_t2,_t3);
+  _u6 = _mm512_unpacklo_epi32(_t4,_t5);
+  _u7 = _mm512_unpacklo_epi32(_t6,_t7);
+
+
+  /* Split: top half */
+  _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); /* Indices 8+k select 64-bit element k of the second source: interleaves elements 7,6,5,4 of both sources, highest first */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); /* Gathers 64-bit elements 0..3 of both sources in 128-bit pairs */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); /* _t7 is reused as scratch from here on */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); /* Each 256-bit half holds sixteen 16-bit values, zero-extended to a full 512-bit vector */
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); 
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); /* Same gathering for 64-bit elements 4..7 */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+
+  /* Split: bottom half */
+  _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); /* Same interleave as the top half, now for 64-bit elements 3,2,1,0 */
+  _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1);
+  _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3);
+  _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5);
+  _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7);
+
+
+  /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
 
-  masked = EXTRACT(_masked,0);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
+  /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */
+  _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3);
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0)));
+  _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1)));
+
+  return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); /* Buffer is handed over as a flat UINT4 array */
+}
+#endif
+#endif
 
-  masked = EXTRACT(_masked,1);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
-  }
 
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
-  }
+#if !defined(HAVE_AVX2)
 
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
-  }
+static int
+store_5mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+  Genomecomp_T masked, oligo;
+#ifdef INDIVIDUAL_SHIFTS
+#elif defined(SIMD_MASK_THEN_STORE)
+  UINT4 _masked[4] __attribute__ ((aligned (16)));
+  __m128i _oligo;
+#else
+  __m128i _oligo, _masked;
 #endif
 
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = high_rc & MASK5;	/* 16 */
+  masked = low_rc & MASK5;	/* 0 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
-  masked = (high_rc >> 2) & MASK5; /* 17 */
+  masked = (low_rc >> 2) & MASK5; /* 1 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
-  masked = (high_rc >> 4) & MASK5; /* 18 */
+  masked = (low_rc >> 4) & MASK5; /* 2 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
-  masked = (high_rc >> 6) & MASK5; /* 19 */
+  masked = (low_rc >> 6) & MASK5; /* 3 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
-  masked = (high_rc >> 8) & MASK5; /* 20 */
+  masked = (low_rc >> 8) & MASK5; /* 4 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
-  masked = (high_rc >> 10) & MASK5; /* 21 */
+  masked = (low_rc >> 10) & MASK5; /* 5 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
-  masked = (high_rc >> 12) & MASK5; /* 22 */
+  masked = (low_rc >> 12) & MASK5; /* 6 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
-  masked = (high_rc >> 14) & MASK5; /* 23 */
+  masked = (low_rc >> 14) & MASK5; /* 7 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
-  masked = (high_rc >> 16) & MASK5; /* 24 */
+  masked = (low_rc >> 16) & MASK5; /* 8 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
-  masked = (high_rc >> 18) & MASK5; /* 25 */
+  masked = (low_rc >> 18) & MASK5; /* 9 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
-  masked = (high_rc >> 20) & MASK5; /* 26 */
+  masked = (low_rc >> 20) & MASK5; /* 10 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
-  masked = high_rc >> 22;	/* 27, No mask necessary */
+  masked = low_rc >> 22;	/* 11, No mask necessary */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 
 #else
-  _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+  _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6);
 #ifdef SIMD_MASK_THEN_STORE
   _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
@@ -46016,30 +30046,26 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos));
+    table[positions[masked] + (--counts[masked])] = chrpos;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+    table[positions[masked] + (--counts[masked])] = chrpos - 1;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+    table[positions[masked] + (--counts[masked])] = chrpos - 2;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+    table[positions[masked] + (--counts[masked])] = chrpos - 3;
   }
 
 
@@ -46052,30 +30078,26 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+    table[positions[masked] + (--counts[masked])] = chrpos - 4;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+    table[positions[masked] + (--counts[masked])] = chrpos - 5;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+    table[positions[masked] + (--counts[masked])] = chrpos - 6;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+    table[positions[masked] + (--counts[masked])] = chrpos - 7;
   }
 
 
@@ -46088,64 +30110,56 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+    table[positions[masked] + (--counts[masked])] = chrpos - 8;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+    table[positions[masked] + (--counts[masked])] = chrpos - 9;
   }
 
   masked = EXTRACT(_masked,2);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+    table[positions[masked] + (--counts[masked])] = chrpos - 10;
   }
 
   masked = EXTRACT(_masked,3);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+    table[positions[masked] + (--counts[masked])] = chrpos - 11;
   }
 #endif
 
 
-  oligo = high_rc >> 24;	/* For 31..28 */
-  oligo |= nextlow_rc << 8;
+  oligo = low_rc >> 24;		/* For 15..12 */
+  oligo |= high_rc << 8;
 
 #ifdef INDIVIDUAL_SHIFTS
-  masked = oligo & MASK5; /* 28 */
+  masked = oligo & MASK5; /* 12 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
-  masked = (oligo >> 2) & MASK5; /* 29 */
+  masked = (oligo >> 2) & MASK5; /* 13 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
 
-  masked = (oligo >> 4) & MASK5; /* 30 */
+  masked = (oligo >> 4) & MASK5; /* 14 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  masked = (oligo >> 6) & MASK5; /* 31 */
+  masked = (oligo >> 6) & MASK5; /* 15 */
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
 
 #else
@@ -46158,1476 +30172,818 @@ store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *posit
 
   masked = EXTRACT(_masked,0);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+    table[positions[masked] + (--counts[masked])] = chrpos - 12;
   }
 
   masked = EXTRACT(_masked,1);
   if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  masked = EXTRACT(_masked,2);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  masked = EXTRACT(_masked,3);
-  if (counts[masked]) {
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-#endif
-
-  return chrpos - 32;
-}
-
-#else	/* HAVE_AVX2 */
-
-static int
-store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts,
-		 Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
-  Genomecomp_T masked, oligo;
-  __m256i _oligo, _masked, _counts;
-
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos));
-    table[--pointers[masked]] = chrpos;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
-    table[--pointers[masked]] = chrpos - 1;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
-    table[--pointers[masked]] = chrpos - 2;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
-    table[--pointers[masked]] = chrpos - 3;
-  }
-
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
-    table[--pointers[masked]] = chrpos - 4;
-  }
-
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
-    table[--pointers[masked]] = chrpos - 5;
-  }
-
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
-    table[--pointers[masked]] = chrpos - 6;
-  }
-
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
-    table[--pointers[masked]] = chrpos - 7;
-  }
-
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
-    table[--pointers[masked]] = chrpos - 8;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
-    table[--pointers[masked]] = chrpos - 9;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
-    table[--pointers[masked]] = chrpos - 10;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
-    table[--pointers[masked]] = chrpos - 11;
-  }
-
-
-  oligo = low_rc >> 24;		/* For 15..12 */
-  oligo |= high_rc << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
-    table[--pointers[masked]] = chrpos - 12;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
-    table[--pointers[masked]] = chrpos - 13;
+    table[positions[masked] + (--counts[masked])] = chrpos - 13;
   }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
+
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
-    table[--pointers[masked]] = chrpos - 14;
+    table[positions[masked] + (--counts[masked])] = chrpos - 14;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
-    table[--pointers[masked]] = chrpos - 15;
+    table[positions[masked] + (--counts[masked])] = chrpos - 15;
   }
+#endif
 
 
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
+#ifdef INDIVIDUAL_SHIFTS
+  masked = high_rc & MASK5;	/* 16 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
-    table[--pointers[masked]] = chrpos - 16;
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
   }
 
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 2) & MASK5; /* 17 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
-    table[--pointers[masked]] = chrpos - 17;
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
   }
 
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 4) & MASK5; /* 18 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
-    table[--pointers[masked]] = chrpos - 18;
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 6) & MASK5; /* 19 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
-    table[--pointers[masked]] = chrpos - 19;
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
   }
 
-  if (EXTRACT256(_counts,4)) {
-    masked = EXTRACT256(_masked,4);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 8) & MASK5; /* 20 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
-    table[--pointers[masked]] = chrpos - 20;
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
   }
 
-  if (EXTRACT256(_counts,5)) {
-    masked = EXTRACT256(_masked,5);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 10) & MASK5; /* 21 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
-    table[--pointers[masked]] = chrpos - 21;
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
   }
 
-  if (EXTRACT256(_counts,6)) {
-    masked = EXTRACT256(_masked,6);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 12) & MASK5; /* 22 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
-    table[--pointers[masked]] = chrpos - 22;
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
   }
 
-  if (EXTRACT256(_counts,7)) {
-    masked = EXTRACT256(_masked,7);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 14) & MASK5; /* 23 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
-    table[--pointers[masked]] = chrpos - 23;
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
   }
 
-
-  _oligo = _mm256_srli_epi32(_oligo, 16);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 16) & MASK5; /* 24 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
-    table[--pointers[masked]] = chrpos - 24;
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
   }
 
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 18) & MASK5; /* 25 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
-    table[--pointers[masked]] = chrpos - 25;
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
   }
 
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
+  masked = (high_rc >> 20) & MASK5; /* 26 */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
-    table[--pointers[masked]] = chrpos - 26;
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
 
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
+  masked = high_rc >> 22;	/* 27, No mask necessary */
+  if (counts[masked]) {
     debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
-    table[--pointers[masked]] = chrpos - 27;
-  }
-
-
-  oligo = high_rc >> 24;	/* For 31..28 */
-  oligo |= nextlow_rc << 8;
-
-  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14);
-  _masked = _mm256_and_si256(_oligo, bigmask5);
-  _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4);
-
-  if (EXTRACT256(_counts,0)) {
-    masked = EXTRACT256(_masked,0);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
-    table[--pointers[masked]] = chrpos - 28;
-  }
-
-  if (EXTRACT256(_counts,1)) {
-    masked = EXTRACT256(_masked,1);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
-    table[--pointers[masked]] = chrpos - 29;
-  }
-
-  if (EXTRACT256(_counts,2)) {
-    masked = EXTRACT256(_masked,2);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
-    table[--pointers[masked]] = chrpos - 30;
-  }
-
-  if (EXTRACT256(_counts,3)) {
-    masked = EXTRACT256(_masked,3);
-    assert(pointers[masked] > positions[masked]);
-    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
-    table[--pointers[masked]] = chrpos - 31;
-  }
-
-  return chrpos - 32;
-}
-
-#endif  /* HAVE_AVX2 */
-
-
-
-#ifndef USE_SIMD_FOR_COUNTS
-static void
-count_positions_rev_std (Count_T *counts, Inquery_T *inquery, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
-			 int genestrand) {
-  int startdiscard, enddiscard;
-  Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc,
-    low, high, nextlow;
-
-  debug(printf("Starting count_positions_rev_std\n"));
-
-
-  if (left_plus_length < (Univcoord_T) indexsize) {
-    left_plus_length = 0;
-  } else {
-    left_plus_length -= indexsize;
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
   }
 
-  ptr = startptr = left/32U*3;
-  endptr = left_plus_length/32U*3;
-  startdiscard = left % 32; /* (left+pos5) % 32 */
-  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-  
-  if (left_plus_length <= left) {
-    /* Skip */
-
-  } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    low_rc = ~low;
-    high_rc = ~high;
-    nextlow_rc = ~nextlow;
-
-    if (indexsize == 9) {
-      count_9mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
-    }
-
-  } else {
-    /* Genome_print_blocks(ref_blocks,left,left+16); */
-
-    /* Start block */
-#ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
-#endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
-
-    low_rc = ~low;
-    high_rc = ~high;
-    nextlow_rc = ~nextlow;
-
-    if (indexsize == 9) {
-      count_9mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 8) {
-      count_8mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 7) {
-      count_7mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 6) {
-      count_6mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 5) {
-      count_5mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
-    }
-
-    ptr += 3;
-
-    /* Middle blocks */
-    if (indexsize == 9) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
-
-	count_9mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
-      }
-
-    } else if (indexsize == 8) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
-
-	count_8mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
-      }
-
-    } else if (indexsize == 7) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+  _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+    table[positions[masked] + (--counts[masked])] = chrpos - 16;
+  }
 
-	count_7mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
-      }
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+    table[positions[masked] + (--counts[masked])] = chrpos - 17;
+  }
 
-    } else if (indexsize == 6) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+    table[positions[masked] + (--counts[masked])] = chrpos - 18;
+  }
 
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+    table[positions[masked] + (--counts[masked])] = chrpos - 19;
+  }
 
-	count_6mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
-      }
 
-    } else if (indexsize == 5) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+    table[positions[masked] + (--counts[masked])] = chrpos - 20;
+  }
 
-	count_5mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
-      }
-    } else {
-      abort();
-    }
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+    table[positions[masked] + (--counts[masked])] = chrpos - 21;
+  }
 
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+    table[positions[masked] + (--counts[masked])] = chrpos - 22;
+  }
 
-    /* End block */
-    assert(ptr == endptr);
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+    table[positions[masked] + (--counts[masked])] = chrpos - 23;
+  }
 
-#ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+
+  _oligo = _mm_srli_epi32(_oligo, 8);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
 #else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
 
-    low_rc = ~low;
-    high_rc = ~high;
-    nextlow_rc = ~nextlow;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+    table[positions[masked] + (--counts[masked])] = chrpos - 24;
+  }
 
-    if (indexsize == 9) {
-      count_9mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
-    } else {
-      abort();
-    }
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+    table[positions[masked] + (--counts[masked])] = chrpos - 25;
+  }
 
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+    table[positions[masked] + (--counts[masked])] = chrpos - 26;
   }
-  
-  return;
-}
-#endif
- 
 
-#ifdef USE_SIMD_FOR_COUNTS
-static void
-count_positions_rev_simd (Count_T *counts, Inquery_T *inquery, int indexsize,
-			  Univcoord_T left, Univcoord_T left_plus_length, int genestrand) {
-  int startdiscard, enddiscard;
-  Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow;
-  Genomecomp_T low1_rc, high1_rc, high0, low1, high1;
-  __m128i current, next, invert3;
-  /* __m128i array[16]; */
-#ifdef HAVE_SSE4_1
-  __m128i temp;
-#else
-  Genomecomp_T low0_rc, high0_rc;
-#endif
-#ifdef HAVE_AVX2
-  Genomecomp_T low2, high2, low3, high3;
-  __m256i current256, next256, temp256, shift256;
-  __m256i biginvert3;
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+    table[positions[masked] + (--counts[masked])] = chrpos - 27;
+  }
 #endif
 
 
-  debug(printf("Starting count_positions_rev_simd\n"));
+  oligo = high_rc >> 24;	/* For 31..28 */
+  oligo |= nextlow_rc << 8;
 
-  if (left_plus_length < (Univcoord_T) indexsize) {
-    left_plus_length = 0;
-  } else {
-    left_plus_length -= indexsize;
+#ifdef INDIVIDUAL_SHIFTS
+  masked = oligo & MASK5; /* 28 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
   }
 
-  ptr = startptr = left/32U*3;
-  endptr = left_plus_length/32U*3;
-  startdiscard = left % 32; /* (left+pos5) % 32 */
-  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
-  
-  invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
-#ifdef HAVE_AVX2
-  biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
-  shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6);
-#endif
+  masked = (oligo >> 2) & MASK5; /* 29 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
-  if (left_plus_length <= left) {
-    /* Skip */
+  masked = (oligo >> 4) & MASK5; /* 30 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
+
+  masked = (oligo >> 6) & MASK5; /* 31 */
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
 
-  } else if (startptr == endptr) {
-#ifdef WORDS_BIGENDIAN
-    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high1 = ref_blocks[ptr];
-    low1 = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
+  _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6);
+#ifdef SIMD_MASK_THEN_STORE
+  _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5));
+#else
+  _masked = _mm_and_si128(_oligo, mask5);
 #endif
-    if (mode == CMET_STRANDED) {
-      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
 
-    low1_rc = ~low1;
-    high1_rc = ~high1;
-    nextlow_rc = ~nextlow;
+  masked = EXTRACT(_masked,0);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+    table[positions[masked] + (--counts[masked])] = chrpos - 28;
+  }
 
-    if (indexsize == 9) {
-      count_9mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 8) {
-      count_8mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 7) {
-      count_7mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 6) {
-      count_6mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
-    } else if (indexsize == 5) {
-      count_5mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
-    }
+  masked = EXTRACT(_masked,1);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+    table[positions[masked] + (--counts[masked])] = chrpos - 29;
+  }
 
-  } else {
-    /* Genome_print_blocks(ref_blocks,left,left+16); */
+  masked = EXTRACT(_masked,2);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+    table[positions[masked] + (--counts[masked])] = chrpos - 30;
+  }
 
-    /* Start block */
-#ifdef WORDS_BIGENDIAN
-    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
-    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-    high1 = ref_blocks[ptr];
-    low1 = ref_blocks[ptr+1];
-    nextlow = ref_blocks[ptr+4];
+  masked = EXTRACT(_masked,3);
+  if (counts[masked]) {
+    debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+    table[positions[masked] + (--counts[masked])] = chrpos - 31;
+  }
 #endif
-    if (mode == CMET_STRANDED) {
-      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
-    } else if (mode == CMET_NONSTRANDED) {
-      if (genestrand > 0) {
-	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
-      } else {
-	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
-      }
-    }
 
-    nextlow_rc = ~nextlow;
-    low1_rc = ~low1;
-    high1_rc = ~high1;
+  return chrpos - 32;
+}
 
-    if (indexsize == 9) {
-      count_9mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 8) {
-      count_8mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 7) {
-      count_7mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 6) {
-      count_6mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else if (indexsize == 5) {
-      count_5mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
-    } else {
-      fprintf(stderr,"indexsize %d not supported\n",indexsize);
-      abort();
+#else	/* HAVE_AVX2 */
+
+static int
+store_5mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts,
+		    Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) {
+  Genomecomp_T masked, oligo;
+  __m256i _oligo, _masked, _counts;
+  __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask;
+  /* AVX2 variant.  For k = 0..31, stores chrpos - k into table[] for the masked oligo at that offset, but only
+     while counts[masked] is nonzero; every store pre-decrements counts[masked].  Returns chrpos - 32. */
+  _address_mask = _mm256_set1_epi32(0x3);	/* Low 2 bits of an oligo: byte index inside a 32-bit envelope */
+  _count_mask = _mm256_set1_epi32(0xFF);	/* Keeps a single byte-sized count */
+  /* Offsets 0..7 are taken from low_rc, one offset per lane.  NOTE(review): bigshift0to14 and bigmask5 are
+     defined outside this function; presumably per-lane shifts of 0,2,...,14 bits and a 5-mer mask -- confirm. */
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
+  /* Read counts[masked] for all lanes: gather the enclosing 32-bit word, then isolate one byte.  Assumes Count_T is a 1-byte type -- TODO confirm. */
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
+  /* _counts is only a snapshot: the stores below decrement counts[], so each lane re-reads counts[masked] before storing. */
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos));
+      table[positions[masked] + (--counts[masked])] = chrpos;
     }
+  }
 
-    ptr += 3;
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 1));
+      table[positions[masked] + (--counts[masked])] = chrpos - 1;
+    }
+  }
 
-    /* Middle blocks */
-    if (indexsize == 9) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 2));
+      table[positions[masked] + (--counts[masked])] = chrpos - 2;
+    }
+  }
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 3));
+      table[positions[masked] + (--counts[masked])] = chrpos - 3;
+    }
+  }
 
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 4));
+      table[positions[masked] + (--counts[masked])] = chrpos - 4;
+    }
+  }
 
-        count_9mers_rev_simd_128(counts,inquery,current256,next256);
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 5));
+      table[positions[masked] + (--counts[masked])] = chrpos - 5;
+    }
+  }
 
-	ptr += 12;
-      }
-#endif
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 6));
+      table[positions[masked] + (--counts[masked])] = chrpos - 6;
+    }
+  }
 
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 7));
+      table[positions[masked] + (--counts[masked])] = chrpos - 7;
+    }
+  }
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
 
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
+  _oligo = _mm256_srli_epi32(_oligo, 16);	/* Shift every lane 16 more bits; lanes 0..3 are then used for offsets 8..11 */
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 8));
+      table[positions[masked] + (--counts[masked])] = chrpos - 8;
+    }
+  }
 
-#if 0
-	extract_9mers_rev_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-        count_9mers_rev_simd(counts,inquery,current,next);
-#endif
-	ptr += 6;
-      }
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 9));
+      table[positions[masked] + (--counts[masked])] = chrpos - 9;
+    }
+  }
 
-      if (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 10));
+      table[positions[masked] + (--counts[masked])] = chrpos - 10;
+    }
+  }
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 11));
+      table[positions[masked] + (--counts[masked])] = chrpos - 11;
+    }
+  }
 
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
 
-	count_9mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
-      }
+  oligo = low_rc >> 24;		/* For 15..12 */
+  oligo |= high_rc << 8;	/* Append high_rc above the top byte of low_rc so oligos can span both words */
 
-    } else if (indexsize == 8) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 12));
+      table[positions[masked] + (--counts[masked])] = chrpos - 12;
+    }
+  }
 
-        count_8mers_rev_simd_128(counts,inquery,current256,next256);
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 13));
+      table[positions[masked] + (--counts[masked])] = chrpos - 13;
+    }
+  }
 
-	ptr += 12;
-      }
-#endif
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 14));
+      table[positions[masked] + (--counts[masked])] = chrpos - 14;
+    }
+  }
 
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 15));
+      table[positions[masked] + (--counts[masked])] = chrpos - 15;
+    }
+  }
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14);	/* Offsets 16..23 are taken from high_rc */
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-#if 0
-	extract_8mers_rev_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_8mers_rev_simd(counts,inquery,current,next);
-#endif
-	ptr += 6;
-      }
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 16));
+      table[positions[masked] + (--counts[masked])] = chrpos - 16;
+    }
+  }
 
-      if (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 17));
+      table[positions[masked] + (--counts[masked])] = chrpos - 17;
+    }
+  }
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 18));
+      table[positions[masked] + (--counts[masked])] = chrpos - 18;
+    }
+  }
 
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 19));
+      table[positions[masked] + (--counts[masked])] = chrpos - 19;
+    }
+  }
 
-	count_8mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
-      }
+  if (EXTRACT256(_counts,4)) {
+    masked = EXTRACT256(_masked,4);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 20));
+      table[positions[masked] + (--counts[masked])] = chrpos - 20;
+    }}
 
-    } else if (indexsize == 7) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
+  if (EXTRACT256(_counts,5)) {
+    masked = EXTRACT256(_masked,5);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 21));
+      table[positions[masked] + (--counts[masked])] = chrpos - 21;
+    }
+  }
 
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  if (EXTRACT256(_counts,6)) {
+    masked = EXTRACT256(_masked,6);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 22));
+      table[positions[masked] + (--counts[masked])] = chrpos - 22;
+    }
+  }
 
-        count_7mers_rev_simd_128(counts,inquery,current256,next256);
+  if (EXTRACT256(_counts,7)) {
+    masked = EXTRACT256(_masked,7);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 23));
+      table[positions[masked] + (--counts[masked])] = chrpos - 23;
+    }
+  }
 
-	ptr += 12;
-      }
-#endif
 
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  _oligo = _mm256_srli_epi32(_oligo, 16);	/* Shift every lane 16 more bits; lanes 0..3 are then used for offsets 24..27 */
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 24));
+      table[positions[masked] + (--counts[masked])] = chrpos - 24;
+    }
+  }
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 25));
+      table[positions[masked] + (--counts[masked])] = chrpos - 25;
+    }
+  }
 
-#if 0
-	extract_7mers_rev_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_7mers_rev_simd(counts,inquery,current,next);
-#endif
-	ptr += 6;
-      }
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 26));
+      table[positions[masked] + (--counts[masked])] = chrpos - 26;
+    }
+  }
 
-      if (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 27));
+      table[positions[masked] + (--counts[masked])] = chrpos - 27;
+    }
+  }
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
 
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
+  oligo = high_rc >> 24;	/* For 31..28 */
+  oligo |= nextlow_rc << 8;	/* Append nextlow_rc above the top byte of high_rc so oligos can span both words */
 
-	count_7mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
-      }
+  _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14);
+  _masked = _mm256_and_si256(_oligo, bigmask5);
 
-    } else if (indexsize == 6) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */
+  _addresses = _mm256_and_si256(_masked,_address_mask);
+  _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */
+  _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4);
+  _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */
+  _counts = _mm256_and_si256(_counts,_count_mask);
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
+  if (EXTRACT256(_counts,0)) {
+    masked = EXTRACT256(_masked,0);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 28));
+      table[positions[masked] + (--counts[masked])] = chrpos - 28;
+    }
+  }
 
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+  if (EXTRACT256(_counts,1)) {
+    masked = EXTRACT256(_masked,1);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 29));
+      table[positions[masked] + (--counts[masked])] = chrpos - 29;
+    }
+  }
 
-        count_6mers_rev_simd_128(counts,inquery,current256,next256);
+  if (EXTRACT256(_counts,2)) {
+    masked = EXTRACT256(_masked,2);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 30));
+      table[positions[masked] + (--counts[masked])] = chrpos - 30;
+    }
+  }
 
-	ptr += 12;
-      }
-#endif
+  if (EXTRACT256(_counts,3)) {
+    masked = EXTRACT256(_masked,3);
+    if (counts[masked]) {	/* Have to re-check if there is a conflict */
+      debug(printf("Storing masked %u at %u\n",masked,chrpos - 31));
+      table[positions[masked] + (--counts[masked])] = chrpos - 31;
+    }
+  }
 
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  return chrpos - 32;	/* All 32 offsets (chrpos .. chrpos - 31) have been handled */
+}
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
+#endif  /* HAVE_AVX2 */
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
 
-#if 0
-	extract_6mers_rev_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_6mers_rev_simd(counts,inquery,current,next);
-#endif
-	ptr += 6;
-      }
+#ifndef HAVE_SSE2
+static void
+count_positions_rev_std (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length,
+			 int genestrand) {
+  int startdiscard, enddiscard;
+  Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc,
+    low, high, nextlow;
 
-      if (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+  debug(printf("Starting count_positions_rev_std\n"));
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
 
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
+  if (left_plus_length < (Univcoord_T) indexsize) {
+    left_plus_length = 0;
+  } else {
+    left_plus_length -= indexsize;
+  }
 
-	count_6mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
-      }
+  ptr = startptr = left/32U*3;
+  endptr = left_plus_length/32U*3;
+  startdiscard = left % 32; /* (left+pos5) % 32 */
+  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+  
+  if (left_plus_length <= left) {
+    /* Skip */
 
-    } else if (indexsize == 5) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
+  } else if (startptr == endptr) {
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+#else
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
 
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+    low_rc = ~low;
+    high_rc = ~high;
+    nextlow_rc = ~nextlow;
 
-        count_5mers_rev_simd_128(counts,inquery,current256,next256);
+    if (indexsize == 9) {
+      count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 8) {
+      count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 7) {
+      count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 6) {
+      count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 5) {
+      count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
 
-	ptr += 12;
-      }
-#endif
+  } else {
+    /* Genome_print_blocks(ref_blocks,left,left+16); */
 
-      while (ptr + 6 <= endptr) {
+    /* Start block */
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+    low_rc = ~low;
+    high_rc = ~high;
+    nextlow_rc = ~nextlow;
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
+    if (indexsize == 9) {
+      count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 8) {
+      count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 7) {
+      count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 6) {
+      count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 5) {
+      count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
 
-#if 0
-	extract_5mers_rev_simd(array,current,next);
-	count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array);
-#else
-	count_5mers_rev_simd(counts,inquery,current,next);
-#endif
-	ptr += 6;
-      }
+    ptr += 3;
 
-      if (ptr + 3 <= endptr) {
+    /* Middle blocks */
+    while (ptr + 3 <= endptr) {
 #ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      high = Bigendian_convert_uint(ref_blocks[ptr]);
+      low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
+      high = ref_blocks[ptr];
+      low = ref_blocks[ptr+1];
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
-
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	}
+      }
 
-	count_5mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
+      low_rc = ~low;
+      high_rc = ~high;
+      nextlow_rc = ~nextlow;
+
+      if (indexsize == 9) {
+	count_9mers_rev_32(counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 8) {
+	count_8mers_rev_32(counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 7) {
+	count_7mers_rev_32(counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 6) {
+	count_6mers_rev_32(counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 5) {
+	count_5mers_rev_32(counts,low_rc,high_rc,nextlow_rc);
+      } else {
+	abort();
       }
 
-    } else {
-      abort();
+      ptr += 3;
     }
 
 
@@ -47635,43 +30991,61 @@ count_positions_rev_simd (Count_T *counts, Inquery_T *inquery, int indexsize,
     assert(ptr == endptr);
 
 #ifdef WORDS_BIGENDIAN
-    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-    /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
     nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high1 = ref_blocks[ptr];
-    /* low1 = ref_blocks[ptr+1]; */
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
       } else {
-	high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
-    /* low1_rc = ~low1; */
-    low1_rc = nextlow_rc;
-
+    low_rc = ~low;
+    high_rc = ~high;
     nextlow_rc = ~nextlow;
-    high1_rc = ~high1;
 
     if (indexsize == 9) {
-      count_9mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 8) {
-      count_8mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 7) {
-      count_7mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 6) {
-      count_6mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 5) {
-      count_5mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else {
       abort();
     }
+
   }
   
   return;
@@ -47679,65 +31053,316 @@ count_positions_rev_simd (Count_T *counts, Inquery_T *inquery, int indexsize,
 #endif
  
 
-#ifndef USE_SIMD_FOR_COUNTS
+
+#ifdef HAVE_AVX2
+/* Loads the high/low words of the four 3-word blocks at block_ptr (the low word of block 0 is
+   skipped), applies the reduction selected by mode and genestrand, stores the reduced
+   block_ptr[13] in *nextlow, and returns the words, not complemented here, with nextlow_rc on top. */
+static __m256i
+apply_mode_rev_256 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand, Genomecomp_T *nextlow, Genomecomp_T nextlow_rc) {
+  Genomecomp_T high0, low1, high1, low2, high2, low3, high3;
+  high0 = block_ptr[0];		/* block_ptr[1] is not needed: nextlow_rc fills that slot in the result */
+  high1 = block_ptr[3]; low1 = block_ptr[4];
+  high2 = block_ptr[6]; low2 = block_ptr[7];
+  high3 = block_ptr[9]; low3 = block_ptr[10];
+  *nextlow = block_ptr[13];
+  /* Each branch reduces every loaded word and *nextlow with one helper; other modes leave them as loaded */
+  if (mode == CMET_STRANDED) {
+    high0 = Cmet_reduce_ga(high0);
+    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+    *nextlow = Cmet_reduce_ga(*nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Cmet_reduce_ct(high0);
+      high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+      high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+      high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+      *nextlow = Cmet_reduce_ct(*nextlow);
+    } else {
+      high0 = Cmet_reduce_ga(high0);
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+      high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+      high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+      *nextlow = Cmet_reduce_ga(*nextlow);
+    }
+  } else if (mode == ATOI_STRANDED) {
+    high0 = Atoi_reduce_ag(high0);
+    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+    high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+    high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+    *nextlow = Atoi_reduce_ag(*nextlow);
+  } else if (mode == ATOI_NONSTRANDED) {	/* must differ from the CMET_NONSTRANDED test above to be reachable */
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_tc(high0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      *nextlow = Atoi_reduce_tc(*nextlow);
+    } else {
+      high0 = Atoi_reduce_ag(high0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      *nextlow = Atoi_reduce_ag(*nextlow);
+    }
+  } else if (mode == TTOC_STRANDED) {
+    high0 = Atoi_reduce_tc(high0);
+    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+    high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+    high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+    *nextlow = Atoi_reduce_tc(*nextlow);
+  } else if (mode == TTOC_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_ag(high0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      *nextlow = Atoi_reduce_ag(*nextlow);
+    } else {
+      high0 = Atoi_reduce_tc(high0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      *nextlow = Atoi_reduce_tc(*nextlow);
+    }
+  }
+  /* _mm256_set_epi32 lists its arguments from the highest 32-bit element down to the lowest */
+  return _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
+}
+#endif
+
+#ifdef HAVE_AVX512
+/* Loads the high/low words of the eight 3-word blocks at block_ptr (the low word of block 0 is
+   skipped), applies the reduction selected by mode and genestrand, stores the reduced
+   block_ptr[25] in *nextlow, and returns the words, not complemented here, with nextlow_rc on top. */
+static __m512i
+apply_mode_rev_512 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand, Genomecomp_T *nextlow, Genomecomp_T nextlow_rc) {
+  Genomecomp_T low0, high0, low1, high1, low2, high2, low3, high3,
+    low4, high4, low5, high5, low6, high6, low7, high7;
+
+  high0 = block_ptr[0]; /* low0 = block_ptr[1]; */
+  high1 = block_ptr[3]; low1 = block_ptr[4];
+  high2 = block_ptr[6]; low2 = block_ptr[7];
+  high3 = block_ptr[9]; low3 = block_ptr[10];
+  high4 = block_ptr[12]; low4 = block_ptr[13];
+  high5 = block_ptr[15]; low5 = block_ptr[16];
+  high6 = block_ptr[18]; low6 = block_ptr[19];
+  high7 = block_ptr[21]; low7 = block_ptr[22];
+  *nextlow = block_ptr[25];
+  /* Each branch reduces every loaded word and *nextlow with one helper; other modes leave them as loaded */
+  if (mode == CMET_STRANDED) {
+    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+    high4 = Cmet_reduce_ga(high4); low4 = Cmet_reduce_ga(low4);
+    high5 = Cmet_reduce_ga(high5); low5 = Cmet_reduce_ga(low5);
+    high6 = Cmet_reduce_ga(high6); low6 = Cmet_reduce_ga(low6);
+    high7 = Cmet_reduce_ga(high7); low7 = Cmet_reduce_ga(low7);
+    *nextlow = Cmet_reduce_ga(*nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
+      high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+      high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+      high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+      high4 = Cmet_reduce_ct(high4); low4 = Cmet_reduce_ct(low4);
+      high5 = Cmet_reduce_ct(high5); low5 = Cmet_reduce_ct(low5);
+      high6 = Cmet_reduce_ct(high6); low6 = Cmet_reduce_ct(low6);
+      high7 = Cmet_reduce_ct(high7); low7 = Cmet_reduce_ct(low7);
+      *nextlow = Cmet_reduce_ct(*nextlow);
+    } else {
+      high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+      high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+      high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+      high4 = Cmet_reduce_ga(high4); low4 = Cmet_reduce_ga(low4);
+      high5 = Cmet_reduce_ga(high5); low5 = Cmet_reduce_ga(low5);
+      high6 = Cmet_reduce_ga(high6); low6 = Cmet_reduce_ga(low6);
+      high7 = Cmet_reduce_ga(high7); low7 = Cmet_reduce_ga(low7);
+      *nextlow = Cmet_reduce_ga(*nextlow);
+    }
+  } else if (mode == ATOI_STRANDED) {
+    high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+    high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+    high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+    high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+    high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+    high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+    high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+    *nextlow = Atoi_reduce_ag(*nextlow);
+  } else if (mode == ATOI_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+      high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+      high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+      high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+      *nextlow = Atoi_reduce_tc(*nextlow);
+    } else {
+      high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+      high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+      high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+      high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+      *nextlow = Atoi_reduce_ag(*nextlow);
+    }
+  } else if (mode == TTOC_STRANDED) {
+    high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+    high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+    high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+    high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+    high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+    high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+    high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+    *nextlow = Atoi_reduce_tc(*nextlow);
+  } else if (mode == TTOC_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+      high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+      high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+      high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+      *nextlow = Atoi_reduce_ag(*nextlow);
+    } else {
+      high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+      high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+      high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+      high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+      *nextlow = Atoi_reduce_tc(*nextlow);
+    }
+  }
+  /* _mm512_set_epi32 lists its arguments from the highest 32-bit element down to the lowest */
+  return _mm512_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3,
+			  low4,high4,low5,high5,low6,high6,low7,high7);
+}
+#endif
+
+
+
+#ifdef HAVE_SSE2
 static void
-store_positions_rev_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize,
-			 Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
-			 int genestrand) {
+count_positions_rev_simd (Count_T *counts, int indexsize,
+			  Univcoord_T left, Univcoord_T left_plus_length, int genestrand) {
   int startdiscard, enddiscard;
-  Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc,
-    low, high, nextlow;
+  Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow;
+  Genomecomp_T low1_rc, high1_rc, high0, low1, high1;
+  __m128i current, a, b, next, invert3, invert4;
+  __m128i array[16];
+#ifdef HAVE_AVX2
+  __m256i array256[16];
+  Genomecomp_T low2, high2, low3, high3;
+  __m256i current256, a256, b256, c256, d256, next256, temp256, shift256;
+  __m256i biginvert3, biginvert4;
+#endif
+#ifdef HAVE_AVX512
+  __m128i temp;
+  __m512i array512[16];
+  Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7;
+  __m512i current512, a512, b512, next512, temp512, shift512;
+  __m512i hugeinvert3, hugeinvert4;
+#endif
+
 
+  debug(printf("Starting count_positions_rev_simd\n"));
 
   if (left_plus_length < (Univcoord_T) indexsize) {
     left_plus_length = 0;
   } else {
     left_plus_length -= indexsize;
   }
-  chrpos += (left_plus_length - left); /* We are starting from the right */
 
   ptr = startptr = left/32U*3;
   endptr = left_plus_length/32U*3;
   startdiscard = left % 32; /* (left+pos5) % 32 */
   enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
   
+  invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
+  invert4 = _mm_set1_epi32(0xFFFFFFFF);
+#ifdef HAVE_AVX2
+  biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
+  biginvert4 = _mm256_set1_epi32(0xFFFFFFFF);
+  shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6);
+#endif
+#ifdef HAVE_AVX512
+  hugeinvert3 = _mm512_inserti64x4(_mm512_set1_epi32(0xFFFFFFFF), biginvert3, 0x1);
+  hugeinvert4 = _mm512_set1_epi32(0xFFFFFFFF);
+  shift512 = _mm512_setr_epi32(15,0,1,2,3,4,5,6, 7,8,9,10,11,12,13,14);
+#endif
+
   if (left_plus_length <= left) {
     /* Skip */
 
   } else if (startptr == endptr) {
 #ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
     nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
+    high1 = ref_blocks[ptr];
+    low1 = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
       } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
-    low_rc = ~low;
-    high_rc = ~high;
+    low1_rc = ~low1;
+    high1_rc = ~high1;
     nextlow_rc = ~nextlow;
 
     if (indexsize == 9) {
-      chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+      count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+      count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+      count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+      count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
+      count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -47748,38 +31373,57 @@ store_positions_rev_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
 
     /* Start block */
 #ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
     nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
+    high1 = ref_blocks[ptr];
+    low1 = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
       } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
-    low_rc = ~low;
-    high_rc = ~high;
     nextlow_rc = ~nextlow;
+    low1_rc = ~low1;
+    high1_rc = ~high1;
 
     if (indexsize == 9) {
-      chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -47788,153 +31432,262 @@ store_positions_rev_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     ptr += 3;
 
     /* Middle blocks */
-    if (indexsize == 9) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
+#ifdef HAVE_AVX512
+    while (ptr + 24 <= endptr) {
+
+      if (mode == STANDARD) {
+	a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr]));
+	b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7]));
+	current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(16+14, 16+15, 16+11, 16+12, 16+8, 16+9, 12, 13, 9, 10, 6, 7, 3, 4, 0, 1), b512);
+	current512 = _mm512_xor_si512(current512,hugeinvert4);
+	nextlow = ref_blocks[ptr+25];
+      } else {
+	current512 = apply_mode_rev_512(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc);
+	current512 = _mm512_xor_si512(current512,hugeinvert3);
+      }
 
-	chrpos = store_9mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
+      nextlow_rc = ~nextlow;	/* Take from this loop */
+      
+      current = _mm512_extracti32x4_epi32(current512,3);
+      temp = _mm_insert_epi32(current,nextlow_rc,0x03);			
+      temp512 = _mm512_inserti32x4(current512,temp,0x03);
+      next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! */
+      
+      if (indexsize == 9) {
+	extract_9mers_rev_simd_256(array512,current512,next512);
+      } else if (indexsize == 8) {
+	extract_8mers_rev_simd_256(array512,current512,next512);
+      } else if (indexsize == 7) {
+	extract_7mers_rev_simd_256(array512,current512,next512);
+      } else if (indexsize == 6) {
+	extract_6mers_rev_simd_256(array512,current512,next512);
+      } else if (indexsize == 5) {
+	extract_5mers_rev_simd_256(array512,current512,next512);
+      } else {
+	abort();
       }
+      count_fwdrev_simd_n(counts,(Genomecomp_T *) array512,256);
 
-    } else if (indexsize == 8) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+      ptr += 24;
+    }
 #endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
 
-	chrpos = store_8mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
+#ifdef HAVE_AVX2
+    while (ptr + 12 <= endptr) {
+
+      if (mode == STANDARD) {
+	a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr]));
+	b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3]));
+	c256 = _mm256_unpacklo_epi64(b256,a256);
+	d256 = _mm256_unpackhi_epi64(b256,a256);
+	current256 = _mm256_permute2x128_si256(c256, d256, 0x03);
+	current256 = _mm256_xor_si256(current256,biginvert4);
+	nextlow = ref_blocks[ptr+13];
+      } else {
+	current256 = apply_mode_rev_256(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc);
+	current256 = _mm256_xor_si256(current256,biginvert3);
       }
 
-    } else if (indexsize == 7) {
-      while (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      nextlow_rc = ~nextlow;	/* Take from this loop */
+      
+#if 0
+      /* Doesn't work, because performs shift within 128-bit lanes */
+      next256 = _mm256_alignr_epi8(current256,_mm256_set1_epi32(nextlow_rc),0);
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+      temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
+      next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
 #endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+      
+      if (indexsize == 9) {
+	extract_9mers_rev_simd_128(array256,current256,next256);
+      } else if (indexsize == 8) {
+	extract_8mers_rev_simd_128(array256,current256,next256);
+      } else if (indexsize == 7) {
+	extract_7mers_rev_simd_128(array256,current256,next256);
+      } else if (indexsize == 6) {
+	extract_6mers_rev_simd_128(array256,current256,next256);
+      } else if (indexsize == 5) {
+	extract_5mers_rev_simd_128(array256,current256,next256);
+      } else {
+	abort();
+      }
+      count_fwdrev_simd_n(counts,(Genomecomp_T *) array256,128);
 
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
+      ptr += 12;
+    }
+#endif
 
-	chrpos = store_7mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
-      }
+    while (ptr + 6 <= endptr) {
 
-    } else if (indexsize == 6) {
-      while (ptr + 3 <= endptr) {
+      if (mode == STANDARD) {
+#ifdef HAVE_SSSE3
+	a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr]));
+	b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3]));
+	current = _mm_unpacklo_epi64(b,a);
+	current = _mm_xor_si128(current,invert4);
+	nextlow = ref_blocks[ptr+7];
+#else
+	/* Solution for SSE2.  Need separate values to construct "next" */
+	high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */
+	high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+	nextlow = ref_blocks[ptr+7];
+
+	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+	current = _mm_xor_si128(current,invert3);
+#endif
+
+      } else {
 #ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+	high0 = Bigendian_convert_uint(ref_blocks[ptr]); /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+	high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */
+	high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+	nextlow = ref_blocks[ptr+7];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+
+	if (mode == STANDARD) {
+	  /* Skip */
+	} else if (mode == CMET_STRANDED) {
+	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+	  nextlow = Cmet_reduce_ga(nextlow);
 	} else if (mode == CMET_NONSTRANDED) {
 	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
+	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+	    nextlow = Cmet_reduce_ct(nextlow);
+	  } else {
+	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
+	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+	    nextlow = Cmet_reduce_ga(nextlow);
+	  }
+	} else if (mode == ATOI_STRANDED) {
+	  high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	} else if (mode == ATOI_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
+	  } else {
+	    high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  }
+	} else if (mode == TTOC_STRANDED) {
+	  high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	} else if (mode == TTOC_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
 	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	    high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
 	  }
 	}
 
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
-
-	chrpos = store_6mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
+	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
+	current = _mm_xor_si128(current,invert3);
+      }
+      
+      nextlow_rc = ~nextlow;	/* Take from this loop */
+#if defined(HAVE_SSSE3)
+      next = _mm_alignr_epi8(current,_mm_set1_epi32(nextlow_rc),12);
+#elif 0
+      /* Previous solution for SSE4.1 */
+      temp = _mm_insert_epi32(current,nextlow_rc,0x03);
+      next = _mm_shuffle_epi32(temp,0x93);
+#else
+      /* Solution for SSE2 */
+      next = _mm_set_epi32(~high0,~low1,~high1,nextlow_rc);
+#endif
+
+      if (indexsize == 9) {
+	extract_9mers_rev_simd_64(array,current,next);
+      } else if (indexsize == 8) {
+	extract_8mers_rev_simd_64(array,current,next);
+      } else if (indexsize == 7) {
+	extract_7mers_rev_simd_64(array,current,next);
+      } else if (indexsize == 6) {
+	extract_6mers_rev_simd_64(array,current,next);
+      } else if (indexsize == 5) {
+	extract_5mers_rev_simd_64(array,current,next);
+      } else {
+	abort();
       }
+      count_fwdrev_simd_n(counts,(Genomecomp_T *) array,64);
 
-    } else if (indexsize == 5) {
-      while (ptr + 3 <= endptr) {
+      ptr += 6;
+    }
+
+    if (ptr + 3 <= endptr) {
 #ifdef WORDS_BIGENDIAN
-	high = Bigendian_convert_uint(ref_blocks[ptr]);
-	low = Bigendian_convert_uint(ref_blocks[ptr+1]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+      /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high = ref_blocks[ptr];
-	low = ref_blocks[ptr+1];
-	nextlow = ref_blocks[ptr+4];
+      high1 = ref_blocks[ptr];
+      /* low1 = ref_blocks[ptr+1]; */
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	low_rc = ~low;
-	high_rc = ~high;
-	nextlow_rc = ~nextlow;
 
-	chrpos = store_5mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc);
-	ptr += 3;
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+	}
+      }
+      
+      /* low1_rc = ~low1; */
+      low1_rc = nextlow_rc;
+      
+      nextlow_rc = ~nextlow;
+      high1_rc = ~high1;
+      
+      if (indexsize == 9) {
+	count_9mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 8) {
+	count_8mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 7) {
+	count_7mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 6) {
+	count_6mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 5) {
+	count_5mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc);
+      } else {
+	abort();
       }
 
-    } else {
-      abort();
+      ptr += 3;
     }
 
 
@@ -47942,38 +31695,59 @@ store_positions_rev_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
     assert(ptr == endptr);
 
 #ifdef WORDS_BIGENDIAN
-    high = Bigendian_convert_uint(ref_blocks[ptr]);
-    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+    /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
     nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high = ref_blocks[ptr];
-    low = ref_blocks[ptr+1];
+    high1 = ref_blocks[ptr];
+    /* low1 = ref_blocks[ptr+1]; */
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
       } else {
-	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
-    low_rc = ~low;
-    high_rc = ~high;
+    /* low1_rc = ~low1; */
+    low1_rc = nextlow_rc;
+
     nextlow_rc = ~nextlow;
+    high1_rc = ~high1;
 
     if (indexsize == 9) {
-      chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else {
       abort();
     }
@@ -47982,31 +31756,17 @@ store_positions_rev_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Cou
   return;
 }
 #endif
+ 
 
-#ifdef USE_SIMD_FOR_COUNTS
+#ifndef HAVE_SSE2
 static void
-store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize,
-			  Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
-			  int genestrand) {
+store_positions_rev_std (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize,
+			 Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+			 int genestrand) {
   int startdiscard, enddiscard;
-  Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow;
-  Genomecomp_T low1_rc, high1_rc, high0, low1, high1;
-  __m128i current, next, invert3;
-  __m128i array[16];
-#ifdef HAVE_SSE4_1
-  __m128i temp;
-#else
-  Genomecomp_T low0_rc, high0_rc;
-#endif
-#ifdef HAVE_AVX2
-  __m256i array256[16];
-  Genomecomp_T low2, high2, low3, high3;
-  __m256i current256, next256, temp256, shift256;
-  __m256i biginvert3;
-#endif
-
+  Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc,
+    low, high, nextlow;
 
-  debug(printf("Starting store_positions_rev_simd\n"));
 
   if (left_plus_length < (Univcoord_T) indexsize) {
     left_plus_length = 0;
@@ -48020,49 +31780,62 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
   startdiscard = left % 32; /* (left+pos5) % 32 */
   enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
   
-  invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
-#ifdef HAVE_AVX2
-  biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
-  shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6);
-#endif
-
   if (left_plus_length <= left) {
     /* Skip */
 
   } else if (startptr == endptr) {
 #ifdef WORDS_BIGENDIAN
-    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
     nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high1 = ref_blocks[ptr];
-    low1 = ref_blocks[ptr+1];
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
       } else {
-	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
-    low1_rc = ~low1;
-    high1_rc = ~high1;
+    low_rc = ~low;
+    high_rc = ~high;
     nextlow_rc = ~nextlow;
 
     if (indexsize == 9) {
-      chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+      chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+      chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+      chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+      chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+      chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -48073,38 +31846,57 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
 
     /* Start block */
 #ifdef WORDS_BIGENDIAN
-    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
     nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-    high1 = ref_blocks[ptr];
-    low1 = ref_blocks[ptr+1];
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
-      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
-	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
       } else {
-	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
       }
     }
 
+    low_rc = ~low;
+    high_rc = ~high;
     nextlow_rc = ~nextlow;
-    low1_rc = ~low1;
-    high1_rc = ~high1;
 
     if (indexsize == 9) {
-      chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+      chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
     } else {
       fprintf(stderr,"indexsize %d not supported\n",indexsize);
       abort();
@@ -48113,718 +31905,561 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     ptr += 3;
 
     /* Middle blocks */
-    if (indexsize == 9) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
-
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
-
-	extract_9mers_rev_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-	ptr += 12;
-      }
-#endif
-
-      while (ptr + 6 <= endptr) {
+    while (ptr + 3 <= endptr) {
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+      high = Bigendian_convert_uint(ref_blocks[ptr]);
+      low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
+      high = ref_blocks[ptr];
+      low = ref_blocks[ptr+1];
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+	} else {
+	  high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+	}
+      } else if (mode == ATOI_STRANDED) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+	} else {
+	  high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+	}
+      }
+      
+      low_rc = ~low;
+      high_rc = ~high;
+      nextlow_rc = ~nextlow;
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc);
+      } else {
+	abort();
+      }
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+      ptr += 3;
+    }
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
 
-	extract_9mers_rev_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-	ptr += 6;
-      }
+    /* End block */
+    assert(ptr == endptr);
 
-      if (ptr + 3 <= endptr) {
 #ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+    high = Bigendian_convert_uint(ref_blocks[ptr]);
+    low = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
+    high = ref_blocks[ptr];
+    low = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
-
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
-
-	chrpos = store_9mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
       }
+    } else if (mode == TTOC_STRANDED) {
+      high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow);
+      } else {
+	high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow);
+      }
+    }
 
+    low_rc = ~low;
+    high_rc = ~high;
+    nextlow_rc = ~nextlow;
+
+    if (indexsize == 9) {
+      chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 8) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
+      chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 7) {
+      chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 6) {
+      chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+    } else if (indexsize == 5) {
+      chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+    } else {
+      abort();
+    }
+  }
+  
+  return;
+}
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
-
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
 
-	extract_8mers_rev_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-	ptr += 12;
-      }
+#ifdef HAVE_SSE2
+static void
+store_positions_rev_simd (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize,
+			  Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos,
+			  int genestrand) {
+  int startdiscard, enddiscard;
+  Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow;
+  Genomecomp_T low1_rc, high1_rc, high0, low1, high1;
+  __m128i current, a, b, next, invert3, invert4;
+#ifdef HAVE_AVX2
+  Genomecomp_T low2, high2, low3, high3;
+  __m256i current256, a256, b256, c256, d256, next256, temp256, shift256;
+  __m256i biginvert3, biginvert4;
 #endif
-
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
+#ifdef HAVE_AVX512
+  __m128i temp;
+  Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7;
+  __m512i current512, a512, b512, next512, temp512, shift512;
+  __m512i hugeinvert3, hugeinvert4;
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+  debug(printf("Starting store_positions_rev_simd\n"));
+
+  if (left_plus_length < (Univcoord_T) indexsize) {
+    left_plus_length = 0;
+  } else {
+    left_plus_length -= indexsize;
+  }
+  chrpos += (left_plus_length - left); /* We are starting from the right */
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
+  ptr = startptr = left/32U*3;
+  endptr = left_plus_length/32U*3;
+  startdiscard = left % 32; /* (left+pos5) % 32 */
+  enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */
+  
+  invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
+  invert4 = _mm_set1_epi32(0xFFFFFFFF);
+#ifdef HAVE_AVX2
+  biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF);
+  biginvert4 = _mm256_set1_epi32(0xFFFFFFFF);
+  shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6);
+#endif
+#ifdef HAVE_AVX512
+  hugeinvert3 = _mm512_inserti64x4(_mm512_set1_epi32(0xFFFFFFFF), biginvert3, 0x1);
+  hugeinvert4 = _mm512_set1_epi32(0xFFFFFFFF);
+  shift512 = _mm512_setr_epi32(15,0,1,2,3,4,5,6, 7,8,9,10,11,12,13,14);
 #endif
 
-	extract_8mers_rev_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-	ptr += 6;
-      }
+  if (left_plus_length <= left) {
+    /* Skip */
 
-      if (ptr + 3 <= endptr) {
+  } else if (startptr == endptr) {
 #ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
+    high1 = ref_blocks[ptr];
+    low1 = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
 
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
-
-	chrpos = store_8mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
       }
+    } else if (mode == ATOI_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    }
 
-    } else if (indexsize == 7) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
+    low1_rc = ~low1;
+    high1_rc = ~high1;
+    nextlow_rc = ~nextlow;
 
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+    if (indexsize == 9) {
+      /* chrpos = */ store_9mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 8) {
+      /* chrpos = */ store_8mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 7) {
+      /* chrpos = */ store_7mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 6) {
+      /* chrpos = */ store_6mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+    } else if (indexsize == 5) {
+      /* chrpos = */ store_5mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
 
-	extract_7mers_rev_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-	ptr += 12;
-      }
-#endif
+  } else {
+    /* Genome_print_blocks(ref_blocks,left,left+16); */
 
-      while (ptr + 6 <= endptr) {
+    /* Start block */
 #ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+    high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+    low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);
+    nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
+    high1 = ref_blocks[ptr];
+    low1 = ref_blocks[ptr+1];
+    nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
-	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+    } else if (mode == CMET_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow);
+      } else {
+	high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow);
+      }
+    } else if (mode == ATOI_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow);
+      }
+    }
 
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
+    nextlow_rc = ~nextlow;
+    low1_rc = ~low1;
+    high1_rc = ~high1;
 
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
+    if (indexsize == 9) {
+      chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 8) {
+      chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 7) {
+      chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 6) {
+      chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else if (indexsize == 5) {
+      chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31);
+    } else {
+      fprintf(stderr,"indexsize %d not supported\n",indexsize);
+      abort();
+    }
 
-	extract_7mers_rev_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-	ptr += 6;
-      }
+    ptr += 3;
 
-      if (ptr + 3 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);*/
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
-#else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
+    /* Middle blocks */
+#ifdef HAVE_AVX512
+    while (ptr + 24 <= endptr) {
+
+      if (mode == STANDARD) {
+	a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr]));
+	b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7]));
+	current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(16+14, 16+15, 16+11, 16+12, 16+8, 16+9, 12, 13, 9, 10, 6, 7, 3, 4, 0, 1), b512);
+	current512 = _mm512_xor_si512(current512,hugeinvert4);
+	nextlow = ref_blocks[ptr+25];
+      } else {
+	current512 = apply_mode_rev_512(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc);
+	current512 = _mm512_xor_si512(current512,hugeinvert3);
+      }
 
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
+      nextlow_rc = ~nextlow;	/* Take from this loop */
+      
+      current = _mm512_extracti32x4_epi32(current512,3);
+      temp = _mm_insert_epi32(current,nextlow_rc,0x03);			
+      temp512 = _mm512_inserti32x4(current512,temp,0x03);
+      next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! */
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_rev_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_rev_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_rev_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_rev_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_rev_simd_256(chrpos,table,positions,counts,current512,next512);
+      } else {
+	abort();
+      }
 
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
+      ptr += 24;
+    }
+#endif
 
-	chrpos = store_7mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
-      }
 
-    } else if (indexsize == 6) {
 #ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
+    while (ptr + 12 <= endptr) {
+      
+      if (mode == STANDARD) {
+	a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr]));
+	b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3]));
+	c256 = _mm256_unpacklo_epi64(b256,a256);
+	d256 = _mm256_unpackhi_epi64(b256,a256);
+	current256 = _mm256_permute2x128_si256(c256, d256, 0x03);
+	current256 = _mm256_xor_si256(current256,biginvert4);
 	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
+      } else {
+	current256 = apply_mode_rev_256(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc);
 	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
-
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+      }
 
-	extract_6mers_rev_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-	ptr += 12;
+      nextlow_rc = ~nextlow;	/* Take from this loop */
+      
+#if 0
+      /* Doesn't work, because performs shift within 128-bit lanes */
+      next256 = _mm256_alignr_epi8(current256,_mm256_set1_epi32(nextlow_rc),28);
+#else
+      temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
+      next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
+#endif
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_rev_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_rev_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_rev_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_rev_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_rev_simd_128(chrpos,table,positions,counts,current256,next256);
+      } else {
+	abort();
       }
+
+      ptr += 12;
+    }
 #endif
 
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
+    while (ptr + 6 <= endptr) {
+      
+      if (mode == STANDARD) {
+#ifdef HAVE_SSSE3
+	a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr]));
+	b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3]));
+	current = _mm_unpacklo_epi64(b,a);
+	current = _mm_xor_si128(current,invert4);
+	nextlow = ref_blocks[ptr+7];
 #else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
+	/* Solution for SSE2.  Need separate values to construct "next" */
+	high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */
+	high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
 	nextlow = ref_blocks[ptr+7];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
 
 	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
 	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
-
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
-
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
 #endif
 
-	extract_6mers_rev_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-	ptr += 6;
-      }
-
-      if (ptr + 3 <= endptr) {
+      } else {
 #ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+	high0 = Bigendian_convert_uint(ref_blocks[ptr]); /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
+	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
 #else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
+	high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */
+	high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4];
+	nextlow = ref_blocks[ptr+7];
 #endif
-	if (mode == CMET_STRANDED) {
-	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
-	}
-
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
-
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
-
-	chrpos = store_6mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
-      }
 
-    } else if (indexsize == 5) {
-#ifdef HAVE_AVX2
-      while (ptr + 12 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	high2 = Bigendian_convert_uint(ref_blocks[ptr+6]);
-	low2 = Bigendian_convert_uint(ref_blocks[ptr+7]);
-	high3 = Bigendian_convert_uint(ref_blocks[ptr+9]);
-	low3 = Bigendian_convert_uint(ref_blocks[ptr+10]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	high2 = ref_blocks[ptr+6];
-	low2 = ref_blocks[ptr+7];
-	high3 = ref_blocks[ptr+9];
-	low3 = ref_blocks[ptr+10];
-	nextlow = ref_blocks[ptr+13];
-#endif
-	if (mode == CMET_STRANDED) {
+	if (mode == STANDARD) {
+	  /* Skip */
+	} else if (mode == CMET_STRANDED) {
 	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
 	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	  high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
 	  nextlow = Cmet_reduce_ga(nextlow);
 	} else if (mode == CMET_NONSTRANDED) {
 	  if (genestrand > 0) {
 	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
 	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
-	    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
 	    nextlow = Cmet_reduce_ct(nextlow);
 	  } else {
 	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
 	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
-	    high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
 	    nextlow = Cmet_reduce_ga(nextlow);
 	  }
-	}
-
-	current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3);
-	current256 = _mm256_xor_si256(current256,biginvert3);
-	nextlow_rc = ~nextlow;
-
-	temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07);
-	next256 = _mm256_permutevar8x32_epi32(temp256,shift256);
-
-	extract_5mers_rev_simd_128(array256,current256,next256);
-	chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256);
-	ptr += 12;
-      }
-#endif
-
-      while (ptr + 6 <= endptr) {
-#ifdef WORDS_BIGENDIAN
-	high0 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	high1 = Bigendian_convert_uint(ref_blocks[ptr+3]);
-	low1 = Bigendian_convert_uint(ref_blocks[ptr+4]);
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]);
-#else
-	high0 = ref_blocks[ptr];
-	/* low0 = ref_blocks[ptr+1]; */
-	high1 = ref_blocks[ptr+3];
-	low1 = ref_blocks[ptr+4];
-	nextlow = ref_blocks[ptr+7];
-#endif
-	if (mode == CMET_STRANDED) {
-	  high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	  high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	  nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
+	} else if (mode == ATOI_STRANDED) {
+	  high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+	  high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	  nextlow = Atoi_reduce_ag(nextlow);
+	} else if (mode == ATOI_NONSTRANDED) {
 	  if (genestrand > 0) {
-	    high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */
-	    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
-	    nextlow = Cmet_reduce_ct(nextlow);
+	    high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
 	  } else {
-	    high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */
-	    high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
-	    nextlow = Cmet_reduce_ga(nextlow);
+	    high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  }
+	} else if (mode == TTOC_STRANDED) {
+	  high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+	  high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	  nextlow = Atoi_reduce_tc(nextlow);
+	} else if (mode == TTOC_NONSTRANDED) {
+	  if (genestrand > 0) {
+	    high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */
+	    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+	    nextlow = Atoi_reduce_ag(nextlow);
+	  } else {
+	    high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */
+	    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+	    nextlow = Atoi_reduce_tc(nextlow);
 	  }
 	}
 
 	current = _mm_set_epi32(nextlow_rc,high0,low1,high1);
 	current = _mm_xor_si128(current,invert3);
-	nextlow_rc = ~nextlow;
-#ifdef HAVE_SSE4_1
-	/* high0_rc = _mm_extract_epi32(current,2); */
-	/* low1_rc = _mm_extract_epi32(current,1); */
-	/* high1_rc = _mm_extract_epi32(current,0); */
-
-	temp = _mm_insert_epi32(current,nextlow_rc,0x03);
-	next = _mm_shuffle_epi32(temp,0x93);
-#else
-	high0_rc = ~high0;
-	low1_rc = ~low1;
-	high1_rc = ~high1;
-
-	next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc);
-#endif
+      }
 
-	extract_5mers_rev_simd(array,current,next);
-	chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array);
-	ptr += 6;
+      nextlow_rc = ~nextlow;	/* Take from this loop */
+
+#if defined(HAVE_SSSE3)
+      next = _mm_alignr_epi8(current,_mm_set1_epi32(nextlow_rc),12);
+#elif 0
+      /* Previous solution for SSE4.1 */
+      temp = _mm_insert_epi32(current,nextlow_rc,0x03);
+      next = _mm_shuffle_epi32(temp,0x93);
+#else
+      /* Solution for SSE2 */
+      next = _mm_set_epi32(~high0,~low1,~high1,nextlow_rc);
+#endif
+
+      if (indexsize == 9) {
+	chrpos = store_9mers_rev_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_rev_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_rev_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_rev_simd_64(chrpos,table,positions,counts,current,next);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_rev_simd_64(chrpos,table,positions,counts,current,next);
+      } else {
+	abort();
       }
 
-      if (ptr + 3 <= endptr) {
+      ptr += 6;
+    }
+
+    if (ptr + 3 <= endptr) {
 #ifdef WORDS_BIGENDIAN
-	high1 = Bigendian_convert_uint(ref_blocks[ptr]);
-	/* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
-	nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
+      high1 = Bigendian_convert_uint(ref_blocks[ptr]);
+      /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */
+      nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]);
 #else
-	high1 = ref_blocks[ptr];
-	/* low1 = ref_blocks[ptr+1]; */
-	nextlow = ref_blocks[ptr+4];
+      high1 = ref_blocks[ptr];
+      /* low1 = ref_blocks[ptr+1]; */
+      nextlow = ref_blocks[ptr+4];
 #endif
-	if (mode == CMET_STRANDED) {
+
+      if (mode == STANDARD) {
+	/* Skip */
+      } else if (mode == CMET_STRANDED) {
+	high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
+      } else if (mode == CMET_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
+	} else {
 	  high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	} else if (mode == CMET_NONSTRANDED) {
-	  if (genestrand > 0) {
-	    high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow);
-	  } else {
-	    high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
-	  }
 	}
-
-	/* low1_rc = ~low1; */
-	low1_rc = nextlow_rc;
-
-	nextlow_rc = ~nextlow;
-	high1_rc = ~high1;
-
-	chrpos = store_5mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc);
-	ptr += 3;
+      } else if (mode == ATOI_STRANDED) {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == ATOI_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+	}
+      } else if (mode == TTOC_STRANDED) {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      } else if (mode == TTOC_NONSTRANDED) {
+	if (genestrand > 0) {
+	  high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+	} else {
+	  high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+	}
+      }
+      
+      /* low1_rc = ~low1; */
+      low1_rc = nextlow_rc;
+      
+      nextlow_rc = ~nextlow;
+      high1_rc = ~high1;
+      
+      if (indexsize == 9) {
+	chrpos = store_9mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 8) {
+	chrpos = store_8mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 7) {
+	chrpos = store_7mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 6) {
+	chrpos = store_6mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc);
+      } else if (indexsize == 5) {
+	chrpos = store_5mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc);
+      } else {
+	abort();
       }
 
-    } else {
-      abort();
+      ptr += 3;
     }
 
 
@@ -48840,7 +32475,10 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     /* low1 = ref_blocks[ptr+1]; */
     nextlow = ref_blocks[ptr+4];
 #endif
-    if (mode == CMET_STRANDED) {
+
+    if (mode == STANDARD) {
+      /* Skip */
+    } else if (mode == CMET_STRANDED) {
       high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
     } else if (mode == CMET_NONSTRANDED) {
       if (genestrand > 0) {
@@ -48848,6 +32486,22 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
       } else {
 	high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow);
       }
+    } else if (mode == ATOI_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == ATOI_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      }
+    } else if (mode == TTOC_STRANDED) {
+      high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+    } else if (mode == TTOC_NONSTRANDED) {
+      if (genestrand > 0) {
+	high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow);
+      } else {
+	high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow);
+      }
     }
 
     /* low1_rc = ~low1; */
@@ -48857,15 +32511,15 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
     high1_rc = ~high1;
 
     if (indexsize == 9) {
-      chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 8) {
-      chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 7) {
-      chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 6) {
-      chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else if (indexsize == 5) {
-      chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
+      chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard);
     } else {
       abort();
     }
@@ -48883,16 +32537,146 @@ store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Co
 #define POLY_T 0xFFFF
 
 
-#ifdef HAVE_AVX2
+#ifdef HAVE_AVX512
 static Chrpos_T *
-allocate_positions (UINT4 *pointers, UINT4 *positions,
-		    Inquery_T *inquery, Count_T *counts, int oligospace) {
+allocate_positions (UINT4 *__restrict__ positions,
+		    Inquery_T *__restrict__ inquery, Count_T *counts, int oligospace) { /* Masks counts with inquery in place, sums them, and fills positions[] with running sums of counts */
+  Chrpos_T *table;		/* Result: MALLOC'd array of totalcounts entries, or NULL if totalcounts is 0 */
+  UINT4 p;			/* Running sum of counts during the second pass */
+  int totalcounts = 0;		/* Sum of all counts left after masking with inquery */
+  int i, j, k;
+  __m512i *inquery_ptr, *counts_ptr, *end_ptr, qcounts;
+  __m512i terms_ptr[1];		/* Scratch vector, read element-wise through terms */
+  Count_T *terms;
+  int *nskip, *nskip_ptr;
+
+#if 0
+  /* Causes problems with new algorithm */
+  inquery[POLY_A & mask] = INQUERY_FALSE;
+  inquery[POLY_C & mask] = INQUERY_FALSE;
+  inquery[POLY_G & mask] = INQUERY_FALSE;
+  inquery[POLY_T & mask] = INQUERY_FALSE;
+#endif
+
+  /* nskip holds run-lengths of consecutive all-zero count vectors (one run before each non-zero vector, plus a trailing run), so the second pass can jump over them */
+  nskip_ptr = nskip = (int *) MALLOCA((oligospace/SIMD_NELTS + 1) * sizeof(int));
+  *nskip_ptr = 0;
+
+  inquery_ptr = (__m512i *) inquery;
+  counts_ptr = (__m512i *) counts;
+  end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]);
+  terms = (Count_T *) terms_ptr;
+
+  i = 0;
+  while (counts_ptr < end_ptr) {
+    debug(printf("%d\n",i));
+    debug(print_counts_512(*counts_ptr,"counts"));
+    qcounts = _mm512_and_si512(*counts_ptr,*inquery_ptr++); /* counts in query (zeroed if INQUERY_FALSE, which can happen if count > MAXCOUNT) */
+    _mm512_store_si512(counts_ptr++,qcounts); /* and store back, so we don't need inquery or overabundant any more */
+    if (_mm512_test_epi32_mask(qcounts,qcounts) == 0) {
+      /* Every count in this vector is zero, so lengthen the current run in nskip */
+      (*nskip_ptr) += 1;
+
+    } else {
+      /* At least one non-zero count: add every element of this vector to totalcounts */
+      _mm512_store_si512(terms_ptr,qcounts);
+      for (k = 0; k < SIMD_NELTS; k++) {
+	totalcounts += terms[k];
+      }
+      *(++nskip_ptr) = 0;	/* Advance ptr and start a new run-length at zero */
+    }
+
+    i += SIMD_NELTS;		/* In this loop, i feeds only the debug() output above */
+  }
+
+#if 0
+  /* For debugging */
+  totalcounts_old = 0;
+  for (i = 0; i < oligospace; i++) {
+    if (inquery[i] == INQUERY_TRUE) {
+      totalcounts_old += counts[i];
+    }
+  }
+
+  fprintf(stderr,"Old method %d, new method %d\n",totalcounts_old,totalcounts);
+  if (totalcounts != totalcounts_old) {
+    abort();
+  }
+#endif
+
+  debug(printf("totalcounts is %d\n",totalcounts));
+  if (totalcounts == 0) {
+    table = (Chrpos_T *) NULL;
+  } else {
+    /* Need to assign positions[0] so we can free the space */
+    /* pointers_end = &(pointers[-1]); */ /* or pointers_allocated[0] */
+    table = (Chrpos_T *) MALLOC(totalcounts * sizeof(Chrpos_T));
+    p = 0;
+
+    i = 0;
+    nskip_ptr = nskip;
+    j = *nskip_ptr++;
+    while (i + j*SIMD_NELTS < oligospace) { /* Exits when only the trailing run of zero vectors is left (presumably oligospace is a multiple of SIMD_NELTS -- TODO confirm) */
+#if 0
+      while (--j >= 0) {
+	positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+	positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+      }
+#elif 0
+      /* Not necessary to assign since we check for counts[i] == 0 */
+      pointers_end[i] = /* positions[i] = */ p;
+      i += j*SIMD_NELTS;
+#else
+      i += j*SIMD_NELTS;	/* Jump over the all-zero vectors; their positions[] entries are not written */
+#endif
+
+      for (k = 0; k < SIMD_NELTS; k++) {
+        /* pointers_end[i] = */ positions[i] = p;	/* Sum of the counts of all earlier entries */
+	p += counts[i++];
+      }
+
+      j = *nskip_ptr++;
+    }
+
+#if 0
+    while (--j >= 0) {
+      /* Not necessary to assign since we check for counts[i] == 0 */
+      positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+      positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
+    }
+#elif 0
+    if (j > 0) {
+      pointers_end[i] = /* positions[i] = */ p;
+      /* i += j*SIMD_NELTS; */
+    }
+#endif
+  }
+  
+#if 0
+  /* Faster to assign each individual pointer above */
+  memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *));
+#endif
+  /* pointers[oligospace-1] = p; */	/* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */
+
+  /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */
+
+  FREEA(nskip);
+
+  return table;
+}
+
+
+#elif defined(HAVE_AVX2)
+static Chrpos_T *
+allocate_positions (UINT4 *__restrict__ positions,
+		    Inquery_T *__restrict__ inquery, Count_T *counts, int oligospace) {
   Chrpos_T *table;
-  UINT4 *pointers_end, p;
+  UINT4 p;
   int totalcounts = 0;
   int i, j, k;
   __m256i *inquery_ptr, *counts_ptr, *end_ptr, qcounts;
   __m256i terms_ptr[1];
+  __m256i _overflowp, _maxcounts;
   Count_T *terms;
   int *nskip, *nskip_ptr;
 
@@ -48913,21 +32697,35 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
   end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]);
   terms = (Count_T *) terms_ptr;
 
+#ifdef CHECK_FOR_OVERFLOW
+  _maxcounts = _mm256_set1_epi8(MAXCOUNT);
+#endif
+
   i = 0;
   while (counts_ptr < end_ptr) {
     debug(printf("%d\n",i));
-    debug(print_counts(*counts_ptr,"counts"));
-    qcounts = _mm256_and_si256(*counts_ptr,*inquery_ptr++); /* counts in query (zeroed if INQUERY_FALSE, which can happen if count > MAXCOUNT) */
-    _mm256_store_si256(counts_ptr++,qcounts); /* and store back, so we don't need inquery or overabundant any more */
+    qcounts = _mm256_load_si256(counts_ptr);
+    debug(print_counts_256(qcounts,"counts"));
+    qcounts = _mm256_and_si256(qcounts,*inquery_ptr++); /* counts in query (zeroed if INQUERY_FALSE, which can happen if count > MAXCOUNT) */
+    debug(print_counts_256(qcounts,"qcounts"));
     if (_mm256_testz_si256(qcounts,qcounts)) {
-      /* All counts are zero, so incrementing nskip */
+      /* All counts are zero, so incrementing nskip, but need to store back */
+      _mm256_stream_si256(counts_ptr++,qcounts); /* Store back, so we don't need inquery or overabundant any more */
       (*nskip_ptr) += 1;
-
+	
     } else {
       /* A valid count found */
+#ifdef CHECK_FOR_OVERFLOW
+      _overflowp = _mm256_cmpgt_epi8(qcounts,_maxcounts);
+      debug(print_counts_256(_overflowp,"overflow"));
+      qcounts = _mm256_andnot_si256(_overflowp,qcounts); /* Remove counts that have overflowed */
+      debug(print_counts_256(qcounts,"qcounts"));
+#endif
+
+      _mm256_stream_si256(counts_ptr++,qcounts); /* Store back, so we don't need inquery or overabundant any more */
       _mm256_store_si256(terms_ptr,qcounts);
       for (k = 0; k < SIMD_NELTS; k++) {
-	totalcounts += terms[k];
+	totalcounts += (int) terms[k];
       }
       *(++nskip_ptr) = 0;	/* Advance ptr and initialize */
     }
@@ -48935,11 +32733,12 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
     i += SIMD_NELTS;
   }
 
+
 #if 0
   /* For debugging */
   totalcounts_old = 0;
   for (i = 0; i < oligospace; i++) {
-    if (inquery[i] == INQUERY_TRUE) {
+    if (inquery[i] == INQUERY_TRUE && counts[i] > 0 && counts[i] <= MAXCOUNT) {
       totalcounts_old += counts[i];
     }
   }
@@ -48955,7 +32754,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
     table = (Chrpos_T *) NULL;
   } else {
     /* Need to assign positions[0] so we can free the space */
-    pointers_end = &(pointers[-1]);  /* or pointers_allocated[0] */
+    /* pointers_end = &(pointers[-1]); */ /* or pointers_allocated[0] */
     table = (Chrpos_T *) MALLOC(totalcounts * sizeof(Chrpos_T));
     p = 0;
 
@@ -48968,34 +32767,108 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
 	positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
 	positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
       }
-#else
+#elif 0
       /* Not necessary to assign since we check for counts[i] == 0 */
       pointers_end[i] = /* positions[i] = */ p;
       i += j*SIMD_NELTS;
+#else
+      i += j*SIMD_NELTS;
 #endif
 
-      pointers_end[i] = positions[i] = p;		/* 0 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 0 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 1 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 2 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 3 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 4 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 5 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 6 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 7 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 8 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 9 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 10 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 11 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 12 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 13 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 14 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 15 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 16 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 17 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 1 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 18 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 2 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 19 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 3 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 20 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 4 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 21 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 5 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 22 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 6 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 23 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 7 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 24 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 25 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 26 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 27 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 28 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 29 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 30 */
+      p += counts[i++];
+
+      /* pointers_end[i] = */ positions[i] = p;		/* 31 (SIMD_NELTS - 1) in bytes */
       p += counts[i++];
 
       j = *nskip_ptr++;
@@ -49007,7 +32880,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
       positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
       positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
     }
-#else
+#elif 0
     if (j > 0) {
       pointers_end[i] = /* positions[i] = */ p;
       /* i += j*SIMD_NELTS; */
@@ -49019,7 +32892,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
   /* Faster to assign each individual pointer above */
   memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *));
 #endif
-  pointers[oligospace-1] = p;	/* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */
+  /* pointers[oligospace-1] = p; */ /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */
 
   /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */
 
@@ -49031,10 +32904,10 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
 
 #elif defined(HAVE_SSE2)
 static Chrpos_T *
-allocate_positions (UINT4 *pointers, UINT4 *positions,
-		    Inquery_T *inquery, Count_T *counts, int oligospace) {
+allocate_positions (UINT4 *__restrict__ positions,
+		    Inquery_T *__restrict__ inquery, Count_T *__restrict__ counts, int oligospace) {
   Chrpos_T *table;
-  UINT4 *pointers_end, p;
+  UINT4 p;
   int totalcounts = 0;
   int i, j, k;
   __m128i *inquery_ptr, *counts_ptr, *end_ptr, qcounts;
@@ -49062,7 +32935,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
   end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]);
   terms = (Count_T *) terms_ptr;
 #ifndef HAVE_SSE4_1
-  zero = _mm_set1_epi8(0);
+  zero = _mm_setzero_si128();
 #endif
 
   i = 0;
@@ -49084,32 +32957,9 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
     } else {
       /* A valid count found */
       _mm_store_si128(terms_ptr,qcounts);
-#ifdef HAVE_AVX2
-      if (_mm_extract_epi32(qcounts,0)) {
-	totalcounts += terms[0];
-      } else {
-	counts[i] = 0;
-      }
-      if (_mm_extract_epi32(qcounts,1)) {
-	totalcounts += terms[1];
-      } else {
-	counts[i+1] = 0;
-      }
-      if (_mm_extract_epi32(qcounts,2)) {
-	totalcounts += terms[2];
-      } else {
-	counts[i+2] = 0;
-      }
-      if (_mm_extract_epi32(qcounts,3)) {
-	totalcounts += terms[3];
-      } else {
-	counts[i+3] = 0;
-      }
-#else
       for (k = 0; k < SIMD_NELTS; k++) {
 	totalcounts += terms[k];
       }
-#endif
       *(++nskip_ptr) = 0;	/* Advance ptr and initialize */
     }
 
@@ -49136,7 +32986,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
     table = (Chrpos_T *) NULL;
   } else {
     /* Need to assign positions[0] so we can free the space */
-    pointers_end = &(pointers[-1]);  /* or pointers_allocated[0] */
+    /* pointers_end = &(pointers[-1]); */ /* or pointers_allocated[0] */
     table = (Chrpos_T *) MALLOC(totalcounts * sizeof(Chrpos_T));
     p = 0;
 
@@ -49151,74 +33001,61 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
 	positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
 	positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
       }
-#else
+#elif 0
       /* Not necessary to assign since we check for counts[i] == 0 */
       pointers_end[i] = /* positions[i] = */ p;
       i += j*SIMD_NELTS;
+#else
+      i += j*SIMD_NELTS;
 #endif
 
-#ifdef HAVE_AVX2
-      pointers_end[i] = positions[i] = p;		/* 0 */
-      p += counts[i++];
-
-      pointers_end[i] = positions[i] = p;		/* 1 */
-      p += counts[i++];
-
-      pointers_end[i] = positions[i] = p;		/* 2 */
-      p += counts[i++];
-
-      pointers_end[i] = positions[i] = p;		/* 3 */
-      p += counts[i++];
-
-#else
-      pointers_end[i] = positions[i] = p;		/* 0 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 0 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 1 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 1 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 2 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 2 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 3 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 3 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 4 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 4 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 5 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 5 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 6 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 6 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 7 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 7 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 8 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 8 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 9 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 9 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 10 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 10 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 11 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 11 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 12 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 12 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 13 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 13 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 14 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 14 */
       p += counts[i++];
 
-      pointers_end[i] = positions[i] = p;		/* 15 */
+      /* pointers_end[i] = */ positions[i] = p;		/* 15 (SIMD_NELTS - 1) in bytes */
       p += counts[i++];
-#endif
 
       j = *nskip_ptr++;
     }
@@ -49231,7 +33068,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
       positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
       positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p;
     }
-#else
+#elif 0
     if (j > 0) {
       pointers_end[i] = /* positions[i] = */ p;
       /* i += j*SIMD_NELTS; */
@@ -49243,7 +33080,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
   /* Faster to assign each individual pointer above */
   memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *));
 #endif
-  pointers[oligospace-1] = p;	/* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */
+  /* pointers[oligospace-1] = p;*/ /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */
 
   /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */
 
@@ -49254,7 +33091,7 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
 
 #else
 static Chrpos_T *
-allocate_positions (UINT4 *pointers, UINT4 *positions,
+allocate_positions (UINT4 *positions,
 		    Inquery_T *inquery, Count_T *counts, int oligospace) {
   Chrpos_T *table;
   UINT4 p;
@@ -49263,10 +33100,10 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
 
 #if 0
   /* Causes problems with new algorithm */
-  inquery[POLY_A & mask] = false;
-  inquery[POLY_C & mask] = false;
-  inquery[POLY_G & mask] = false;
-  inquery[POLY_T & mask] = false;
+  inquery[POLY_A & mask] = INQUERY_FALSE;
+  inquery[POLY_C & mask] = INQUERY_FALSE;
+  inquery[POLY_G & mask] = INQUERY_FALSE;
+  inquery[POLY_T & mask] = INQUERY_FALSE;
 #endif
 
   for (i = 0; i < oligospace; i++) {
@@ -49293,8 +33130,8 @@ allocate_positions (UINT4 *pointers, UINT4 *positions,
       positions[i] = p;
       p += counts[i];
     }
-    memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(UINT4));
-    pointers[oligospace-1] = p;
+    /* memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(UINT4)); */
+    /* pointers[oligospace-1] = p; */
   }
 
   return table;
@@ -49318,7 +33155,7 @@ counts_compare (Count_T *counts1, Count0_T *counts2, Oligospace_T oligospace) {
 }
 
 static void
-positions_compare (Chrpos_T **positions1, Count_T *counts1, Inquery_T *inquery1,
+positions_compare (Chrpos_T *table, UINT4 *positions1, Count_T *counts1, Inquery_T *inquery1,
 		   Chrpos_T **positions2, Count0_T *counts2, Oligospace_T oligospace,
 		   int indexsize, Shortoligomer_T mask) {
   Oligospace_T i;
@@ -49334,6 +33171,8 @@ positions_compare (Chrpos_T **positions1, Count_T *counts1, Inquery_T *inquery1,
       /* Can happen if count > MAXCOUNT */
       if (i == (POLY_A & mask) || i == (POLY_C & mask) || i == (POLY_G & mask) || i == (POLY_T & mask)) {
 	/* Ignore */
+      } else if (counts2[i] == 0) {
+	/* Ignore (overabundant) */
       } else {
 	nt = shortoligo_nt(i,indexsize);
 	printf("At oligo %s (%llu), counts1 %d != counts2 %d, inquery1 %hd\n",
@@ -49343,10 +33182,10 @@ positions_compare (Chrpos_T **positions1, Count_T *counts1, Inquery_T *inquery1,
       }
     } else {
       for (hit = 0; hit < counts1[i]; hit++) {
-	if (positions1[i][hit] != positions2[i][hit]) {
+	if (table[positions1[i]+hit] != positions2[i][hit]) {
 	  nt = shortoligo_nt(i,indexsize);
 	  printf("At oligo %s (%llu), hit %d/%d, positions1 %u != positions2 %u\n",
-		 nt,(unsigned long long) i,hit,counts1[i],positions1[i][hit],positions2[i][hit]);
+		 nt,(unsigned long long) i,hit,counts1[i],table[positions1[i]+hit],positions2[i][hit]);
 	  FREE(nt);
 	  abort();
 	}
@@ -49663,7 +33502,7 @@ Oligoindex_set_inquery (int *badoligos, int *repoligos, int *trimoligos, int *tr
       masked = oligo & this->mask;
       noligos++;
       debug(nt = shortoligo_nt(oligo,indexsize);
-	    printf("At querypos %d, oligo %s seen\n",i,nt);
+	    printf("At querypos %d, oligo %s (%08X fwd, %08X rev) seen\n",i,nt,masked,~oligo & this->mask);
 	    FREE(nt));
 
       this->counts[masked] += 1;
@@ -50010,6 +33849,8 @@ void
 Oligoindex_hr_tally (T this, Univcoord_T mappingstart, Univcoord_T mappingend, bool plusp,
 		     char *queryuc_ptr, int querystart, int queryend, Chrpos_T chrpos, int genestrand) {
   int badoligos, repoligos, trimoligos, trim_start, trim_end;
+  Count_T *working_counts;
+  Oligospace_T i;
 #ifdef DEBUG14
   Count0_T *counts_old;
   Chrpos_T **positions_old;
@@ -50027,31 +33868,42 @@ Oligoindex_hr_tally (T this, Univcoord_T mappingstart, Univcoord_T mappingend, b
   if (plusp == true) {
     debug0(printf("plus, origin is %u\n",chrpos));
 
-#ifdef USE_SIMD_FOR_COUNTS
-    count_positions_fwd_simd(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand);
+#ifdef HAVE_SSE2
+    count_positions_fwd_simd(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
 #else
-    count_positions_fwd_std(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand);
+    count_positions_fwd_std(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
 #endif
     
-    if ((this->table = allocate_positions(this->pointers,this->positions,this->inquery,this->counts,
+    if ((this->table = allocate_positions(this->positions,this->inquery,this->counts,
 					  this->oligospace)) != NULL) {
+      working_counts = (Count_T *) MALLOC(this->oligospace*sizeof(Count_T));
+      memcpy((void *) working_counts,(const void *) this->counts,this->oligospace*sizeof(Count_T));
 
-#ifdef USE_SIMD_FOR_COUNTS
-      store_positions_fwd_simd(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
+#ifdef HAVE_SSE2
+      store_positions_fwd_simd(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend,
 			       chrpos,genestrand);
 #else
-      store_positions_fwd_std(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
+      store_positions_fwd_std(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend,
 			      chrpos,genestrand);
 #endif
 
+#ifdef CHECK_ASSERTIONS
+      /* Check if storage routine matches counting routine */
+      for (i = 0; i < this->oligospace; i++) {
+	assert(working_counts[i] == 0);
+      }
+#endif
+
+      FREE(working_counts);
+
       debug9(printf("plus, origin is %u\n",chrpos));
       debug9(dump_positions(this->table,this->positions,this->counts,this->inquery,this->oligospace,this->indexsize));
   
 #ifdef DEBUG14
       positions_old = Oligoindex_old_tally(&counts_old,mappingstart,mappingend,plusp,
-					   queryuc_ptr,querylength,chrpos,genestrand,
+					   queryuc_ptr,querystart,queryend,chrpos,genestrand,
 					   this->oligospace,this->indexsize,this->mask);
-      positions_compare(this->positions,this->counts,this->inquery,
+      positions_compare(this->table,this->positions,this->counts,this->inquery,
 			positions_old,counts_old,this->oligospace,this->indexsize,this->mask);
       FREE(counts_old);
       FREE(positions_old[0]);
@@ -50062,30 +33914,42 @@ Oligoindex_hr_tally (T this, Univcoord_T mappingstart, Univcoord_T mappingend, b
   } else {
     debug0(printf("minus, origin is %u\n",chrpos));
 
-#ifdef USE_SIMD_FOR_COUNTS
-    count_positions_rev_simd(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand);
+#ifdef HAVE_SSE2
+    count_positions_rev_simd(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
 #else
-    count_positions_rev_std(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand);
+    count_positions_rev_std(this->counts,this->indexsize,mappingstart,mappingend,genestrand);
 #endif
     
-    if ((this->table = allocate_positions(this->pointers,this->positions,this->inquery,this->counts,
+    if ((this->table = allocate_positions(this->positions,this->inquery,this->counts,
 					  this->oligospace)) != NULL) {
-#ifdef USE_SIMD_FOR_COUNTS
-      store_positions_rev_simd(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
+      working_counts = (Count_T *) MALLOC(this->oligospace*sizeof(Count_T));
+      memcpy((void *) working_counts,(const void *) this->counts,this->oligospace*sizeof(Count_T));
+
+#ifdef HAVE_SSE2
+      store_positions_rev_simd(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend,
 			       chrpos,genestrand);
 #else
-      store_positions_rev_std(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend,
+      store_positions_rev_std(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend,
 			      chrpos,genestrand);
 #endif
 
+#ifdef CHECK_ASSERTIONS
+      /* Check if storage routine matches counting routine */
+      for (i = 0; i < this->oligospace; i++) {
+	assert(working_counts[i] == 0);
+      }
+#endif
+
+      FREE(working_counts);
+
       debug9(printf("minus, origin is %u\n",chrpos));
       debug9(dump_positions(this->table,this->positions,this->counts,this->inquery,this->oligospace,this->indexsize));
   
 #ifdef DEBUG14
       positions_old = Oligoindex_old_tally(&counts_old,mappingstart,mappingend,plusp,
-					   queryuc_ptr,querylength,chrpos,genestrand,
+					   queryuc_ptr,querystart,queryend,chrpos,genestrand,
 					   this->oligospace,this->indexsize,this->mask);
-      positions_compare(this->positions,this->counts,this->inquery,
+      positions_compare(this->table,this->positions,this->counts,this->inquery,
 			positions_old,counts_old,this->oligospace,this->indexsize,this->mask);
       FREE(counts_old);
       FREE(positions_old[0]);
@@ -50137,7 +34001,7 @@ Oligoindex_clear_inquery (T this, char *queryuc_ptr, int querystart, int queryen
       masked = oligo & this->mask;
 #ifdef DEBUG      
       nt = shortoligo_nt(oligo,indexsize);
-      printf("At querypos %d, oligo %s seen\n",i,nt);
+      printf("At querypos %d, oligo %s (%08X fwd, %08X rev) seen\n",i,nt,masked,~oligo & this->mask);
       FREE(nt);
 #endif
 
@@ -50188,7 +34052,7 @@ Oligoindex_untally (T this, char *queryuc_ptr, int querylength) {
 static void
 Oligoindex_free (T *old) {
   if (*old) {
-    FREE((*old)->pointers_allocated);
+    /* FREE((*old)->pointers_allocated); */
     FREE((*old)->positions);
     FREE((*old)->table);
 #ifdef HAVE_SSE2
@@ -50226,11 +34090,11 @@ lookup (int *nhits, T this, Shortoligomer_T masked) {
   char *nt;
 #endif
 
-  if ((*nhits = this->counts[masked]) >= 1) {
+  if ((*nhits = this->counts[masked]) > 0) {
 #ifdef DEBUG
     nt = shortoligo_nt(masked,this->indexsize);
-    printf("masked is %s (%u) => %d entries: %u...%u\n",
-	   nt,masked,*nhits,
+    printf("masked is %s [%08X] (%u) => %d entries: %u...%u\n",
+	   nt,masked,masked,*nhits,
 #if 0
 	   this->positions[masked],this->positions[masked]+(*nhits)-1,
 #endif
@@ -50495,5 +34359,3 @@ Oligoindex_get_mappings (List_T diagonals, bool *coveredp, Chrpos_T **mappings,
   return diagonals;
 }
 
-
-
diff --git a/src/oligoindex_hr.h b/src/oligoindex_hr.h
index c52da8b..8c96b8f 100644
--- a/src/oligoindex_hr.h
+++ b/src/oligoindex_hr.h
@@ -1,4 +1,4 @@
-/* $Id: oligoindex_hr.h 180701 2015-12-10 19:54:31Z twu $ */
+/* $Id: oligoindex_hr.h 203523 2017-02-14 18:39:36Z twu $ */
 #ifndef OLIGOINDEX_HR_INCLUDED
 #define OLIGOINDEX_HR_INCLUDED
 
@@ -10,58 +10,28 @@
 #include "diagpool.h"
 
 
+#if 0
+/* Old code, no longer used */
 #define OVERABUNDANCE_CHECK 50
 #define OVERABUNDANCE_PCT 0.97
 #define OVERABUNDANCE_MIN 200
-
-
-#ifdef HAVE_AVX2
-/* Attempted to use int, so we don't need to check for count > 255.  However, SIMD is much faster on bytes than on ints */
-typedef int Count_T;
-typedef unsigned int Inquery_T;
-#define INQUERY_FALSE 0x00000000
-#define INQUERY_TRUE  0xFFFFFFFF
-#define SIMD_NELTS 8		/* 8 ints in 256 bits */
-
-/* #define CHECK_FOR_OVERFLOW 1 -- Optional if we use int for Count_T */
-#define CHECK_FOR_OVERFLOW 1
-
-#ifdef CHECK_FOR_OVERFLOW
-#define MAXCOUNT 255
-#define INCR_COUNT(counts,inquery) if (++counts > MAXCOUNT) inquery = INQUERY_FALSE;
-#else
-#define INCR_COUNT(counts,inquery) counts += 1;
 #endif
 
-
-#elif defined(HAVE_SSE2)
-typedef char Count_T;
+/* Attempted to use int, so we could use i32gather_epi32.  However, SIMD is much faster on bytes than on ints */
+typedef unsigned char Count_T;
 typedef unsigned char Inquery_T;
 #define INQUERY_FALSE 0x00
 #define INQUERY_TRUE  0xFF
-#define SIMD_NELTS 16		/* 16 bytes in 128 bits */
-
-#define CHECK_FOR_OVERFLOW 1	/* Required, since a char can hold only 127 positive counts */
-#ifdef CHECK_FOR_OVERFLOW
-#define INCR_COUNT(counts,inquery) if (++counts < 0) inquery = INQUERY_FALSE;
-#else
-#define INCR_COUNT(counts,inquery) counts += 1;
-#endif
+#define INCR_COUNT(counts) counts += 1;
 
-#else
-typedef char Count_T;
-typedef bool Inquery_T;
-#define INQUERY_FALSE false
-#define INQUERY_TRUE true
-
-#define CHECK_FOR_OVERFLOW 1	/* Required, since a char can hold only 127 positive counts */
-#ifdef CHECK_FOR_OVERFLOW
-#define INCR_COUNT(counts,inquery) if (++counts < 0) inquery = false;
-#else
-#define INCR_COUNT(counts,inquery) counts += 1;
+#if defined(HAVE_AVX512)
+#define SIMD_NELTS 64		/* 64 bytes in 512 bits */
+#elif defined(HAVE_AVX2)
+#define SIMD_NELTS 32		/* 32 bytes in 256 bits */
+#elif defined(HAVE_SSE2)
+#define SIMD_NELTS 16		/* 16 bytes in 128 bits */
 #endif
 
-#endif
 
 
 #define T Oligoindex_T
diff --git a/src/outbuffer.c b/src/outbuffer.c
index d01d375..ce4bcf6 100644
--- a/src/outbuffer.c
+++ b/src/outbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: outbuffer.c 184468 2016-02-18 00:10:24Z twu $";
+static char rcsid[] = "$Id: outbuffer.c 200473 2016-11-14 20:54:20Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -74,22 +74,20 @@ static char *failedinput_root;
 
 #ifdef USE_MPI
 static MPI_File *outputs;
+static MPI_File output_failedinput;
 #ifdef GSNAP
 static MPI_File output_failedinput_1;
 static MPI_File output_failedinput_2;
-#else
-static MPI_File output_failedinput;
 #endif
 
 
 #else
 static char *write_mode;
 static FILE **outputs = NULL;
+static FILE *output_failedinput;
 #ifdef GSNAP
 static FILE *output_failedinput_1;
 static FILE *output_failedinput_2;
-#else
-static FILE *output_failedinput;
 #endif
 
 #endif
@@ -228,8 +226,30 @@ failedinput_open (char *failedinput_root) {
     exit(9);
   }
 #endif
+
+  /* Re-use filename, since it is shorter */
+  sprintf(filename,"%s",failedinput_root);
+#ifdef USE_MPI
+  if (appendp == true) {
+    MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND,
+                  MPI_INFO_NULL,&output_failedinput);
+  } else {
+    /* Need to remove existing file, if any */
+    MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
+		  MPI_INFO_NULL,&output_failedinput);
+    MPI_File_close(&output_failedinput);
+    MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY,
+		  MPI_INFO_NULL,&output_failedinput);
+  }
+#else
+  if ((output_failedinput = fopen(filename,write_mode)) == NULL) {
+    fprintf(stderr,"Cannot open file %s for writing\n",filename);
+    exit(9);
+  }
+#endif
   FREE(filename);
 
+
 #else  /* GMAP */
   filename = (char *) MALLOC((strlen(failedinput_root)+1) * sizeof(char));
   sprintf(filename,"%s",failedinput_root);
@@ -357,10 +377,9 @@ Outbuffer_setup (int argc_in, char **argv_in, int optind_in,
 
   failedinput_root = failedinput_root_in;
   if (failedinput_root == NULL) {
+    output_failedinput = NULL;
 #ifdef GSNAP
     output_failedinput_1 = output_failedinput_2 = NULL;
-#else
-    output_failedinput = NULL;
 #endif
   } else {
     failedinput_open(failedinput_root);
@@ -381,11 +400,10 @@ typedef struct RRlist_T *RRlist_T;
 struct RRlist_T {
   int id;
   Filestring_T fp;
+  Filestring_T fp_failedinput;
 #ifdef GSNAP
   Filestring_T fp_failedinput_1;
   Filestring_T fp_failedinput_2;
-#else
-  Filestring_T fp_failedinput;
 #endif
   RRlist_T next;
 };
@@ -410,21 +428,19 @@ RRlist_dump (RRlist_T head, RRlist_T tail) {
 /* Returns new tail */
 static RRlist_T
 RRlist_push (RRlist_T *head, RRlist_T tail, Filestring_T fp,
-#ifdef GSNAP
-	     Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2
-#else
 	     Filestring_T fp_failedinput
+#ifdef GSNAP
+	     , Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2
 #endif
 	     ) {
   RRlist_T new;
 
   new = (RRlist_T) MALLOC_OUT(sizeof(*new)); /* Called by worker thread */
   new->fp = fp;
+  new->fp_failedinput = fp_failedinput;
 #ifdef GSNAP
   new->fp_failedinput_1 = fp_failedinput_1;
   new->fp_failedinput_2 = fp_failedinput_2;
-#else
-  new->fp_failedinput = fp_failedinput;
 #endif
   new->next = (RRlist_T) NULL;
   
@@ -441,20 +457,18 @@ RRlist_push (RRlist_T *head, RRlist_T tail, Filestring_T fp,
 /* Returns new head */
 static RRlist_T
 RRlist_pop (RRlist_T head, Filestring_T *fp,
-#ifdef GSNAP
-	    Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2
-#else
 	    Filestring_T *fp_failedinput
+#ifdef GSNAP
+	    , Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2
 #endif
 	    ) {
   RRlist_T newhead;
 
   *fp = head->fp;
+  *fp_failedinput = head->fp_failedinput;
 #ifdef GSNAP
   *fp_failedinput_1 = head->fp_failedinput_1;
   *fp_failedinput_2 = head->fp_failedinput_2;
-#else
-  *fp_failedinput = head->fp_failedinput;
 #endif
 
   newhead = head->next;
@@ -466,10 +480,9 @@ RRlist_pop (RRlist_T head, Filestring_T *fp,
 
 static RRlist_T
 RRlist_insert (RRlist_T list, int id, Filestring_T fp,
-#ifdef GSNAP	       
-	       Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2
-#else
 	       Filestring_T fp_failedinput
+#ifdef GSNAP	       
+	       , Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2
 #endif
 	       ) {
   RRlist_T *p;
@@ -483,11 +496,10 @@ RRlist_insert (RRlist_T list, int id, Filestring_T fp,
   new = (RRlist_T) MALLOC_OUT(sizeof(*new));
   new->id = id;
   new->fp = fp;
+  new->fp_failedinput = fp_failedinput;
 #ifdef GSNAP
   new->fp_failedinput_1 = fp_failedinput_1;
   new->fp_failedinput_2 = fp_failedinput_2;
-#else
-  new->fp_failedinput = fp_failedinput;
 #endif
   
   new->next = *p;
@@ -498,21 +510,19 @@ RRlist_insert (RRlist_T list, int id, Filestring_T fp,
 /* Returns new head */
 static RRlist_T
 RRlist_pop_id (RRlist_T head, int *id, Filestring_T *fp,
-#ifdef GSNAP
-	       Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2
-#else
 	       Filestring_T *fp_failedinput
+#ifdef GSNAP
+	       , Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2
 #endif
 	       ) {
   RRlist_T newhead;
 
   *id = head->id;
   *fp = head->fp;
+  *fp_failedinput = head->fp_failedinput;
 #ifdef GSNAP
   *fp_failedinput_1 = head->fp_failedinput_1;
   *fp_failedinput_2 = head->fp_failedinput_2;
-#else
-  *fp_failedinput = head->fp_failedinput;
 #endif
 
   newhead = head->next;
@@ -711,19 +721,17 @@ Outbuffer_close_files () {
 
   if (failedinput_root != NULL) {
 #ifdef USE_MPI
+    MPI_File_close(&output_failedinput);
 #ifdef GSNAP
     MPI_File_close(&output_failedinput_1);
     MPI_File_close(&output_failedinput_2);
-#else
-    MPI_File_close(&output_failedinput);
 #endif
     
 #else
+    fclose(output_failedinput);
 #ifdef GSNAP
     fclose(output_failedinput_1);
     fclose(output_failedinput_2);
-#else
-    fclose(output_failedinput);
 #endif
 #endif
 
@@ -841,13 +849,13 @@ Outbuffer_add_nbeyond (T this) {
 
 #ifdef GSNAP
 void
-Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) {
+Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) {
 
 #ifdef HAVE_PTHREAD
   pthread_mutex_lock(&this->lock);
 #endif
 
-  this->tail = RRlist_push(&this->head,this->tail,fp,fp_failedinput_1,fp_failedinput_2);
+  this->tail = RRlist_push(&this->head,this->tail,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
   debug1(RRlist_dump(this->head,this->tail));
   this->nprocessed += 1;
 
@@ -886,7 +894,7 @@ Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput)
 
 #ifdef GSNAP
 void
-Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) {
+Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) {
   SAM_split_output_type split_output;
 #ifdef USE_MPI
   MPI_File output;
@@ -929,6 +937,13 @@ Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Fil
   Filestring_free(&fp);
 
   if (failedinput_root != NULL) {
+    if (fp_failedinput != NULL) {
+#ifdef USE_MPI
+      Filestring_stringify(fp_failedinput);
+#endif
+      Filestring_print(output_failedinput,fp_failedinput);
+      Filestring_free(&fp_failedinput);
+    }
     if (fp_failedinput_1 != NULL) {
 #ifdef USE_MPI
       Filestring_stringify(fp_failedinput_1);
@@ -1015,10 +1030,9 @@ Outbuffer_thread_anyorder (void *data) {
   unsigned int output_buffer_size = this->output_buffer_size;
   unsigned int noutput = 0, ntotal, nbeyond;
   Filestring_T fp;
+  Filestring_T fp_failedinput;
 #ifdef GSNAP
   Filestring_T fp_failedinput_1, fp_failedinput_2;
-#else
-  Filestring_T fp_failedinput;
 #endif
   
   /* Obtain this->ntotal while locked, to prevent race between output thread and input thread */
@@ -1049,7 +1063,7 @@ Outbuffer_thread_anyorder (void *data) {
 
     } else {
 #ifdef GSNAP
-      this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+      this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2);
 #else
       this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
 #endif
@@ -1061,7 +1075,7 @@ Outbuffer_thread_anyorder (void *data) {
 #endif
 
 #ifdef GSNAP
-      Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+      Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
       Outbuffer_print_filestrings(fp,fp_failedinput);
 #endif
@@ -1076,14 +1090,14 @@ Outbuffer_thread_anyorder (void *data) {
 	/* Clear out backlog */
 	while (this->head && this->nprocessed - noutput > output_buffer_size) {
 #ifdef GSNAP
-	  this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+	  this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2);
 #else
 	  this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
 #endif
 	  debug1(RRlist_dump(this->head,this->tail));
 
 #ifdef GSNAP
-	  Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+	  Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	  Outbuffer_print_filestrings(fp,fp_failedinput);
 #endif
@@ -1124,10 +1138,9 @@ Outbuffer_thread_ordered (void *data) {
   unsigned int output_buffer_size = this->output_buffer_size;
   unsigned int noutput = 0, nqueued = 0, ntotal, nbeyond;
   Filestring_T fp;
+  Filestring_T fp_failedinput;
 #ifdef GSNAP
   Filestring_T fp_failedinput_1, fp_failedinput_2;
-#else
-  Filestring_T fp_failedinput;
 #endif
   RRlist_T queue = NULL;
   int id;
@@ -1161,7 +1174,7 @@ Outbuffer_thread_ordered (void *data) {
 
     } else {
 #ifdef GSNAP
-      this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+      this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2);
 #else
       this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
 #endif
@@ -1173,14 +1186,14 @@ Outbuffer_thread_ordered (void *data) {
       if ((id = Filestring_id(fp)) != (int) noutput) {
 	/* Store in queue */
 #ifdef GSNAP
-	queue = RRlist_insert(queue,id,fp,fp_failedinput_1,fp_failedinput_2);
+	queue = RRlist_insert(queue,id,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	queue = RRlist_insert(queue,id,fp,fp_failedinput);
 #endif
 	nqueued++;
       } else {
 #ifdef GSNAP
-	Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+	Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	Outbuffer_print_filestrings(fp,fp_failedinput);
 #endif
@@ -1192,13 +1205,13 @@ Outbuffer_thread_ordered (void *data) {
 	/* Print out rest of stored queue */
 	while (queue != NULL && queue->id == (int) noutput) {
 #ifdef GSNAP
-	  queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput_1,&fp_failedinput_2);
+	  queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2);
 #else
 	  queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput);
 #endif
 	  nqueued--;
 #ifdef GSNAP
-	  Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+	  Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	  Outbuffer_print_filestrings(fp,fp_failedinput);
 #endif
@@ -1216,21 +1229,21 @@ Outbuffer_thread_ordered (void *data) {
 	/* Clear out backlog */
 	while (this->head && this->nprocessed - nqueued - noutput > output_buffer_size) {
 #ifdef GSNAP
-	  this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2);
+	  this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2);
 #else
 	  this->head = RRlist_pop(this->head,&fp,&fp_failedinput);
 #endif
 	  if ((id = Filestring_id(fp)) != (int) noutput) {
 	    /* Store in queue */
 #ifdef GSNAP
-	    queue = RRlist_insert(queue,id,fp,fp_failedinput_1,fp_failedinput_2);
+	    queue = RRlist_insert(queue,id,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	    queue = RRlist_insert(queue,id,fp,fp_failedinput);
 #endif
 	    nqueued++;
 	  } else {
 #ifdef GSNAP
-	    Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+	    Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	    Outbuffer_print_filestrings(fp,fp_failedinput);
 #endif
@@ -1241,13 +1254,13 @@ Outbuffer_thread_ordered (void *data) {
 	    /* Print out rest of stored queue */
 	    while (queue != NULL && queue->id == (int) noutput) {
 #ifdef GSNAP
-	      queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput_1,&fp_failedinput_2);
+	      queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2);
 #else
 	      queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput);
 #endif
 	      nqueued--;
 #ifdef GSNAP
-	      Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2);
+	      Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2);
 #else
 	      Outbuffer_print_filestrings(fp,fp_failedinput);
 #endif
diff --git a/src/outbuffer.h b/src/outbuffer.h
index 7c0d318..dd31ea5 100644
--- a/src/outbuffer.h
+++ b/src/outbuffer.h
@@ -1,4 +1,4 @@
-/* $Id: outbuffer.h 157571 2015-01-28 00:04:37Z twu $ */
+/* $Id: outbuffer.h 200473 2016-11-14 20:54:20Z twu $ */
 #ifndef OUTBUFFER_INCLUDED
 #define OUTBUFFER_INCLUDED
 
@@ -63,10 +63,10 @@ Outbuffer_add_nread (T this, unsigned int nread);
 
 #ifdef GSNAP
 extern void
-Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2);
+Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2);
 
 extern void
-Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2);
+Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2);
 #else
 extern void
 Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput);
diff --git a/src/output.c b/src/output.c
index c662a34..b456280 100644
--- a/src/output.c
+++ b/src/output.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: output.c 184470 2016-02-18 00:11:42Z twu $";
+static char rcsid[] = "$Id: output.c 207201 2017-06-12 18:40:57Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -37,8 +37,6 @@ static bool print_m8_p;
 static bool invert_first_p;
 static bool invert_second_p;
 
-static bool merge_samechr_p;
-
 #else
 static Printtype_T printtype;
 static int invertmode;
@@ -81,7 +79,6 @@ Output_setup (Univ_IIT_T chromosome_iit_in,
 	      char *failedinput_root_in, int quality_shift_in,
 #ifdef GSNAP
 	      bool output_sam_p_in, bool print_m8_p_in,	bool invert_first_p_in, bool invert_second_p_in,
-	      bool merge_samechr_p_in,
 #else
 	      Printtype_T printtype_in, int invertmode_in, int wraplength_in, int ngap_in,
 	      bool nointronlenp_in, bool sam_paired_p_in, int cds_startpos_in,
@@ -111,8 +108,6 @@ Output_setup (Univ_IIT_T chromosome_iit_in,
   invert_first_p = invert_first_p_in;
   invert_second_p = invert_second_p_in;
 
-  merge_samechr_p = merge_samechr_p_in;
-
 #else
   printtype = printtype_in;
   invertmode = invertmode_in;
@@ -158,44 +153,53 @@ Output_setup (Univ_IIT_T chromosome_iit_in,
 
 /* Taken from print_result_sam from old outbuffer.c */
 static Filestring_T
-filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+filestring_fromresult_sam (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 			   Result_T result, Request_T request) {
   Filestring_T fp;
   Resulttype_T resulttype;
   Shortread_T queryseq1;
   Stage3end_T *stage3array, stage3;
-  Chrpos_T chrpos;
+  Chrnum_T chrnum;
+  Chrpos_T chrpos_low;
   int npaths_primary, npaths_altloc, pathnum, first_absmq, second_absmq;
   char *abbrev;
 
   fp = Filestring_new(Request_id(request));
-  if (failedinput_root == NULL) {
-    *fp_failedinput_1 = (Filestring_T) NULL;
-  } else {
-    *fp_failedinput_1 = Filestring_new(Request_id(request));
-  }
 
   resulttype = Result_resulttype(result);
   if (resulttype == SINGLEEND_NOMAPPING) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     queryseq1 = Request_queryseq1(request);
     if (nofailsp == true) {
       /* Skip */
     } else {
       Filestring_set_split_output(fp,OUTPUT_NM); /* Needs to go outside of nofailsp */
       SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,
-			  queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+			  queryseq1,/*queryseq_mate*/NULL,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
 			  /*acc2*/NULL,chromosome_iit,resulttype,
 			  /*first_read_p*/true,/*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,
-			  /*artificial_mate_p*/false,/*npaths_mate*/0,/*mate_chrpos*/0U,
+			  /*artificial_mate_p*/false,/*npaths_mate*/0,
+			  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 			  quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
       if (failedinput_root != NULL) {
-	Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+	Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1);
       }
     }
 
   } else if (resulttype == SINGLEEND_UNIQ) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     if (failsonlyp == true) {
       /* Skip */
     } else {
@@ -204,10 +208,11 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
       stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
       stage3 = stage3array[0];
       if (Stage3end_hittype(stage3) == SAMECHR_SPLICE || Stage3end_hittype(stage3) == TRANSLOC_SPLICE) {
-	chrpos = 0;
+	chrnum = 0;
+	chrpos_low = 0;
       } else {
-	chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
-				    /*first_read_p*/true);
+	chrpos_low = SAM_compute_chrpos(&chrnum,/*hardclip_low*/0,/*hardclip_high*/0,
+					stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
       }
       if (Stage3end_circularpos(stage3) > 0) {
 	Filestring_set_split_output(fp,OUTPUT_UC);
@@ -216,18 +221,23 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
 	Filestring_set_split_output(fp,OUTPUT_UU);
 	abbrev = ABBREV_UNPAIRED_UNIQ;
       }
-      SAM_print(fp,*fp_failedinput_1,abbrev,stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),/*acc2*/NULL,
+      SAM_print(fp,*fp_failedinput,abbrev,stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),/*acc2*/NULL,
 		/*pathnum*/1,npaths_primary,npaths_altloc,Stage3end_absmq_score(stage3array[0]),first_absmq,second_absmq,
 		Stage3end_mapq_score(stage3array[0]),
-		chromosome_iit,queryseq1,/*queryseq2*/NULL,
-		/*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
-		/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		chromosome_iit,queryseq1,/*queryseq2*/NULL,/*pairedlength*/0,chrnum,chrpos_low,
+		/*mate_chrnum*/0,/*mate_chrpos_low*/0U,
+		/*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/0,quality_shift,
-		sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		sam_read_group_id,invert_first_p,invert_second_p);
     }
 
   } else if (resulttype == SINGLEEND_TRANSLOC) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
 
     Filestring_set_split_output(fp,OUTPUT_UT);
     stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
@@ -236,13 +246,14 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
     } else if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
       queryseq1 = Request_queryseq1(request);
       SAM_print_nomapping(fp,ABBREV_UNPAIRED_TRANSLOC,
-			  queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+			  queryseq1,/*queryseq_mate*/NULL,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
 			  /*acc2*/NULL,chromosome_iit,resulttype,
 			  /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
-			  /*artificial_mate_p*/false,/*npaths_mate*/0,/*mate_chrpos*/0U,
+			  /*artificial_mate_p*/false,/*npaths_mate*/0,
+			  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 			  quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
       if (failedinput_root != NULL) {
-	Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+	Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1);
       }
 
     } else {
@@ -251,26 +262,33 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
       for (pathnum = 1; pathnum <= npaths_primary + npaths_altloc && pathnum <= maxpaths_report; pathnum++) {
 	stage3 = stage3array[pathnum-1];
 	if (Stage3end_hittype(stage3) == SAMECHR_SPLICE || Stage3end_hittype(stage3) == TRANSLOC_SPLICE) {
-	  chrpos = 0;
+	  chrnum = 0;
+	  chrpos_low = 0;
 	} else {
-	  chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
-				      /*first_read_p*/true);
+	  chrpos_low = SAM_compute_chrpos(&chrnum,/*hardclip_low*/0,/*hardclip_high*/0,
+					  stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 	}
-	SAM_print(fp,*fp_failedinput_1,ABBREV_UNPAIRED_TRANSLOC,
+	SAM_print(fp,*fp_failedinput,ABBREV_UNPAIRED_TRANSLOC,
 		  stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
 		  /*acc2*/NULL,pathnum,npaths_primary,npaths_altloc,
 		  Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
 		  Stage3end_mapq_score(stage3array[pathnum-1]),
-		  chromosome_iit,queryseq1,/*queryseq2*/NULL,
-		  /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
-		  /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		  chromosome_iit,queryseq1,/*queryseq2*/NULL,/*pairedlength*/0,chrnum,chrpos_low,
+		  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,
+		  /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		  resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/0,quality_shift,
-		  sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		  sam_read_group_id,invert_first_p,invert_second_p);
       }
     }
 
   } else if (resulttype == SINGLEEND_MULT) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
 
     if (failsonlyp == true) {
@@ -279,13 +297,14 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
       Filestring_set_split_output(fp,OUTPUT_UX);
       queryseq1 = Request_queryseq1(request);
       SAM_print_nomapping(fp,ABBREV_UNPAIRED_MULT_XS,
-			  queryseq1,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
+			  queryseq1,/*queryseq_mate*/NULL,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
 			  /*acc2*/NULL,chromosome_iit,resulttype,
 			  /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
-			  /*artificial_mate_p*/false,/*npaths_mate*/0,/*mate_chrpos*/0U,
+			  /*artificial_mate_p*/false,/*npaths_mate*/0,
+			  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 			  quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
       if (failedinput_root != NULL) {
-	Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+	Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1);
       }
 
     } else {
@@ -294,34 +313,38 @@ filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_fail
       for (pathnum = 1; pathnum <= npaths_primary + npaths_altloc && pathnum <= maxpaths_report; pathnum++) {
 	stage3 = stage3array[pathnum-1];
 	if (Stage3end_hittype(stage3) == SAMECHR_SPLICE || Stage3end_hittype(stage3) == TRANSLOC_SPLICE) {
-	  chrpos = 0;
+	  chrnum = 0;
+	  chrpos_low = 0U;
 	} else {
-	  chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
-				      /*first_read_p*/true);
+	  chrpos_low = SAM_compute_chrpos(&chrnum,/*hardclip_low*/0,/*hardclip_high*/0,
+					  stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 	}
-	SAM_print(fp,*fp_failedinput_1,ABBREV_UNPAIRED_MULT,
+	SAM_print(fp,*fp_failedinput,ABBREV_UNPAIRED_MULT,
 		  stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),
 		  /*acc2*/NULL,pathnum,npaths_primary,npaths_altloc,
 		  Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq,
 		  Stage3end_mapq_score(stage3array[pathnum-1]),
-		  chromosome_iit,queryseq1,/*queryseq2*/NULL,
-		  /*pairedlength*/0,chrpos,/*mate_chrpos*/0U,
-		  /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		  chromosome_iit,queryseq1,/*queryseq2*/NULL,/*pairedlength*/0,chrnum,chrpos_low,
+		  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,
+		  /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		  resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/0,quality_shift,
-		  sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		  sam_read_group_id,invert_first_p,invert_second_p);
       }
     }
 
   } else {
+    *fp_failedinput = (Filestring_T) NULL;
     if (failedinput_root == NULL) {
+      *fp_failedinput_1 = (Filestring_T) NULL;
       *fp_failedinput_2 = (Filestring_T) NULL;
     } else {
+      *fp_failedinput_1 = Filestring_new(Request_id(request));
       *fp_failedinput_2 = Filestring_new(Request_id(request));
     }
     SAM_print_paired(fp,*fp_failedinput_1,*fp_failedinput_2,result,resulttype,chromosome_iit,
 		     Request_queryseq1(request),Request_queryseq2(request),
 		     invert_first_p,invert_second_p,nofailsp,failsonlyp,
-		     merge_samechr_p,quality_shift,sam_read_group_id);
+		     quality_shift,sam_read_group_id);
   }
 
   return fp;
@@ -360,7 +383,7 @@ print_header_singleend (Filestring_T fp, Request_T request, bool translocationp,
 
 /* Taken from print_result_gsnap from old outbuffer.c */
 static Filestring_T
-filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+filestring_fromresult_gsnap (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 			     Result_T result, Request_T request) {
   Filestring_T fp;
   Resulttype_T resulttype;
@@ -369,16 +392,16 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
   int npaths_primary, npaths_altloc, pathnum, first_absmq, second_absmq;
 
   fp = Filestring_new(Request_id(request));
-  if (failedinput_root == NULL) {
-    *fp_failedinput_1 = (Filestring_T) NULL;
-  } else {
-    *fp_failedinput_1 = Filestring_new(Request_id(request));
-  }
 
   resulttype = Result_resulttype(result);
-
   if (resulttype == SINGLEEND_NOMAPPING) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     if (nofailsp == true) {
       /* Skip */
     } else if (print_m8_p) {
@@ -390,12 +413,18 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
 
       if (failedinput_root != NULL) {
 	queryseq1 = Request_queryseq1(request);
-	Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1);
+	Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1);
       }
     }
 
   } else if (resulttype == SINGLEEND_UNIQ) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     if (failsonlyp == true) {
       /* Skip */
     } else {
@@ -417,7 +446,13 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
     }
 
   } else if (resulttype == SINGLEEND_TRANSLOC) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     Filestring_set_split_output(fp,OUTPUT_UT);
 
     stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
@@ -447,7 +482,13 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
     }
 
   } else if (resulttype == SINGLEEND_MULT) {
-    *fp_failedinput_2 = (Filestring_T) NULL;
+    if (failedinput_root == NULL) {
+      *fp_failedinput = (Filestring_T) NULL;
+    } else {
+      *fp_failedinput = Filestring_new(Request_id(request));
+    }
+    *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL;
+
     stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
 
     if (failsonlyp == true) {
@@ -479,9 +520,12 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
     }
 
   } else if (resulttype == PAIREDEND_NOMAPPING) {
+    *fp_failedinput = (Filestring_T) NULL;
     if (failedinput_root == NULL) {
+      *fp_failedinput_1 = (Filestring_T) NULL;
       *fp_failedinput_2 = (Filestring_T) NULL;
     } else {
+      *fp_failedinput_1 = Filestring_new(Request_id(request));
       *fp_failedinput_2 = Filestring_new(Request_id(request));
     }
 
@@ -509,9 +553,12 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
     }
 
   } else {
+    *fp_failedinput = (Filestring_T) NULL;
     if (failedinput_root == NULL) {
+      *fp_failedinput_1 = (Filestring_T) NULL;
       *fp_failedinput_2 = (Filestring_T) NULL;
     } else {
+      *fp_failedinput_1 = Filestring_new(Request_id(request));
       *fp_failedinput_2 = Filestring_new(Request_id(request));
     }
 
@@ -539,12 +586,12 @@ filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_fa
 }
 
 Filestring_T
-Output_filestring_fromresult (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+Output_filestring_fromresult (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 			      Result_T result, Request_T request) {
   if (output_sam_p == true) {
-    return filestring_fromresult_sam(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+    return filestring_fromresult_sam(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
   } else {
-    return filestring_fromresult_gsnap(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
+    return filestring_fromresult_gsnap(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request);
   }
 }
 
@@ -559,8 +606,10 @@ print_npaths (Filestring_T fp, int npaths, char *chrsubset_name, bool mergedp,
 
   if (npaths == 0) {
     FPRINTF(fp,"Paths (0):");
+#if 0
   } else if (mergedp == true) {
     FPRINTF(fp,"Paths (1):");
+#endif
   } else {
     FPRINTF(fp,"Paths (%d):",npaths);
   }
@@ -637,6 +686,7 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
       fprintf(stderr,"No paths found for %s\n",Sequence_accession(headerseq));
     }
 
+#if 0
   } else if ((mergedp = Result_mergedp(result)) == true) {
     if (Stage3_circularpos(stage3array[0]) > 0) {
       Filestring_set_split_output(fp,OUTPUT_UC);
@@ -655,6 +705,7 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
 #endif
 		       querylength,fulllengthp,cds_startpos,truncatep,strictp);
     }
+#endif
 
   } else if ((chimera = Result_chimera(result)) != NULL) {
     if (chimeras_allowed_p == true) {
@@ -884,6 +935,11 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
       }
 
 #ifndef PMAP
+    } else if (printtype == BEDPE) {
+      for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
+	Stage3_print_bedpe(fp,stage3array[pathnum-1],pathnum,chromosome_iit,queryseq,querylength);
+      }
+
     } else if (printtype == SAM) {
       if (npaths_primary + npaths_altloc == 0) {
 	Pair_print_sam_nomapping(fp,abbrev,/*acc1*/Sequence_accession(headerseq),/*acc2*/NULL,
@@ -897,6 +953,7 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
 				 Sequence_fulllength(queryseq),quality_shift,
 				 Sequence_firstp(queryseq),sam_paired_p,sam_read_group_id);
 
+#if 0
       } else if (mergedp == true) {
 	Stage3_print_sam(fp,abbrev,stage3array[0],/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
 			 Stage3_absmq_score(stage3array[0]),second_absmq,
@@ -904,6 +961,7 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
 			 chromosome_iit,usersegment,queryseq,
 			 /*chimera_part*/0,/*chimera*/NULL,quality_shift,sam_paired_p,
 			 sam_read_group_id);
+#endif
 
       } else if (chimera != NULL) {
 	Stage3_print_sam(fp,abbrev,stage3array[0],/*pathnum*/1,npaths_primary,npaths_altloc,
diff --git a/src/output.h b/src/output.h
index 36ded05..d2cc76c 100644
--- a/src/output.h
+++ b/src/output.h
@@ -1,4 +1,4 @@
-/* $Id: output.h 155282 2014-12-12 19:42:54Z twu $ */
+/* $Id: output.h 206763 2017-05-30 17:48:24Z twu $ */
 #ifndef OUTPUT_INCLUDED
 #define OUTPUT_INCLUDED
 
@@ -27,7 +27,6 @@ Output_setup (Univ_IIT_T chromosome_iit_in,
 	      char *failedinput_root_in, int quality_shift_in,
 #ifdef GSNAP
 	      bool output_sam_p_in, bool print_m8_p_in, bool invert_first_p_in, bool invert_second_p_in,
-	      bool merge_samechr_p_in,
 #else
 	      Printtype_T printtype_in, int invertmode_in, int wraplength_in, int ngap_in,
 	      bool nointronlenp_in, bool sam_paired_p_in, int cds_startpos_in,
@@ -44,7 +43,7 @@ Output_setup (Univ_IIT_T chromosome_iit_in,
 
 #ifdef GSNAP
 extern Filestring_T
-Output_filestring_fromresult (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
+Output_filestring_fromresult (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2,
 			      Result_T result, Request_T request);
 #else
 extern Filestring_T
diff --git a/src/pair.c b/src/pair.c
index 8a80e3f..83bbc20 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 200236 2016-11-08 00:58:17Z twu $";
+static char rcsid[] = "$Id: pair.c 209124 2017-08-15 19:31:32Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -31,16 +31,17 @@ static char rcsid[] = "$Id: pair.c 200236 2016-11-08 00:58:17Z twu $";
 #include "maxent.h"
 #include "maxent_hr.h"
 #include "mapq.h"
+#include "resulthr.h"		/* For Pairtype_T */
 
 
 #ifndef PMAP
 #include "substring.h"		/* For Endtype_T */
-#include "stage3hr.h"
 #include "samflags.h"
 #endif
 
 #ifdef GSNAP
 #include "samprint.h"
+#include "cigar.h"
 #endif
 
 
@@ -149,8 +150,8 @@ static bool snps_p;
 static bool print_nsnpdiffs_p;
 static double genomelength;	/* For BLAST E-value */
 
-static bool gff3_phase_swap_p = true;
-static bool cigar_extended_p = true;
+static bool gff3_phase_swap_p;
+static bool cigar_extended_p;
 
 
 void
@@ -1750,6 +1751,39 @@ Pair_codon_changepos (struct T *pairs, int npairs, int aapos, int cdna_direction
 }  
 
 
+#if 0
+bool
+Pair_identical_p (List_T pairs1, List_T pairs2) {
+  List_T p, q;
+  T pair1, pair2;
+
+  p = pairs1;
+  q = pairs2;
+  while (p && q) {
+    pair1 = (T) List_head(p);
+    pair2 = (T) List_head(q);
+    if (pair1->gapp != pair2->gapp) {
+      return false;
+    } else if (pair1->querypos != pair2->querypos) {
+      return false;
+    } else if (pair1->genomepos != pair2->genomepos) {
+      return false;
+    } else if (pair1->comp != pair2->comp) {
+      return false;
+    }
+    p = List_next(p);
+    q = List_next(q);
+  }
+
+  if (p || q) {
+    return false;
+  } else {
+    return true;
+  }
+}
+#endif
+
+
 void
 Pair_check_list (List_T pairs) {
   T this;
@@ -1823,10 +1857,45 @@ Pair_check_array (struct T *pairs, int npairs) {
 }  
 
 
+/* Modeled after Pair_convert_array_to_pairs */
+List_T
+Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp,
+			     Chrpos_T chrlength, Pairpool_T pairpool) {
+  T pair;
+  int i;
+
+  if (plusp == true) {
+    for (i = 0; i < npairs; i++) {
+      pair = &(pairarray[i]);
+      if (pair->gapp) {
+	/* Skip */
+      } else {
+	pairs = Pairpool_push(pairs,pairpool,pair->querypos /*+ queryseq_offset*/,pair->genomepos,
+			      pair->cdna,pair->comp,pair->genome,pair->genomealt,/*dynprogindex*/0);
+      }
+    }
+
+  } else {
+    for (i = 0; i < npairs; i++) {
+      pair = &(pairarray[i]);
+      if (pair->gapp) {
+	/* Skip */
+      } else {
+	pairs = Pairpool_push(pairs,pairpool,pair->querypos /*+ queryseq_offset*/,chrlength - pair->genomepos,
+			      pair->cdna,pair->comp,pair->genome,pair->genomealt,/*dynprogindex*/0);
+      }
+    }
+  }
+
+      
+  return pairs;
+}
+
+
 /* Called by output thread for --merge-overlap feature.  Modeled after Substring_convert_to_pairs. */
 List_T
-Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
-			     int hardclip_low, int hardclip_high, int queryseq_offset) {
+Pair_convert_array_to_pairs_out (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
+				 int hardclip_low, int hardclip_high, int queryseq_offset) {
   T pair;
   int querystart, queryend, i;
 
@@ -2195,7 +2264,7 @@ Pair_tokens_free (List_T *tokens) {
     token = (char *) List_head(p);
     FREE_OUT(token);
   }
-  List_free(&(*tokens));
+  List_free_out(&(*tokens));
 
   return;
 }
@@ -2210,7 +2279,7 @@ Pair_tokens_copy (List_T old) {
     old_token = (char *) List_head(old);
     new_token = (char *) MALLOC_OUT((strlen(old_token)+1) * sizeof(char));
     strcpy(new_token,old_token);
-    new = List_push(new,(void *) new_token);
+    new = List_push_out(new,(void *) new_token);
     old = List_next(old);
   }
 
@@ -2249,7 +2318,7 @@ print_tokens_compressed (Filestring_T fp, List_T tokens) {
 
   for (p = tokens; p != NULL; p = List_next(p)) {
     token = (char *) List_head(p);
-    FREE(token);
+    FREE_OUT(token);
   }
 
   return;
@@ -2273,7 +2342,7 @@ print_tokens_gff3 (Filestring_T fp, List_T tokens) {
 
   for (p = tokens; p != NULL; p = List_next(p)) {
     token = (char *) List_head(p);
-    FREE(token);
+    FREE_OUT(token);
   }
 
   return;
@@ -2285,7 +2354,7 @@ push_token (List_T tokens, char *token) {
 
   copy = (char *) MALLOC_OUT((strlen(token)+1) * sizeof(char));
   strcpy(copy,token);
-  return List_push(tokens,(void *) copy);
+  return List_push_out(tokens,(void *) copy);
 }
 
 
@@ -2405,40 +2474,45 @@ print_gff3_exon (Filestring_T fp, int exonno, int pathnum, char *sourcename, cha
 		 int exon_querystart, int exon_queryend, bool watsonp, int cdna_direction,
 		 int pctidentity) {
 
-  FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
-  FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
-  FPRINTF(fp,"exon\t");		/* 3: type */
-  if (exon_genomestart < exon_genomeend) {
-    FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
-  } else {
-    FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
-  }
-  FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
+  if (exon_genomestart == exon_genomeend) {
+    /* Due to a query skip, so don't print */
 
-  if (watsonp == true) {
-    if (cdna_direction >= 0) {
-      FPRINTF(fp,"+\t");
+  } else {
+    FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
+    FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
+    FPRINTF(fp,"exon\t");		/* 3: type */
+    if (exon_genomestart < exon_genomeend) {
+      FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
     } else {
-      FPRINTF(fp,"-\t");
+      FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
     }
-  } else {
-    if (cdna_direction >= 0) {
-      FPRINTF(fp,"-\t");		/* 7: strand */
+    FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
+
+    if (watsonp == true) {
+      if (cdna_direction >= 0) {
+	FPRINTF(fp,"+\t");
+      } else {
+	FPRINTF(fp,"-\t");
+      }
     } else {
-      FPRINTF(fp,"+\t");
+      if (cdna_direction >= 0) {
+	FPRINTF(fp,"-\t");		/* 7: strand */
+      } else {
+	FPRINTF(fp,"+\t");
+      }
     }
-  }
 
-  FPRINTF(fp,".\t");		/* 8: phase */
+    FPRINTF(fp,".\t");		/* 8: phase */
 
-  /* 9: features */
-  FPRINTF(fp,"ID=%s.mrna%d.exon%d;",accession,pathnum,exonno);
-  FPRINTF(fp,"Name=%s;",accession);
-  FPRINTF(fp,"Parent=%s.mrna%d;",accession,pathnum);
-  if (cdna_direction >= 0) {
-    FPRINTF(fp,"Target=%s %d %d +\n",accession,exon_querystart,exon_queryend);
-  } else {
-    FPRINTF(fp,"Target=%s %d %d -\n",accession,exon_queryend,exon_querystart);
+    /* 9: features */
+    FPRINTF(fp,"ID=%s.mrna%d.exon%d;",accession,pathnum,exonno);
+    FPRINTF(fp,"Name=%s;",accession);
+    FPRINTF(fp,"Parent=%s.mrna%d;",accession,pathnum);
+    if (cdna_direction >= 0) {
+      FPRINTF(fp,"Target=%s %d %d +\n",accession,exon_querystart,exon_queryend);
+    } else {
+      FPRINTF(fp,"Target=%s %d %d -\n",accession,exon_queryend,exon_querystart);
+    }
   }
 
   return;
@@ -2450,53 +2524,58 @@ print_gff3_cds (Filestring_T fp, int cdsno, int pathnum, char *sourcename, char
 		int cds_querystart, int cds_queryend, bool watsonp, int cdna_direction,
 		int pctidentity, int cds_phase) {
 
-  FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
-  FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
-  FPRINTF(fp,"CDS\t");		/* 3: type */
-  if (cds_genomestart < cds_genomeend) {
-    FPRINTF(fp,"%u\t%u\t",cds_genomestart,cds_genomeend); /* 4,5: start, end */
+  if (cds_genomestart == cds_genomeend) {
+    /* Due to a query skip, so don't print */
+
   } else {
-    FPRINTF(fp,"%u\t%u\t",cds_genomeend,cds_genomestart); /* 4,5: start, end */
-  }
-  FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
+    FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
+    FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
+    FPRINTF(fp,"CDS\t");		/* 3: type */
+    if (cds_genomestart < cds_genomeend) {
+      FPRINTF(fp,"%u\t%u\t",cds_genomestart,cds_genomeend); /* 4,5: start, end */
+    } else {
+      FPRINTF(fp,"%u\t%u\t",cds_genomeend,cds_genomestart); /* 4,5: start, end */
+    }
+    FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
 
-  if (watsonp == true) {
-    if (cdna_direction >= 0) {
-      FPRINTF(fp,"+\t");
+    if (watsonp == true) {
+      if (cdna_direction >= 0) {
+	FPRINTF(fp,"+\t");
+      } else {
+	FPRINTF(fp,"-\t");
+      }
     } else {
-      FPRINTF(fp,"-\t");
+      if (cdna_direction >= 0) {
+	FPRINTF(fp,"-\t");		/* 7: strand */
+      } else {
+	FPRINTF(fp,"+\t");
+      }
     }
-  } else {
+
+    if (gff3_phase_swap_p == true && cds_phase > 0) {
+      /* Some analysis programs want phase in gff3 to be different */
+      FPRINTF(fp,"%d\t",3 - cds_phase);	/* 8: phase */
+    } else {
+      /* This appears to be the specification: a phase of 0 indicates
+	 that the next codon begins at the first base of the region
+	 described by the current line, a phase of 1 indicates that the
+	 next codon begins at the second base of this region, and a
+	 phase of 2 indicates that the codon begins at the third base of
+	 this region. */
+      FPRINTF(fp,"%d\t",cds_phase);	/* 8: phase */
+    }
+
+    /* 9: features */
+    FPRINTF(fp,"ID=%s.mrna%d.cds%d;",accession,pathnum,cdsno);
+    FPRINTF(fp,"Name=%s;",accession);
+    FPRINTF(fp,"Parent=%s.mrna%d;",accession,pathnum);
     if (cdna_direction >= 0) {
-      FPRINTF(fp,"-\t");		/* 7: strand */
+      FPRINTF(fp,"Target=%s %d %d +\n",accession,cds_querystart,cds_queryend);
     } else {
-      FPRINTF(fp,"+\t");
+      FPRINTF(fp,"Target=%s %d %d -\n",accession,cds_queryend,cds_querystart);
     }
   }
 
-  if (gff3_phase_swap_p == true && cds_phase > 0) {
-    /* Some analysis programs want phase in gff3 to be different */
-    FPRINTF(fp,"%d\t",3 - cds_phase);	/* 8: phase */
-  } else {
-    /* This appears to be the specification: a phase of 0 indicates
-       that the next codon begins at the first base of the region
-       described by the current line, a phase of 1 indicates that the
-       next codon begins at the second base of this region, and a
-       phase of 2 indicates that the codon begins at the third base of
-       this region. */
-    FPRINTF(fp,"%d\t",cds_phase);	/* 8: phase */
-  }
-
-  /* 9: features */
-  FPRINTF(fp,"ID=%s.mrna%d.cds%d;",accession,pathnum,cdsno);
-  FPRINTF(fp,"Name=%s;",accession);
-  FPRINTF(fp,"Parent=%s.mrna%d;",accession,pathnum);
-  if (cdna_direction >= 0) {
-    FPRINTF(fp,"Target=%s %d %d +\n",accession,cds_querystart,cds_queryend);
-  } else {
-    FPRINTF(fp,"Target=%s %d %d -\n",accession,cds_queryend,cds_querystart);
-  }
-
   return;
 }
 
@@ -2507,31 +2586,36 @@ print_gff3_cdna_match (Filestring_T fp, int pathnum, char *sourcename, char *acc
 		       int exon_querystart, int exon_queryend, bool watsonp,
 		       int pctidentity, List_T tokens) {
   
-  FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
-  FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
-  FPRINTF(fp,"cDNA_match\t");		/* 3: type */
-  if (exon_genomestart < exon_genomeend) {
-    FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
-  } else {
-    FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
-  }
-  FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
+  if (exon_genomestart == exon_genomeend) {
+    /* Due to a query skip, so don't print */
 
-  /* 7: strand */
-  if (watsonp == true) {
-    FPRINTF(fp,"+\t");
   } else {
-    FPRINTF(fp,"-\t");
-  }
+    FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
+    FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
+    FPRINTF(fp,"cDNA_match\t");		/* 3: type */
+    if (exon_genomestart < exon_genomeend) {
+      FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
+    } else {
+      FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
+    }
+    FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
 
-  FPRINTF(fp,".\t");		/* 8: phase */
+    /* 7: strand */
+    if (watsonp == true) {
+      FPRINTF(fp,"+\t");
+    } else {
+      FPRINTF(fp,"-\t");
+    }
 
-  /* 9: features */
-  FPRINTF(fp,"ID=%s.path%d;",accession,pathnum);
-  FPRINTF(fp,"Name=%s;",accession);
-  FPRINTF(fp,"Target=%s %d %d;Gap=",accession,exon_querystart,exon_queryend);
-  print_tokens_gff3(fp,tokens);
-  PUTC('\n',fp);
+    FPRINTF(fp,".\t");		/* 8: phase */
+
+    /* 9: features */
+    FPRINTF(fp,"ID=%s.path%d;",accession,pathnum);
+    FPRINTF(fp,"Name=%s;",accession);
+    FPRINTF(fp,"Target=%s %d %d;Gap=",accession,exon_querystart,exon_queryend);
+    print_tokens_gff3(fp,tokens);
+    PUTC('\n',fp);
+  }
 
   return;
 }
@@ -2560,54 +2644,61 @@ print_gff3_est_match (Filestring_T fp, int pathnum, T start, T end,
   int den;
   int querypos1, querypos2;
 
-  FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
-  FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
-  FPRINTF(fp,"EST_match\t");	/* 3: type */
-  if (exon_genomestart < exon_genomeend) {
-    FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
+  if (exon_genomestart == exon_genomeend) {
+    /* Due to a query skip, so don't print */
+
   } else {
-    FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
-  }
-  FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
+    FPRINTF(fp,"%s\t",chrstring);	/* 1: seqid */
+    FPRINTF(fp,"%s\t",sourcename);	/* 2: source */
+    FPRINTF(fp,"EST_match\t");	/* 3: type */
+    if (exon_genomestart < exon_genomeend) {
+      FPRINTF(fp,"%u\t%u\t",exon_genomestart,exon_genomeend); /* 4,5: start, end */
+    } else {
+      FPRINTF(fp,"%u\t%u\t",exon_genomeend,exon_genomestart); /* 4,5: start, end */
+    }
+    FPRINTF(fp,"%d\t",pctidentity);	/* 6: score */
 
-  /* 7: strand */
-  feature_strand = watsonp ? cdna_direction : -cdna_direction;
-  FPRINTF(fp,"%c\t",strand_char(feature_strand));
+    /* 7: strand */
+    feature_strand = watsonp ? cdna_direction : -cdna_direction;
+    FPRINTF(fp,"%c\t",strand_char(feature_strand));
 
-  FPRINTF(fp,".\t");		/* 8: phase */
+    FPRINTF(fp,".\t");		/* 8: phase */
 
-  /* 9: features */
-  FPRINTF(fp,"ID=%s.path%d;",accession,pathnum);
-  FPRINTF(fp,"Name=%s;",accession);
-  target_strand = cdna_direction != 0 ? cdna_direction : (watsonp ? 1 : -1);
-  FPRINTF(fp,"Target=%s %d %d %c;Gap=",accession,exon_querystart,exon_queryend,
-      strand_char(target_strand));
-  print_tokens_gff3(fp,tokens);
+    /* 9: features */
+    FPRINTF(fp,"ID=%s.path%d;",accession,pathnum);
+    FPRINTF(fp,"Name=%s;",accession);
+    target_strand = cdna_direction != 0 ? cdna_direction : (watsonp ? 1 : -1);
+    FPRINTF(fp,"Target=%s %d %d %c;Gap=",accession,exon_querystart,exon_queryend,
+	    strand_char(target_strand));
+    print_tokens_gff3(fp,tokens);
 
-  querypos1 = start->querypos;
-  querypos2 = end->querypos;
+    querypos1 = start->querypos;
+    querypos2 = end->querypos;
 
 #ifdef PMAP
-  coverage = (double) (querypos2 - querypos1 + 1)/(double) (3*(querylength_given + skiplength));
-  /* Can have coverage greater than given querylength because of added '*' at end */
-  if (coverage > 1.0) {
-    coverage = 1.0;
-  }
+    coverage = (double) (querypos2 - querypos1 + 1)/(double) (3*(querylength_given + skiplength));
+    /* Can have coverage greater than given querylength because of added '*' at end */
+    if (coverage > 1.0) {
+      coverage = 1.0;
+    }
 #else
-  coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given + skiplength);
+    coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given + skiplength);
 #endif
-  FPRINTF(fp,";coverage=%.1f",((double) rint(1000.0*coverage))/10.0);
+    FPRINTF(fp,";coverage=%.1f",((double) rint(1000.0*coverage))/10.0);
 
-  if ((den = matches + mismatches + qindels + tindels) == 0) {
-    fracidentity = 1.0;
-  } else {
-    fracidentity = (double) matches/(double) den;
+    if ((den = matches + mismatches + qindels + tindels) == 0) {
+      fracidentity = 1.0;
+    } else {
+      fracidentity = (double) matches/(double) den;
+    }
+    FPRINTF(fp,";identity=%.1f",((double) rint(1000.0*fracidentity))/10.0);
+    FPRINTF(fp,";matches=%d;mismatches=%d;indels=%d;unknowns=%d",
+	    matches,mismatches,qindels+tindels,unknowns);
+
+    PUTC('\n',fp);
   }
-  FPRINTF(fp,";identity=%.1f",((double) rint(1000.0*fracidentity))/10.0);
-  FPRINTF(fp,";matches=%d;mismatches=%d;indels=%d;unknowns=%d",
-	  matches,mismatches,qindels+tindels,unknowns);
 
-  PUTC('\n',fp);
+  return;
 }
 
 
@@ -2672,7 +2763,7 @@ print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int path
 	    /* ++exonno; */
 	    print_gff3_cdna_match(fp,pathnum,sourcename,accession,chrstring,exon_genomestart,exon_genomeend,
 				  exon_querystart,exon_queryend,watsonp,pctidentity,tokens);
-	    List_free(&tokens);
+	    List_free_out(&tokens);
 	  }
 	}
 
@@ -2842,7 +2933,7 @@ print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int path
       print_gff3_cdna_match(fp,pathnum,sourcename,accession,chrstring,exon_genomestart,exon_genomeend,
 			    exon_querystart,exon_queryend,watsonp,pctidentity,tokens);
     }
-    List_free(&tokens);
+    List_free_out(&tokens);
   }
 
   return;
@@ -3225,7 +3316,7 @@ Pair_circularpos (int *alias, struct T *pairs, int npairs, Chrpos_T chrlength, b
   Chrpos_T low, high;
   struct T *ptr;
   int i, ninsertions, querypos;
-  /* Chrpos_T chrhigh; */
+  /* Univcoord_T chrhigh; */
 
   debug12(Pair_dump_array(pairs,npairs,true));
 
@@ -3834,8 +3925,10 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  /* Finish rest of this line */
 	  for (querypos = exon_queryend; querypos < querylength; querypos++) {
@@ -3910,8 +4003,10 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  /* Finish rest of this line */
 	  for (querypos = exon_queryend + nindels; querypos < querylength; querypos++) {
@@ -4089,108 +4184,49 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs_querydir, int npairs, int nse
 #endif	/* PMAP */
 
 
-/* Taken from NCBI Blast 2.2.29, algo/blast/core/blast_stat.c */
-/* Karlin-Altschul formula: m n exp(-lambda * S + log k) = k m n exp(-lambda * S) */
-/* Also in substring.c */
-
-static double
-blast_evalue (int alignlength, int nmismatches) {
-  double k = 0.1;
-  double lambda = 1.58;		/* For a +1, -1 scoring scheme */
-  double score;
-  
-  score = (double) ((alignlength - nmismatches) /* scored as +1 */ - nmismatches /* scored as -1 */);
-
-  return k * (double) alignlength * genomelength * exp(-lambda * score);
-}
-
-
-#ifdef GSNAP
-static double
-blast_bitscore (int alignlength, int nmismatches) {
-  double k = 0.1;
-  double lambda = 1.58;		/* For a +1, -1 scoring scheme */
-  double score;
-  
-  score = (double) ((alignlength - nmismatches) /* scored as +1 */ - nmismatches /* scored as -1 */);
-  return (score * lambda - log(k)) / log(2.0);
-}
-
-
-static void
-print_m8_line (Filestring_T fp, int exon_querystart, int exon_queryend,
-	       char *chr, Chrpos_T exon_genomestart, Chrpos_T exon_genomeend,
-	       int nmismatches_bothdiff, Shortread_T headerseq, char *acc_suffix) {
-  double identity;
-  int alignlength_trim;
-
-  FPRINTF(fp,"%s%s",Shortread_accession(headerseq),acc_suffix); /* field 0: accession */
-
-  FPRINTF(fp,"\t%s",chr);	/* field 1: chr */
-
-  /* field 2: identity */
-  alignlength_trim = exon_queryend - exon_querystart;
-  identity = (double) (alignlength_trim - nmismatches_bothdiff)/(double) alignlength_trim;
-  FPRINTF(fp,"\t%.1f",100.0*identity);
-
-
-  FPRINTF(fp,"\t%d",alignlength_trim); /* field 3: query length */
-
-  FPRINTF(fp,"\t%d",nmismatches_bothdiff); /* field 4: nmismatches */
-
-  FPRINTF(fp,"\t0");		/* field 5: gap openings */
-
-  /* fields 6 and 7: query start and end */
-  FPRINTF(fp,"\t%d\t%d",exon_querystart,exon_queryend);
-
-  /* fields 8 and 9: chr start and end */
-  FPRINTF(fp,"\t%u\t%u",exon_genomestart,exon_genomeend);
-
-  /* field 10: E value */
-  FPRINTF(fp,"\t%.2g",blast_evalue(alignlength_trim,nmismatches_bothdiff));
-
- /* field 11: bit score */
-  FPRINTF(fp,"\t%.1f",blast_bitscore(alignlength_trim,nmismatches_bothdiff));
-  
-  FPRINTF(fp,"\n");
-
-  return;
-}
-
 
+#ifndef PMAP
 void
-Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool invertedp,
-	       Chrnum_T chrnum, Shortread_T queryseq, Shortread_T headerseq,
-	       char *acc_suffix, Univ_IIT_T chromosome_iit) {
+Pair_print_bedpe (Filestring_T fp, struct T *pairarray, int npairs,
+		  Chrnum_T chrnum, int querylength, bool watsonp, int cdna_direction,
+		  Univ_IIT_T chromosome_iit) {
   bool in_exon = true;
-  struct T *pairs, *ptr, *ptr0, *this = NULL;
-  int exon_querystart = -1, exon_queryend;
+  struct T *ptr, *ptr0, *this = NULL, *start;
   Chrpos_T exon_genomestart = -1U, exon_genomeend;
-  int nmismatches_refdiff, nmismatches_bothdiff, nmatches, i;
-  int last_querypos = -1;
+  int nindels, i;
+  /* int last_querypos = -1; */
   Chrpos_T last_genomepos = -1U;
-  char *chr;
-  int querylength;
+  char *chr, strand;
   bool allocp;
 
-  querylength = Shortread_fulllength(queryseq);
 
+#if 0
   if (invertedp == true) {
     pairs = invert_and_revcomp_path_and_coords(pairs_querydir,npairs,querylength);
+    watsonp = !watsonp;
+    cdna_direction = -cdna_direction;
   } else {
     pairs = pairs_querydir;
   }
+#endif
 
 
   chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
+  if (watsonp == true) {
+    strand = '+';
+  } else {
+    strand = '-';
+  }
 
-  ptr = pairs;
-  exon_querystart = ptr->querypos + 1;
+
+  ptr = pairarray;
+  /* exon_querystart = ptr->querypos + 1; */
   exon_genomestart = ptr->genomepos + 1;
-  nmismatches_refdiff = nmismatches_bothdiff = nmatches = 0;
+
 
   i = 0;
   while (i < npairs) {
+    /* prev = this; */
     this = ptr++;
     i++;
 
@@ -4201,14 +4237,9 @@ Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool inver
 	while (ptr0->gapp) {
 	  ptr0++;
 	}
-	exon_queryend = last_querypos + 1;
+	/* exon_queryend = last_querypos + 1; */
 	exon_genomeend = last_genomepos + 1;
 
-	print_m8_line(fp,exon_querystart,exon_queryend,chr,exon_genomestart,exon_genomeend,
-		      nmismatches_bothdiff,headerseq,acc_suffix);
-
-	nmismatches_refdiff = nmismatches_bothdiff = nmatches = 0;
-
 	in_exon = false;
       }
     } else if (this->comp == INTRONGAP_COMP) {
@@ -4219,24 +4250,286 @@ Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool inver
 	 SHORTGAP_COMP, or MISMATCH_COMP */
       if (in_exon == false) {
 	/* SPLICE CONTINUATION */
-	exon_querystart = this->querypos + 1;
+	/* exon_querystart = this->querypos + 1; */
 	exon_genomestart = this->genomepos + 1;
 
 	in_exon = true;
+	if (strand == '+') {
+	  FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomeend-1,exon_genomeend);
+	  FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomestart-1,exon_genomestart);
+	  FPRINTF(fp,"DELETION\t0\t");
+	  FPRINTF(fp,"+\t+\t");
+	  FPRINTF(fp,"%d\n",exon_genomestart - exon_genomeend - 1);
+	} else {
+	  FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomestart-1,exon_genomestart);
+	  FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomeend-1,exon_genomeend);
+	  FPRINTF(fp,"DELETION\t0\t");
+	  FPRINTF(fp,"+\t+\t");
+	  FPRINTF(fp,"%d\n",exon_genomeend - exon_genomestart - 1);
+	}
       }
+
       if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
 	if (this->genome == ' ') {
 	  /* INSERTION */
-	  exon_queryend = last_querypos + 1;
+	  /* exon_queryend = last_querypos + 1; */
 	  exon_genomeend = last_genomepos + 1;
 
 	  /* indel_pos = this->querypos; */
+	  start = this;
+	  nindels = 0;
 	  while (i < npairs && this->gapp == false && this->genome == ' ') {
+	    nindels++;
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
+
+	  this = ptr;
+	  /* exon_querystart = this->querypos + 1; */
+	  exon_genomestart = this->genomepos + 1;
+
+	  if (strand == '+') {
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomeend-1,exon_genomeend);
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomestart-1,exon_genomestart);
+	    FPRINTF(fp,"INSERTION\t0\t");
+	    FPRINTF(fp,"+\t+\t");
+	    while (start < this) {
+	      FPRINTF(fp,"%c",start->cdna);
+	      start++;
+	    }
+	  } else {
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomestart-1,exon_genomestart);
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomeend-1,exon_genomeend);
+	    FPRINTF(fp,"INSERTION\t0\t");
+	    FPRINTF(fp,"+\t+\t");
+	    while (start < this) {
+	      FPRINTF(fp,"%c",complCode[(int) start->cdna]);
+	      start++;
+	    }
+	  }
+	  FPRINTF(fp,"\n");
+
+	} else if (this->cdna == ' ') {
+	  /* DELETION */
+	  /* exon_queryend = last_querypos + 1; */
+	  exon_genomeend = last_genomepos + 1;
+
+	  /* indel_pos = this->querypos; */
+	  nindels = 0;
+	  while (i < npairs && this->gapp == false && this->cdna == ' ') {
+	    nindels++;
+	    this = ptr++;
+	    i++;
+	  }
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
+
+	  this = ptr;
+	  /* exon_querystart = this->querypos + 1; */
+	  exon_genomestart = this->genomepos + 1;
+
+	  if (strand == '+') {
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomeend-1,exon_genomeend);
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomestart-1,exon_genomestart);
+	  } else {
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomestart-1,exon_genomestart);
+	    FPRINTF(fp,"%s\t%u\t%u\t",chr,exon_genomeend-1,exon_genomeend);
+	  }
+	  FPRINTF(fp,"DELETION\t0\t");
+	  FPRINTF(fp,"+\t+\t");
+	  FPRINTF(fp,"%d\n",nindels);
+
+	} else {
+	  fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+	  exit(9);
+	}
+
+      } else {
+	/* Match or mismatch */
+      }
+    }
+
+#if 0
+    if (this->cdna != ' ') {
+      last_querypos = this->querypos;
+    }
+#endif
+    if (this->genome != ' ') {
+      last_genomepos = this->genomepos;
+    }
+  }
+
+  if (allocp) {
+    FREE(chr);
+  }
+
+#if 0
+  if (invertedp == true) {
+    FREE(pairs);
+  }
+#endif
+
+  return;
+}
+#endif
+
+
+/* Taken from NCBI Blast 2.2.29, algo/blast/core/blast_stat.c */
+/* Karlin-Altschul formula: m n exp(-lambda * S + log k) = k m n exp(-lambda * S) */
+/* Also in substring.c */
+
+static double
+blast_evalue (int alignlength, int nmismatches) {
+  double k = 0.1;
+  double lambda = 1.58;		/* For a +1, -1 scoring scheme */
+  double score;
+  
+  score = (double) ((alignlength - nmismatches) /* scored as +1 */ - nmismatches /* scored as -1 */);
+
+  return k * (double) alignlength * genomelength * exp(-lambda * score);
+}
+
+
+#ifdef GSNAP
+static double
+blast_bitscore (int alignlength, int nmismatches) {
+  double k = 0.1;
+  double lambda = 1.58;		/* For a +1, -1 scoring scheme */
+  double score;
+  
+  score = (double) ((alignlength - nmismatches) /* scored as +1 */ - nmismatches /* scored as -1 */);
+  return (score * lambda - log(k)) / log(2.0);
+}
+
+
+static void
+print_m8_line (Filestring_T fp, int exon_querystart, int exon_queryend,
+	       char *chr, Chrpos_T exon_genomestart, Chrpos_T exon_genomeend,
+	       int nmismatches_bothdiff, Shortread_T headerseq, char *acc_suffix) {
+  double identity;
+  int alignlength_trim;
+
+  FPRINTF(fp,"%s%s",Shortread_accession(headerseq),acc_suffix); /* field 0: accession */
+
+  FPRINTF(fp,"\t%s",chr);	/* field 1: chr */
+
+  /* field 2: identity */
+  alignlength_trim = exon_queryend - exon_querystart;
+  identity = (double) (alignlength_trim - nmismatches_bothdiff)/(double) alignlength_trim;
+  FPRINTF(fp,"\t%.1f",100.0*identity);
+
+
+  FPRINTF(fp,"\t%d",alignlength_trim); /* field 3: query length */
+
+  FPRINTF(fp,"\t%d",nmismatches_bothdiff); /* field 4: nmismatches */
+
+  FPRINTF(fp,"\t0");		/* field 5: gap openings */
+
+  /* fields 6 and 7: query start and end */
+  FPRINTF(fp,"\t%d\t%d",exon_querystart,exon_queryend);
+
+  /* fields 8 and 9: chr start and end */
+  FPRINTF(fp,"\t%u\t%u",exon_genomestart,exon_genomeend);
+
+  /* field 10: E value */
+  FPRINTF(fp,"\t%.2g",blast_evalue(alignlength_trim,nmismatches_bothdiff));
+
+ /* field 11: bit score */
+  FPRINTF(fp,"\t%.1f",blast_bitscore(alignlength_trim,nmismatches_bothdiff));
+  
+  FPRINTF(fp,"\n");
+
+  return;
+}
+
+
+void
+Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool invertedp,
+	       Chrnum_T chrnum, Shortread_T queryseq, Shortread_T headerseq,
+	       char *acc_suffix, Univ_IIT_T chromosome_iit) {
+  bool in_exon = true;
+  struct T *pairs, *ptr, *ptr0, *this = NULL;
+  int exon_querystart = -1, exon_queryend;
+  Chrpos_T exon_genomestart = -1U, exon_genomeend;
+  int nmismatches_refdiff, nmismatches_bothdiff, nmatches, i;
+  int last_querypos = -1;
+  Chrpos_T last_genomepos = -1U;
+  char *chr;
+  int querylength;
+  bool allocp;
+
+  querylength = Shortread_fulllength(queryseq);
+
+  if (invertedp == true) {
+    pairs = invert_and_revcomp_path_and_coords(pairs_querydir,npairs,querylength);
+  } else {
+    pairs = pairs_querydir;
+  }
+
+
+  chr = Univ_IIT_label(chromosome_iit,chrnum,&allocp);
+
+  ptr = pairs;
+  exon_querystart = ptr->querypos + 1;
+  exon_genomestart = ptr->genomepos + 1;
+  nmismatches_refdiff = nmismatches_bothdiff = nmatches = 0;
+
+  i = 0;
+  while (i < npairs) {
+    this = ptr++;
+    i++;
+
+    if (this->gapp) {
+      if (in_exon == true) {
+	/* SPLICE START */
+	ptr0 = ptr;
+	while (ptr0->gapp) {
+	  ptr0++;
+	}
+	exon_queryend = last_querypos + 1;
+	exon_genomeend = last_genomepos + 1;
+
+	print_m8_line(fp,exon_querystart,exon_queryend,chr,exon_genomestart,exon_genomeend,
+		      nmismatches_bothdiff,headerseq,acc_suffix);
+
+	nmismatches_refdiff = nmismatches_bothdiff = nmatches = 0;
+
+	in_exon = false;
+      }
+    } else if (this->comp == INTRONGAP_COMP) {
+      /* May want to print dinucleotides */
+
+    } else {
+      /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP, 
+	 SHORTGAP_COMP, or MISMATCH_COMP */
+      if (in_exon == false) {
+	/* SPLICE CONTINUATION */
+	exon_querystart = this->querypos + 1;
+	exon_genomestart = this->genomepos + 1;
+
+	in_exon = true;
+      }
+      if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+	if (this->genome == ' ') {
+	  /* INSERTION */
+	  exon_queryend = last_querypos + 1;
+	  exon_genomeend = last_genomepos + 1;
+
+	  /* indel_pos = this->querypos; */
+	  while (i < npairs && this->gapp == false && this->genome == ' ') {
+	    this = ptr++;
+	    i++;
+	  }
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  this = ptr;
 	  exon_querystart = this->querypos + 1;
@@ -4253,8 +4546,10 @@ Pair_print_m8 (Filestring_T fp, struct T *pairs_querydir, int npairs, bool inver
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  /* Finish rest of this line */
 	  print_m8_line(fp,exon_querystart,exon_queryend,chr,exon_genomestart,exon_genomeend,
@@ -4371,8 +4666,10 @@ Pair_min_evalue (struct T *pairarray, int npairs) {
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  this = ptr;
 	  exon_querystart = this->querypos + 1;
@@ -4387,8 +4684,10 @@ Pair_min_evalue (struct T *pairarray, int npairs) {
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  /* Finish rest of this line */
 	  alignlength_trim = exon_queryend - exon_querystart;
@@ -4550,8 +4849,10 @@ Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int np
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  splice_site_probs(&sense_prob,&antisense_prob,
 			    prev_splicesitep,splicesitep,chroffset,
@@ -4570,8 +4871,10 @@ Pair_guess_cdna_direction_array (int *sensedir, struct T *pairs_querydir, int np
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  splice_site_probs(&sense_prob,&antisense_prob,
 			    prev_splicesitep,splicesitep,chroffset,
@@ -4744,8 +5047,10 @@ Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  (*nindelbreaks) += 1;
 	  nsegments++;
@@ -4757,8 +5062,10 @@ Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
 	    this = ptr++;
 	    i++;
 	  }
-	  ptr--;
-	  i--;
+	  if (i < npairs) {
+	    ptr--;
+	    i--;
+	  }
 
 	  (*nindelbreaks) += 1;
 	  nsegments++;
@@ -4785,112 +5092,27 @@ Pair_gsnap_nsegments (int *total_nmismatches, int *total_nindels, int *nintrons,
  *   SAM
  ************************************************************************/
 
-/* Derived from print_tokens_gff3 */
+/* Modeled after Shortread_print_chopped */
 static void
-print_tokens_sam (Filestring_T fp, List_T tokens) {
-  List_T p;
-  char *token;
-  
-  for (p = tokens; p != NULL; p = List_next(p)) {
-    token = (char *) List_head(p);
-    FPRINTF(fp,"%s",token);
-    /* FREE(token); -- Now freed within Stage3end_free or Stage3_free */
-  }
+print_chopped (Filestring_T fp, char *contents, int querylength,
+	       int hardclip_start, int hardclip_end) {
+  int i;
 
+  for (i = hardclip_start; i < querylength - hardclip_end; i++) {
+    PUTC(contents[i],fp);
+  }
   return;
 }
 
-#if 0
+/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
 static void
-print_tokens_stdout (List_T tokens) {
-  List_T p;
-  char *token;
-  
-  for (p = tokens; p != NULL; p = List_next(p)) {
-    token = (char *) List_head(p);
-    printf("%s",token);
-  }
+print_chopped_revcomp (Filestring_T fp, char *contents, int querylength,
+		       int hardclip_start, int hardclip_end) {
+  int i;
 
-  return;
-}
-#endif
-
-
-/* Derived from print_tokens_gff3 */
-int
-Pair_tokens_cigarlength (List_T tokens) {
-  int length = 0, tokenlength;
-  List_T p;
-  char *token;
-  char type;
-  
-  for (p = tokens; p != NULL; p = List_next(p)) {
-    token = (char *) List_head(p);
-    type = token[strlen(token)-1];
-    /* Should include 'H', but that gets added according to hardclip_low and hardclip_high */
-    if (type == 'S' || type == 'I' || type == 'M' || type == 'X' || type == '=') {
-      sscanf(token,"%d",&tokenlength);
-      length += tokenlength;
-    }
-  }
-
-  return length;
-}
-
-
-
-/* Only for GMAP program */
-static unsigned int
-compute_sam_flag_nomate (int npaths, bool first_read_p, bool watsonp, bool sam_paired_p) {
-  unsigned int flag = 0U;
-
-  if (sam_paired_p == true) {
-    flag |= PAIRED_READ;
-    if (first_read_p == true) {
-      flag |= FIRST_READ_P;
-    } else {
-      flag |= SECOND_READ_P;
-    }
-  }
-
-  if (npaths == 0) {
-    flag |= QUERY_UNMAPPED;
-  } else if (watsonp == false) {
-    flag |= QUERY_MINUSP;
-  }
-
-#if 0
-  /* Will let external program decide what is primary */
-  if (pathnum > 1) {
-    flag |= NOT_PRIMARY;
-  }
-#endif
-
-  return flag;
-}
-
-
-/* Modeled after Shortread_print_chopped */
-static void
-print_chopped (Filestring_T fp, char *contents, int querylength,
-	       int hardclip_start, int hardclip_end) {
-  int i;
-
-  for (i = hardclip_start; i < querylength - hardclip_end; i++) {
-    PUTC(contents[i],fp);
-  }
-  return;
-}
-
-/* Differs from Shortread version, in that hardclip_high and hardclip_low are not reversed */
-static void
-print_chopped_revcomp (Filestring_T fp, char *contents, int querylength,
-		       int hardclip_start, int hardclip_end) {
-  int i;
-
-  for (i = querylength - 1 - hardclip_end; i >= hardclip_start; --i) {
-    PUTC(complCode[(int) contents[i]],fp);
-  }
+  for (i = querylength - 1 - hardclip_end; i >= hardclip_start; --i) {
+    PUTC(complCode[(int) contents[i]],fp);
+  }
   return;
 }
 
@@ -5022,272 +5244,111 @@ print_quality_revcomp (Filestring_T fp, char *quality, int querylength,
 }
 
 
+/* Only for GMAP program */
+static unsigned int
+compute_sam_flag_nomate (int npaths, bool first_read_p, bool watsonp, bool sam_paired_p) {
+  unsigned int flag = 0U;
+
+  if (sam_paired_p == true) {
+    flag |= PAIRED_READ;
+    if (first_read_p == true) {
+      flag |= FIRST_READ_P;
+    } else {
+      flag |= SECOND_READ_P;
+    }
+  }
+
+  if (npaths == 0) {
+    flag |= QUERY_UNMAPPED;
+  } else if (watsonp == false) {
+    flag |= QUERY_MINUSP;
+  }
+
 #if 0
-static int
-sensedir_from_cdna_direction (int cdna_direction) {
-  if (cdna_direction > 0) {
-    return SENSE_FORWARD;
-  } else if (cdna_direction < 0) {
-    return SENSE_ANTI;
-  } else {
-    return SENSE_NULL;
+  /* Will let external program decide what is primary */
+  if (pathnum > 1) {
+    flag |= NOT_PRIMARY;
   }
-}
 #endif
 
+  return flag;
+}
 
-/* Derived from print_gff3_cdna_match */
-/* Assumes pairarray has been hard clipped already */
-static void
-print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, char *acc2, char *chrstring,
-		bool watsonp, int sensedir, List_T cigar_tokens, List_T md_tokens,
-		int nmismatches_refdiff, int nmismatches_bothdiff, int nindels,
-		bool intronp, char *queryseq_ptr, char *quality_string,
-		int hardclip_start, int hardclip_end, int querylength, Chimera_T chimera, int quality_shift,
-		int pathnum, int npaths_primary, int npaths_altloc, int absmq_score, int second_absmq, unsigned int flag,
-		Univ_IIT_T chromosome_iit, Chrpos_T chrpos, Chrpos_T chrlength,
-#ifdef GSNAP
-		Shortread_T queryseq, Resulttype_T resulttype, int pair_mapq_score, int end_mapq_score,
-		char *mate_chrstring, Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, int mate_sensedir, int pairedlength,
-		bool invertp,
-#else
-		int mapq_score,
-#endif
-		double min_evalue, char *sam_read_group_id, bool merged_overlap_p, bool sarrayp) {
 
-#if 0
-  /* Should already be checked when Stage3_T or Stage3end_T object was created */
-  if (cigar_action == CIGAR_ACTION_IGNORE) {
-    /* Don't check */
-  } else if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end == querylength) {
-    /* Okay */
-  } else if (cigar_action == CIGAR_ACTION_WARNING) {
-    fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
-	    acc1,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
-  } else if (cigar_action == CIGAR_ACTION_NOPRINT) {
-    fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
-	    acc1,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
-    return;
-  } else {
-    /* CIGAR_ACTION_ABORT */
-    fprintf(stderr,"Error: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
-	    acc1,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
-    abort();
-  }
+
+void
+Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
+			  char *quality_string, int querylength, int quality_shift,
+			  bool first_read_p, bool sam_paired_p, char *sam_read_group_id) {
+  unsigned int flag;
+
+#ifdef GSNAP
+  fprintf(stderr,"Unexpected call to Pair_print_sam_nomapping in GSNAP\n");
+  abort();
 #endif
 
-  /* 1. QNAME or Accession */
+  /* 1. QNAME */
   if (acc2 == NULL) {
-    FPRINTF(fp,"%s\t",acc1);
+    FPRINTF(fp,"%s",acc1);
   } else {
-    FPRINTF(fp,"%s,%s\t",acc1,acc2);
+    FPRINTF(fp,"%s,%s",acc1,acc2);
   }
+  
+  /* 2. FLAG */
+  flag = compute_sam_flag_nomate(/*npaths*/0,first_read_p,/*watsonp*/true,sam_paired_p);
+  FPRINTF(fp,"\t%u",flag);
 
-  /* 2. Flags */
-  FPRINTF(fp,"%u\t",flag);
+  /* 3. RNAME: chr */
+  FPRINTF(fp,"\t*");
 
-  /* 3. RNAME or Chrstring */
-  /* 4. POS or Chrlow */
-  /* Taken from GMAP part of SAM_chromosomal_pos */
-  if (chrpos > chrlength) {
-    FPRINTF(fp,"%s\t%u\t",chrstring,chrpos - chrlength /*+ 1U*/);
-  } else {
-    FPRINTF(fp,"%s\t%u\t",chrstring,chrpos /*+ 1U*/);
-  }
+  /* 4. POS: chrpos */
+  FPRINTF(fp,"\t0");
 
-  /* 5. MAPQ or Mapping quality */
-#ifdef GSNAP
-  FPRINTF(fp,"%d\t",pair_mapq_score);
-#else
-  FPRINTF(fp,"%d\t",mapq_score);
-#endif
+  /* 5. MAPQ: Mapping quality */
+  /* Picard says MAPQ should be 0 for an unmapped read */
+  FPRINTF(fp,"\t0");
 
   /* 6. CIGAR */
-  print_tokens_sam(fp,cigar_tokens);
+  FPRINTF(fp,"\t*");
 
   /* 7. MRNM: Mate chr */
   /* 8. MPOS: Mate chrpos */
-#ifdef GSNAP
-  if (mate_chrpos == 0U) {
-    FPRINTF(fp,"\t*\t0");
-  } else if (mate_chrpos > mate_chrlength) {
-    FPRINTF(fp,"\t%s\t%u",mate_chrstring,mate_chrpos - mate_chrlength /* +1U*/);
-  } else {
-    FPRINTF(fp,"\t%s\t%u",mate_chrstring,mate_chrpos /* +1U*/);
-  }
-#else
-  FPRINTF(fp,"\t*\t0");
-#endif
-
   /* 9. ISIZE: Insert size */
-#ifdef GSNAP
-  if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
-    if (watsonp == invertp) {
-      FPRINTF(fp,"\t%d",-pairedlength);
-    } else {
-      FPRINTF(fp,"\t%d",pairedlength);
-    }
-  } else if (mate_chrpos == 0) {
-    FPRINTF(fp,"\t%d",pairedlength);
-  } else if (chrpos < mate_chrpos) {
-    FPRINTF(fp,"\t%d",pairedlength);
-  } else if (chrpos > mate_chrpos) {
-    FPRINTF(fp,"\t%d",-pairedlength);
-  } else if (first_read_p == true) {
-    FPRINTF(fp,"\t%d",pairedlength);
-  } else {
-    FPRINTF(fp,"\t%d",-pairedlength);
-  }
-#else
-  FPRINTF(fp,"\t0");
-#endif
+  FPRINTF(fp,"\t*\t0\t0\t");
 
-  /* 10. SEQ: queryseq and 11. QUAL: quality_scores */
+  /* 10. SEQ: queryseq and 11. QUAL: quality scores */
+  print_chopped(fp,queryseq_ptr,querylength,/*hardclip_start*/0,/*hardclip_end*/0);
   FPRINTF(fp,"\t");
-  if (watsonp == true) {
-    print_chopped(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
-    FPRINTF(fp,"\t");
-    print_quality(fp,quality_string,querylength,hardclip_start,hardclip_end,
-		  quality_shift);
-  } else {
-    print_chopped_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
-    FPRINTF(fp,"\t");
-    print_quality_revcomp(fp,quality_string,querylength,hardclip_start,hardclip_end,
-			  quality_shift);
-  }
+  print_quality(fp,quality_string,querylength,/*hardclip_start*/0,/*hardclip_end*/0,
+		quality_shift);
 
   /* 12. TAGS: RG */
   if (sam_read_group_id != NULL) {
     FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
   }
+  
+  /* 12. TAGS: XO */
+  FPRINTF(fp,"\tXO:Z:%s",abbrev);
 
-  /* 12. TAGS: XH and XI */
-  if (hardclip_start > 0 || hardclip_end > 0) {
-    FPRINTF(fp,"\tXH:Z:");
-    if (watsonp == true) {
-      print_chopped_end(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
-    } else {
-      print_chopped_end_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
-    }
+  FPRINTF(fp,"\n");
 
-    if (quality_string != NULL) {
-      FPRINTF(fp,"\tXI:Z:");
-      if (watsonp == true) {
-	print_chopped_end_quality(fp,quality_string,querylength,hardclip_start,hardclip_end);
-      } else {
-	print_chopped_end_quality_reverse(fp,quality_string,querylength,hardclip_start,hardclip_end);
-      }
-    }
-  }
+  return;
+}
 
-#ifdef GSNAP
-  if (queryseq != NULL) {
-    /* 12. TAGS: XB */
-    Shortread_print_barcode(fp,queryseq);
 
-    /* 12. TAGS: XP.  Logically should be last in reconstructing a read. */
-    Shortread_print_chop(fp,queryseq,invertp);
-  }
-#endif
-
-  /* 12. TAGS: MD string */
-  FPRINTF(fp,"\tMD:Z:");
-  print_tokens_sam(fp,md_tokens);
-
-  /* 12. TAGS: NH */
-  FPRINTF(fp,"\tNH:i:%d",npaths_primary + npaths_altloc);
-  
-  /* 12. TAGS: HI */
-  FPRINTF(fp,"\tHI:i:%d",pathnum);
-
-  /* 12. TAGS: NM */
-  FPRINTF(fp,"\tNM:i:%d",nmismatches_refdiff + nindels);
-
-  if (snps_p) {
-    /* 12. TAGS: XW and XV */
-    FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
-    FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
-  }
-
-
-  /* 12. TAGS: SM */
-#ifdef GSNAP
-  FPRINTF(fp,"\tSM:i:%d",end_mapq_score);
-#else
-  FPRINTF(fp,"\tSM:i:%d",40);
-#endif
-
-  /* 12. TAGS: XQ */
-  FPRINTF(fp,"\tXQ:i:%d",absmq_score);
-
-  /* 12. TAGS: X2 */
-  FPRINTF(fp,"\tX2:i:%d",second_absmq);
-
-  /* 12. TAGS: XO */
-  FPRINTF(fp,"\tXO:Z:%s",abbrev);
-
-  /* 12. TAGS: XS */
-#ifdef GSNAP
-  /* Use mate sensedir, to be consistent with samprint method */
-  if (sensedir == SENSE_NULL) {
-    sensedir = mate_sensedir;
-  }
-#endif
-
-  if (sensedir == SENSE_FORWARD) {
-    if (watsonp == true) {
-      FPRINTF(fp,"\tXS:A:+");
-    } else {
-      FPRINTF(fp,"\tXS:A:-");
-    }
-
-  } else if (sensedir == SENSE_ANTI) {
-    if (watsonp == true) {
-      FPRINTF(fp,"\tXS:A:-");
-    } else {
-      FPRINTF(fp,"\tXS:A:+");
-    }
-
-  } else if (intronp == false) {
-    /* Skip.  No intron in this end and mate is not revealing. */
 
 #if 0
-  } else if (force_xs_direction_p == true) {
-    /* Don't print XS field for SENSE_NULL */
-    /* Could not determine sense, so just report arbitrarily as + */
-    /* This option provided for users of Cufflinks, which cannot handle XS:A:? */
-    FPRINTF(fp,"\tXS:A:+");
-    
-  } else {
-    /* Non-canonical.  Don't report. */
-    FPRINTF(fp,"\tXS:A:?");
-#endif
-  }
-
-  /* 12. TAGS: XT */
-  if (chimera != NULL) {
-    FPRINTF(fp,"\tXT:Z:");
-    Chimera_print_sam_tag(fp,chimera,chromosome_iit);
-  }
-
-  /* 12. TAGS: XG */
-  if (merged_overlap_p) {
-    FPRINTF(fp,"\tXG:Z:O");
-  } else if (sarrayp == true) {
-    FPRINTF(fp,"\tXG:Z:B");
+static int
+sensedir_from_cdna_direction (int cdna_direction) {
+  if (cdna_direction > 0) {
+    return SENSE_FORWARD;
+  } else if (cdna_direction < 0) {
+    return SENSE_ANTI;
   } else {
-    FPRINTF(fp,"\tXG:Z:M");
+    return SENSE_NULL;
   }
-
-#if 0
-  /* 12. TAGS: XE (BLAST E-value) */
-  FPRINTF(fp,"\tXE:f:%.2g",min_evalue);
-#endif
-
-  FPRINTF(fp,"\n");
-
-  return;
 }
+#endif
 
 
 void
@@ -5325,8 +5386,8 @@ Pair_unalias_circular (struct T *pairs, int npairs, Chrpos_T chrlength) {
 }
 
 
-static struct T *
-hardclip_pairs (int *clipped_npairs, int hardclip_start, int hardclip_end,
+struct T *
+Pair_hardclip (int *clipped_npairs, int hardclip_start, int hardclip_end,
 		struct T *pairs, int npairs, int querylength) {
   struct T *clipped_pairs, *ptr;
   int i, starti;
@@ -5415,10 +5476,10 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
       if (last_type == ' ') {
 	/* Skip */
       } else if (duplicatep == false) {
-	unique = List_push(unique,(void *) last_token);
+	unique = List_push_out(unique,(void *) last_token);
       } else {
 	length += atoi(last_token);
-	FREE(last_token);
+	FREE_OUT(last_token);
 	sprintf(token,"%d%c",length,last_type);
 	unique = push_token(unique,token);
       }
@@ -5431,14 +5492,14 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
   if (last_type == ' ') {
     /* Skip */
   } else if (duplicatep == false) {
-    unique = List_push(unique,(void *) last_token);
+    unique = List_push_out(unique,(void *) last_token);
   } else {
     length += atoi(last_token);
     FREE_OUT(last_token);
     sprintf(token,"%d%c",length,last_type);
     unique = push_token(unique,token);
   }
-  List_free(&tokens);
+  List_free_out(&tokens);
 
 
   if (sam_insert_0M_p == false) {
@@ -5463,10 +5524,10 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
       } else if (last_type == 'D' && type == 'I') {
 	clean = push_token(clean,"0M");
       }
-      clean = List_push(clean,(void *) curr_token);
+      clean = List_push_out(clean,(void *) curr_token);
       last_type = type;
     }
-    List_free(&unique);
+    List_free_out(&unique);
 
     /* Return result */
     if (watsonp) {
@@ -5480,6 +5541,43 @@ Pair_clean_cigar (List_T tokens, bool watsonp) {
 }
 
 
+/* Derived from print_tokens_gff3 */
+int
+Pair_cigar_length (List_T tokens) {
+  int length = 0, tokenlength;
+  List_T p;
+  char *token;
+  char type;
+  
+  for (p = tokens; p != NULL; p = List_next(p)) {
+    token = (char *) List_head(p);
+    type = token[strlen(token)-1];
+    /* Should include 'H', but that gets added according to hardclip_low and hardclip_high */
+    if (type == 'S' || type == 'I' || type == 'M' || type == 'X' || type == '=') {
+      sscanf(token,"%d",&tokenlength);
+      length += tokenlength;
+    }
+  }
+
+  return length;
+}
+
+/* Derived from print_tokens_gff3 */
+void
+Pair_print_tokens (Filestring_T fp, List_T tokens) {
+  List_T p;
+  char *token;
+  
+  for (p = tokens; p != NULL; p = List_next(p)) {
+    token = (char *) List_head(p);
+    FPRINTF(fp,"%s",token);
+    /* FREE_OUT(token); -- Now freed within Stage3end_free or Stage3_free */
+  }
+
+  return;
+}
+
+
 static List_T
 compute_cigar_standard (bool *intronp, int *hardclip_start, int *hardclip_end, struct T *pairs, int npairs, int querylength_given,
 			bool watsonp, int sensedir, int chimera_part) {
@@ -5533,7 +5631,7 @@ compute_cigar_standard (bool *intronp, int *hardclip_start, int *hardclip_end, s
     this = ptr++;
 
 #if 0
-    /* print_tokens_sam(stdout,tokens); */
+    /* Cigar_print_tokens(stdout,tokens); */
     Pair_dump_one(this,true);
     printf("\n");
 #endif
@@ -5809,7 +5907,7 @@ compute_cigar_extended (bool *intronp, int *hardclip_start, int *hardclip_end, s
     this = ptr++;
 
 #if 0
-    /* print_tokens_sam(stdout,tokens); */
+    /* Cigar_print_tokens(stdout,tokens); */
     Pair_dump_one(this,true);
     printf("\n");
 #endif
@@ -6090,62 +6188,463 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
 }
 
 
-#if 0
-/* Copied from samprint.c */
-static bool
-check_cigar_types (Intlist_T cigar_types) {
-  Intlist_T p;
-  int type;
-  /* int last_type = 'M'; */
-  bool M_present_p = false;
+/* Derived from print_gff3_cdna_match */
+/* Assumes pairarray has been hard clipped already */
+static void
+print_sam_line (Filestring_T fp, char *abbrev, bool first_read_p, char *acc1, char *acc2, char *chrstring,
+		bool watsonp, int sensedir, List_T cigar_tokens, List_T md_tokens,
+		int nmismatches_refdiff, int nmismatches_bothdiff, int nindels,
+		bool intronp, char *queryseq_ptr, char *quality_string,
+		int hardclip_start, int hardclip_end, int mate_hardclip_low, int mate_hardclip_high,
+		int querylength, Chimera_T chimera, int quality_shift,
+		int pathnum, int npaths_primary, int npaths_altloc, int absmq_score, int second_absmq, unsigned int flag,
+		Univ_IIT_T chromosome_iit, Chrpos_T chrpos, Chrpos_T chrlength,
+#ifdef GSNAP
+		Shortread_T queryseq, Shortread_T queryseq_mate, Resulttype_T resulttype, int pair_mapq_score, int end_mapq_score,
+		Stage3end_T mate, char *mate_chrstring, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		Chrpos_T mate_chrlength, int mate_sensedir, int pairedlength, bool invertp,
+#else
+		int mapq_score,
+#endif
+		double min_evalue, char *sam_read_group_id, bool merged_overlap_p, bool sarrayp) {
 
-  for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
-    type = Intlist_head(p);
-    if (type == 'M') {
-      M_present_p = true;
 #if 0
-    } else if (type == 'H' && last_type == 'S') {
-      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
-      return false;
-    } else if (type == 'S' && last_type == 'H') {
-      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
-      return false;
+  /* Should already be checked when Stage3_T or Stage3end_T object was created */
+  if (cigar_action == CIGAR_ACTION_IGNORE) {
+    /* Don't check */
+  } else if (Cigar_length(cigar_tokens) + hardclip_start + hardclip_end == querylength) {
+    /* Okay */
+  } else if (cigar_action == CIGAR_ACTION_WARNING) {
+    fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
+	    acc1,Cigar_length(cigar_tokens),hardclip_start,hardclip_end,querylength);
+  } else if (cigar_action == CIGAR_ACTION_NOPRINT) {
+    fprintf(stderr,"Warning: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
+	    acc1,Cigar_length(cigar_tokens),hardclip_start,hardclip_end,querylength);
+    return;
+  } else {
+    /* CIGAR_ACTION_ABORT */
+    fprintf(stderr,"Error: for %s, CIGAR length %d plus hardclips %d and %d do not match sequence length %d\n",
+	    acc1,Cigar_length(cigar_tokens),hardclip_start,hardclip_end,querylength);
+    abort();
+  }
 #endif
-    }
+
+  /* 1. QNAME or Accession */
+  if (acc2 == NULL) {
+    FPRINTF(fp,"%s\t",acc1);
+  } else {
+    FPRINTF(fp,"%s,%s\t",acc1,acc2);
   }
 
-  return M_present_p;
-}
+  /* 2. Flags */
+  FPRINTF(fp,"%u\t",flag);
+
+  /* 3. RNAME or Chrstring */
+  /* 4. POS or Chrlow */
+  /* Taken from GMAP part of SAM_chromosomal_pos */
+  if (chrpos > chrlength) {
+    FPRINTF(fp,"%s\t%u\t",chrstring,chrpos - chrlength /*+ 1U*/);
+  } else {
+    FPRINTF(fp,"%s\t%u\t",chrstring,chrpos /*+ 1U*/);
+  }
+
+  /* 5. MAPQ or Mapping quality */
+#ifdef GSNAP
+  FPRINTF(fp,"%d\t",pair_mapq_score);
+#else
+  FPRINTF(fp,"%d\t",mapq_score);
 #endif
 
+  /* 6. CIGAR */
+  Pair_print_tokens(fp,cigar_tokens);
 
-#if 0
-bool
-Pair_check_cigar (struct T *pairs, int npairs, int querylength_given,
-		  int clipdir, int hardclip5, int hardclip3,
-		  bool watsonp, bool first_read_p, bool circularp) {
-  bool result;
-  Intlist_T cigar_types = NULL;
-  int hardclip_low, hardclip_high;
-  int Mlength = 0, Ilength = 0, Dlength = 0;
-  bool in_exon = false, deletionp;
-  struct T *ptr, *prev, *this = NULL;
-  int exon_queryend;
-  int query_gap;
-  int last_querypos = -1;
-  int i;
+  /* 7. MRNM: Mate chr */
+  /* 8. MPOS: Mate chrpos */
+#ifdef GSNAP
+  if (mate_chrpos_low == 0U) {
+    FPRINTF(fp,"\t*\t0");
+  } else if (mate_chrpos_low > mate_chrlength) {
+    FPRINTF(fp,"\t%s\t%u",mate_chrstring,mate_chrpos_low - mate_chrlength /* +1U*/);
+  } else {
+    FPRINTF(fp,"\t%s\t%u",mate_chrstring,mate_chrpos_low /* +1U*/);
+  }
+#else
+  FPRINTF(fp,"\t*\t0");
+#endif
 
-  if (circularp == true) {
-    if (watsonp == true) {
-      hardclip_low = hardclip5;
-      hardclip_high = hardclip3;
+  /* 9. ISIZE: Insert size */
+#ifdef GSNAP
+  if (resulttype == CONCORDANT_UNIQ || resulttype == CONCORDANT_TRANSLOC || resulttype == CONCORDANT_MULT) {
+    if (watsonp == invertp) {
+      FPRINTF(fp,"\t%d",-pairedlength);
     } else {
-      hardclip_low = hardclip3;
-      hardclip_high = hardclip5;
+      FPRINTF(fp,"\t%d",pairedlength);
     }
+  } else if (mate_chrpos_low == 0) {
+    FPRINTF(fp,"\t%d",pairedlength);
+  } else if (chrpos < mate_chrpos_low) {
+    FPRINTF(fp,"\t%d",pairedlength);
+  } else if (chrpos > mate_chrpos_low) {
+    FPRINTF(fp,"\t%d",-pairedlength);
+  } else if (first_read_p == true) {
+    FPRINTF(fp,"\t%d",pairedlength);
   } else {
-    /* Incoming hardclip5 and hardclip3 are due to overlaps, not chimera */
-    if (clipdir >= 0) {
+    FPRINTF(fp,"\t%d",-pairedlength);
+  }
+#else
+  FPRINTF(fp,"\t0");
+#endif
+
+  /* 10. SEQ: queryseq and 11. QUAL: quality_scores */
+  FPRINTF(fp,"\t");
+  if (watsonp == true) {
+    print_chopped(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
+    FPRINTF(fp,"\t");
+    print_quality(fp,quality_string,querylength,hardclip_start,hardclip_end,
+		  quality_shift);
+  } else {
+    print_chopped_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
+    FPRINTF(fp,"\t");
+    print_quality_revcomp(fp,quality_string,querylength,hardclip_start,hardclip_end,
+			  quality_shift);
+  }
+
+#ifdef GSNAP
+  /* 12. TAGS: XM */
+  if (queryseq_mate == NULL) {
+    /* Unpaired alignment.  Don't print XM. */
+  } else {
+    FPRINTF(fp,"\tXM:Z:");
+    Cigar_print_mate(fp,mate,Shortread_fulllength(queryseq_mate),mate_hardclip_low,mate_hardclip_high);
+  }
+#endif
+
+  /* 12. TAGS: RG */
+  if (sam_read_group_id != NULL) {
+    FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
+  }
+
+  /* 12. TAGS: XH and XI */
+  if (hardclip_start > 0 || hardclip_end > 0) {
+    FPRINTF(fp,"\tXH:Z:");
+    if (watsonp == true) {
+      print_chopped_end(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
+    } else {
+      print_chopped_end_revcomp(fp,queryseq_ptr,querylength,hardclip_start,hardclip_end);
+    }
+
+    if (quality_string != NULL) {
+      FPRINTF(fp,"\tXI:Z:");
+      if (watsonp == true) {
+	print_chopped_end_quality(fp,quality_string,querylength,hardclip_start,hardclip_end);
+      } else {
+	print_chopped_end_quality_reverse(fp,quality_string,querylength,hardclip_start,hardclip_end);
+      }
+    }
+  }
+
+#ifdef GSNAP
+  if (queryseq != NULL) {
+    /* 12. TAGS: XB */
+    Shortread_print_barcode(fp,queryseq);
+
+    /* 12. TAGS: XP.  Logically should be last in reconstructing a read. */
+    Shortread_print_chop(fp,queryseq,invertp);
+  }
+#endif
+
+  /* 12. TAGS: MD string */
+  FPRINTF(fp,"\tMD:Z:");
+  Pair_print_tokens(fp,md_tokens);
+
+  /* 12. TAGS: NH */
+  FPRINTF(fp,"\tNH:i:%d",npaths_primary + npaths_altloc);
+  
+  /* 12. TAGS: HI */
+  FPRINTF(fp,"\tHI:i:%d",pathnum);
+
+  /* 12. TAGS: NM */
+  FPRINTF(fp,"\tNM:i:%d",nmismatches_refdiff + nindels);
+
+  if (snps_p) {
+    /* 12. TAGS: XW and XV */
+    FPRINTF(fp,"\tXW:i:%d",nmismatches_bothdiff);
+    FPRINTF(fp,"\tXV:i:%d",nmismatches_refdiff - nmismatches_bothdiff);
+  }
+
+
+  /* 12. TAGS: SM */
+#ifdef GSNAP
+  FPRINTF(fp,"\tSM:i:%d",end_mapq_score);
+#else
+  FPRINTF(fp,"\tSM:i:%d",40);
+#endif
+
+  /* 12. TAGS: XQ */
+  FPRINTF(fp,"\tXQ:i:%d",absmq_score);
+
+  /* 12. TAGS: X2 */
+  FPRINTF(fp,"\tX2:i:%d",second_absmq);
+
+  /* 12. TAGS: XO */
+  FPRINTF(fp,"\tXO:Z:%s",abbrev);
+
+  /* 12. TAGS: XS */
+#ifdef GSNAP
+  /* Use mate sensedir, to be consistent with samprint method */
+  if (sensedir == SENSE_NULL) {
+    sensedir = mate_sensedir;
+  }
+#endif
+
+  if (sensedir == SENSE_FORWARD) {
+    if (watsonp == true) {
+      FPRINTF(fp,"\tXS:A:+");
+    } else {
+      FPRINTF(fp,"\tXS:A:-");
+    }
+
+  } else if (sensedir == SENSE_ANTI) {
+    if (watsonp == true) {
+      FPRINTF(fp,"\tXS:A:-");
+    } else {
+      FPRINTF(fp,"\tXS:A:+");
+    }
+
+  } else if (intronp == false) {
+    /* Skip.  No intron in this end and mate is not revealing. */
+
+#if 0
+  } else if (force_xs_direction_p == true) {
+    /* Don't print XS field for SENSE_NULL */
+    /* Could not determine sense, so just report arbitrarily as + */
+    /* This option provided for users of Cufflinks, which cannot handle XS:A:? */
+    FPRINTF(fp,"\tXS:A:+");
+    
+  } else {
+    /* Non-canonical.  Don't report. */
+    FPRINTF(fp,"\tXS:A:?");
+#endif
+  }
+
+  /* 12. TAGS: XT */
+  if (chimera != NULL) {
+    FPRINTF(fp,"\tXT:Z:");
+    Chimera_print_sam_tag(fp,chimera,chromosome_iit);
+  }
+
+  /* 12. TAGS: XG */
+  if (merged_overlap_p) {
+    FPRINTF(fp,"\tXG:Z:O");
+  } else if (sarrayp == true) {
+    FPRINTF(fp,"\tXG:Z:B");
+  } else {
+    FPRINTF(fp,"\tXG:Z:M");
+  }
+
+#if 0
+  /* 12. TAGS: XE (BLAST E-value) */
+  FPRINTF(fp,"\tXE:f:%.2g",min_evalue);
+#endif
+
+  FPRINTF(fp,"\n");
+
+  return;
+}
+
+
+void
+Pair_print_sam (Filestring_T fp, char *abbrev, struct Pair_T *pairarray, int npairs, List_T cigar_tokens, bool intronp,
+		char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
+		char *queryseq_ptr, char *quality_string,
+		int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high, int querylength_given,
+		bool watsonp, int sensedir, int chimera_part, Chimera_T chimera,
+		int quality_shift, bool first_read_p, int pathnum, int npaths_primary, int npaths_altloc,
+		int absmq_score, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
+#ifdef GSNAP
+		Shortread_T queryseq, Shortread_T queryseq_mate, Resulttype_T resulttype, unsigned int flag,
+		int pair_mapq_score, int end_mapq_score,
+		Stage3end_T mate, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		Chrpos_T mate_chrlength, int mate_sensedir, int pairedlength,
+#else
+		int mapq_score, bool sam_paired_p,
+#endif
+		char *sam_read_group_id, bool invertp, bool merged_overlap_p, bool sarrayp) {
+  char *chrstring = NULL;
+#ifdef GSNAP
+  char *mate_chrstring, *mate_chrstring_alloc = NULL;
+#else
+  unsigned int flag;
+#endif
+
+  List_T md_tokens = NULL;
+  int nmismatches_refdiff, nmismatches_bothdiff, nindels;
+  bool ignore_intronp;
+  int hardclip_start, hardclip_end;
+  /* int hardclip_start_zero = 0, hardclip_end_zero = 0; */
+  struct Pair_T *clipped_pairarray;
+  int clipped_npairs;
+  bool cigar_tokens_alloc;
+  double min_evalue, max_bitscore;
+
+
+  if (chrnum == 0) {
+    chrstring = Sequence_accession(usersegment);
+  } else {
+    chrstring = Chrnum_to_string(chrnum,chromosome_iit);
+  }
+
+#ifdef GSNAP
+  if (mate_chrpos_low == 0U) {
+    mate_chrstring = "*";
+  } else if (mate_chrnum == 0) {
+    abort();
+  } else if (/* chrpos > 0U && chrnum > 0 && */ mate_chrnum == chrnum) {
+    mate_chrstring = "=";
+  } else {
+    mate_chrstring = mate_chrstring_alloc = Chrnum_to_string(mate_chrnum,chromosome_iit);
+  }
+#else
+  flag = compute_sam_flag_nomate(npaths_primary + npaths_altloc,first_read_p,watsonp,sam_paired_p);
+#endif
+
+  debug4(printf("Entered SAM_print_pairs with watsonp %d, first_read_p %d, hardclip_low %d, and hardclip_high %d\n",
+		watsonp,first_read_p,hardclip_low,hardclip_high));
+
+  if (watsonp == true) {
+    hardclip_start = hardclip_low;
+    hardclip_end = hardclip_high;
+  } else {
+    hardclip_start = hardclip_high;
+    hardclip_end = hardclip_low;
+  }
+  debug4(printf("hardclip_start %d, hardclip_end %d\n",hardclip_start,hardclip_end));
+
+
+  if (merged_overlap_p == true) {
+    /* clipped_pairarray = pairarray; */
+    /* clipped_npairs = npairs; */
+    clipped_pairarray = Pair_hardclip(&clipped_npairs,hardclip_start,hardclip_end,
+				      pairarray,npairs,querylength_given);
+    cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,clipped_pairarray,clipped_npairs,querylength_given,
+				      watsonp,sensedir,chimera_part);
+    cigar_tokens_alloc = true;
+
+#if 0
+  } else if (hardclip_start == 0 && hardclip_end == 0) {
+    /* Fails for both GSNAP clip-overlap and GMAP chimera */
+    clipped_pairarray = pairarray;
+    clipped_npairs = npairs;
+#endif
+
+  } else {
+    clipped_pairarray = Pair_hardclip(&clipped_npairs,hardclip_start,hardclip_end,
+				      pairarray,npairs,querylength_given);
+    cigar_tokens = Pair_compute_cigar(&ignore_intronp,&hardclip_start,&hardclip_end,clipped_pairarray,clipped_npairs,querylength_given,
+				      watsonp,sensedir,chimera_part);
+    cigar_tokens_alloc = true;
+  }
+
+  /* Cigar updates hardclip5 and hardclip3 for chimeras */
+  md_tokens = Pair_compute_md_string(&nmismatches_refdiff,&nmismatches_bothdiff,&nindels,
+				     clipped_pairarray,clipped_npairs,watsonp,cigar_tokens);
+
+#if 0
+  min_evalue = Pair_min_evalue(clipped_pairarray,clipped_npairs);
+#endif
+
+  print_sam_line(fp,abbrev,first_read_p,acc1,acc2,chrstring,
+		 watsonp,sensedir,cigar_tokens,md_tokens,
+		 nmismatches_refdiff,nmismatches_bothdiff,nindels,
+		 intronp,queryseq_ptr,quality_string,hardclip_start,hardclip_end,
+		 mate_hardclip_low,mate_hardclip_high,
+		 querylength_given,chimera,quality_shift,pathnum,npaths_primary,npaths_altloc,
+		 absmq_score,second_absmq,flag,chromosome_iit,chrpos,chrlength,
+#ifdef GSNAP
+		 queryseq,queryseq_mate,resulttype,pair_mapq_score,end_mapq_score,
+		 mate,mate_chrstring,mate_chrnum,mate_chrpos_low,mate_chrlength,mate_sensedir,
+		 pairedlength,invertp,
+#else
+		 mapq_score,
+#endif
+		 min_evalue,sam_read_group_id,merged_overlap_p,sarrayp);
+
+  /* Print procedures free the character strings */
+  Pair_tokens_free(&md_tokens);
+  if (cigar_tokens_alloc == true) {
+    Pair_tokens_free(&cigar_tokens);
+  }
+
+#ifdef GSNAP
+  if (mate_chrstring_alloc != NULL) {
+    FREE(mate_chrstring_alloc);
+  }
+#endif
+  if (chrnum != 0) {
+    FREE(chrstring);
+  }
+
+  return;
+}
+
+
+
+#if 0
+/* Copied from samprint.c */
+static bool
+check_cigar_types (Intlist_T cigar_types) {
+  Intlist_T p;
+  int type;
+  /* int last_type = 'M'; */
+  bool M_present_p = false;
+
+  for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
+    type = Intlist_head(p);
+    if (type == 'M') {
+      M_present_p = true;
+#if 0
+    } else if (type == 'H' && last_type == 'S') {
+      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+      return false;
+    } else if (type == 'S' && last_type == 'H') {
+      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
+      return false;
+#endif
+    }
+  }
+
+  return M_present_p;
+}
+#endif
+
+
+#if 0
+bool
+Pair_check_cigar (struct T *pairs, int npairs, int querylength_given,
+		  int clipdir, int hardclip5, int hardclip3,
+		  bool watsonp, bool first_read_p, bool circularp) {
+  bool result;
+  Intlist_T cigar_types = NULL;
+  int hardclip_low, hardclip_high;
+  int Mlength = 0, Ilength = 0, Dlength = 0;
+  bool in_exon = false, deletionp;
+  struct T *ptr, *prev, *this = NULL;
+  int exon_queryend;
+  int query_gap;
+  int last_querypos = -1;
+  int i;
+
+  if (circularp == true) {
+    if (watsonp == true) {
+      hardclip_low = hardclip5;
+      hardclip_high = hardclip3;
+    } else {
+      hardclip_low = hardclip3;
+      hardclip_high = hardclip5;
+    }
+  } else {
+    /* Incoming hardclip5 and hardclip3 are due to overlaps, not chimera */
+    if (clipdir >= 0) {
       if (watsonp == true) {
 	if (first_read_p == true) {
 	  hardclip_high = hardclip5;
@@ -6617,9 +7116,9 @@ compute_md_string_old (int *nmismatches, struct T *pairs, int npairs, bool watso
 #endif
 
 
-static List_T
-compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nindels,
-		   struct T *pairs, int npairs, bool watsonp, List_T cigar_tokens) {
+List_T
+Pair_compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nindels,
+			struct T *pairs, int npairs, bool watsonp, List_T cigar_tokens) {
   List_T md_tokens = NULL, p;
   char *cigar_token, token[11], *first_token, type;
   Pair_T this;
@@ -6783,274 +7282,73 @@ compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nin
 	  md_tokens = push_token(md_tokens,"^");
 	}
 
-	while (k < npairs && pairs[k].comp == INDEL_COMP && pairs[k].genome == ' ') {
-	  *nindels += 1;
-	  k++;
-	}
-	state = IN_MATCHES;
-
-      } else if (type == 'N') {
-#if 0
-	/* Ignore deletion adjacent to intron, to avoid double ^^ */
-	if (state == IN_DELETION) {
-	  md_tokens = push_token(md_tokens,"^");
-	}
-#endif
-
-	while (k < npairs && pairs[k].gapp == true) {
-	  k++;
-	}
-
-      } else if (type == 'D') {
-	if (state == IN_MATCHES) {
-	  if (nmatches > 0) {
-	    sprintf(token,"%d",nmatches);
-	    md_tokens = push_token(md_tokens,token);
-	    nmatches = 0;
-	  }
-	} else if (state == IN_MISMATCHES) {
-	  md_tokens = push_token(md_tokens,"0");
-	}
-
-	for (i = 0; i < length; i++, k++) {
-	  this = &(pairs[k]);
-	  sprintf(token,"%c",complCode[(int) this->genome]);
-	  md_tokens = push_token(md_tokens,token);
-	  *nindels += 1;
-	}
-	state = IN_DELETION;
-
-      } else {
-	fprintf(stderr,"Don't recognize type %c\n",type);
-	abort();
-      }
-    }
-
-    if (nmatches > 0) {
-      sprintf(token,"%d",nmatches);
-      md_tokens = push_token(md_tokens,token);
-    }
-
-    /* Restore cigar_tokens */
-    cigar_tokens = List_reverse(cigar_tokens);
-  }
-
-  assert(k == npairs);
-
-  /* Insert initial 0 token if necessary */
-  if (md_tokens != NULL) {
-    first_token = (char *) List_head(md_tokens);
-    if (!isdigit(first_token[0])) {
-      md_tokens = push_token(md_tokens,"0");
-    }
-  }
-
-  return md_tokens;
-}
-
-
-
-
-void
-Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairarray, int npairs, List_T cigar_tokens, bool intronp,
-		char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
-		char *queryseq_ptr, char *quality_string,
-		int hardclip_low, int hardclip_high, int querylength_given,
-		bool watsonp, int sensedir, int chimera_part, Chimera_T chimera,
-		int quality_shift, bool first_read_p, int pathnum, int npaths_primary, int npaths_altloc,
-		int absmq_score, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
-#ifdef GSNAP
-		Shortread_T queryseq, Resulttype_T resulttype, unsigned int flag,
-		int pair_mapq_score, int end_mapq_score,
-		Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
-		Chrpos_T mate_chrpos, Chrpos_T mate_chrlength,
-		int mate_sensedir, int pairedlength,
-#else
-		int mapq_score, bool sam_paired_p,
-#endif
-		char *sam_read_group_id, bool invertp, bool merged_overlap_p, bool sarrayp) {
-  char *chrstring = NULL;
-#ifdef GSNAP
-  char *mate_chrstring, *mate_chrstring_alloc = NULL;
-#else
-  unsigned int flag;
-#endif
-
-  List_T md_tokens = NULL;
-  int nmismatches_refdiff, nmismatches_bothdiff, nindels;
-  bool ignore_intronp;
-  int hardclip_start, hardclip_end;
-  /* int hardclip_start_zero = 0, hardclip_end_zero = 0; */
-  struct T *clipped_pairarray;
-  int clipped_npairs;
-  bool cigar_tokens_alloc;
-  double min_evalue, max_bitscore;
-
-
-  if (chrnum == 0) {
-    chrstring = Sequence_accession(usersegment);
-  } else {
-    chrstring = Chrnum_to_string(chrnum,chromosome_iit);
-  }
-
-#ifdef GSNAP
-  if (mate_chrpos == 0U) {
-    mate_chrstring = "*";
-  } else if (mate_chrnum == 0) {
-    if (/* chrpos > 0U && chrnum > 0 && */ mate_effective_chrnum == chrnum) {
-      mate_chrstring = "=";
-    } else {
-      mate_chrstring = mate_chrstring_alloc = Chrnum_to_string(mate_effective_chrnum,chromosome_iit);
-    }
-  } else {
-    if (/* chrpos > 0U && chrnum > 0 && */ mate_chrnum == chrnum) {
-      mate_chrstring = "=";
-    } else {
-      mate_chrstring = mate_chrstring_alloc = Chrnum_to_string(mate_chrnum,chromosome_iit);
-    }
-  }
-#else
-  flag = compute_sam_flag_nomate(npaths_primary + npaths_altloc,first_read_p,watsonp,sam_paired_p);
-#endif
-
-  debug4(printf("Entered Pair_print_sam with watsonp %d, first_read_p %d, hardclip_low %d, and hardclip_high %d\n",
-		watsonp,first_read_p,hardclip_low,hardclip_high));
-
-  if (watsonp == true) {
-    hardclip_start = hardclip_low;
-    hardclip_end = hardclip_high;
-  } else {
-    hardclip_start = hardclip_high;
-    hardclip_end = hardclip_low;
-  }
-  debug4(printf("hardclip_start %d, hardclip_end %d\n",hardclip_start,hardclip_end));
-
-
-  if (merged_overlap_p == true) {
-    /* clipped_pairarray = pairarray; */
-    /* clipped_npairs = npairs; */
-    clipped_pairarray = hardclip_pairs(&clipped_npairs,hardclip_start,hardclip_end,
-				       pairarray,npairs,querylength_given);
-    cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,clipped_pairarray,clipped_npairs,querylength_given,
-				      watsonp,sensedir,chimera_part);
-    cigar_tokens_alloc = true;
-
-#if 0
-  } else if (hardclip_start == 0 && hardclip_end == 0) {
-    /* Fails for both GSNAP clip-overlap and GMAP chimera */
-    clipped_pairarray = pairarray;
-    clipped_npairs = npairs;
-#endif
-
-  } else {
-    clipped_pairarray = hardclip_pairs(&clipped_npairs,hardclip_start,hardclip_end,
-				       pairarray,npairs,querylength_given);
-    cigar_tokens = Pair_compute_cigar(&ignore_intronp,&hardclip_start,&hardclip_end,clipped_pairarray,clipped_npairs,querylength_given,
-				      watsonp,sensedir,chimera_part);
-    cigar_tokens_alloc = true;
-  }
-
-  /* Cigar updates hardclip5 and hardclip3 for chimeras */
-  md_tokens = compute_md_string(&nmismatches_refdiff,&nmismatches_bothdiff,&nindels,
-				clipped_pairarray,clipped_npairs,watsonp,cigar_tokens);
-
-#if 0
-  min_evalue = Pair_min_evalue(clipped_pairarray,clipped_npairs);
-#endif
-
-  print_sam_line(fp,abbrev,first_read_p,acc1,acc2,chrstring,
-		 watsonp,sensedir,cigar_tokens,md_tokens,
-		 nmismatches_refdiff,nmismatches_bothdiff,nindels,
-		 intronp,queryseq_ptr,quality_string,hardclip_start,hardclip_end,
-		 querylength_given,chimera,quality_shift,pathnum,npaths_primary,npaths_altloc,
-		 absmq_score,second_absmq,flag,chromosome_iit,chrpos,chrlength,
-#ifdef GSNAP
-		 queryseq,resulttype,pair_mapq_score,end_mapq_score,mate_chrstring,
-		 mate_chrpos,mate_chrlength,mate_sensedir,pairedlength,invertp,
-#else
-		 mapq_score,
-#endif
-		 min_evalue,sam_read_group_id,merged_overlap_p,sarrayp);
-
-  /* Print procedures free the character strings */
-  Pair_tokens_free(&md_tokens);
-  if (cigar_tokens_alloc == true) {
-    Pair_tokens_free(&cigar_tokens);
-  }
-
-#ifdef GSNAP
-  if (mate_chrstring_alloc != NULL) {
-    FREE(mate_chrstring_alloc);
-  }
-#endif
-  if (chrnum != 0) {
-    FREE(chrstring);
-  }
-
-  return;
-}
-
-
-void
-Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
-			  char *quality_string, int querylength, int quality_shift,
-			  bool first_read_p, bool sam_paired_p, char *sam_read_group_id) {
-  unsigned int flag;
+	while (k < npairs && pairs[k].comp == INDEL_COMP && pairs[k].genome == ' ') {
+	  *nindels += 1;
+	  k++;
+	}
+	state = IN_MATCHES;
 
-#ifdef GSNAP
-  fprintf(stderr,"Unexpected call to Pair_print_sam_nomapping in GSNAP\n");
-  abort();
+      } else if (type == 'N') {
+#if 0
+	/* Ignore deletion adjacent to intron, to avoid double ^^ */
+	if (state == IN_DELETION) {
+	  md_tokens = push_token(md_tokens,"^");
+	}
 #endif
 
-  /* 1. QNAME */
-  if (acc2 == NULL) {
-    FPRINTF(fp,"%s",acc1);
-  } else {
-    FPRINTF(fp,"%s,%s",acc1,acc2);
-  }
-  
-  /* 2. FLAG */
-  flag = compute_sam_flag_nomate(/*npaths*/0,first_read_p,/*watsonp*/true,sam_paired_p);
-  FPRINTF(fp,"\t%u",flag);
+	while (k < npairs && pairs[k].gapp == true) {
+	  k++;
+	}
 
-  /* 3. RNAME: chr */
-  FPRINTF(fp,"\t*");
+      } else if (type == 'D') {
+	if (state == IN_MATCHES) {
+	  if (nmatches > 0) {
+	    sprintf(token,"%d",nmatches);
+	    md_tokens = push_token(md_tokens,token);
+	    nmatches = 0;
+	  }
+	} else if (state == IN_MISMATCHES) {
+	  md_tokens = push_token(md_tokens,"0");
+	}
 
-  /* 4. POS: chrpos */
-  FPRINTF(fp,"\t0");
+	for (i = 0; i < length; i++, k++) {
+	  this = &(pairs[k]);
+	  sprintf(token,"%c",complCode[(int) this->genome]);
+	  md_tokens = push_token(md_tokens,token);
+	  *nindels += 1;
+	}
+	state = IN_DELETION;
 
-  /* 5. MAPQ: Mapping quality */
-  /* Picard says MAPQ should be 0 for an unmapped read */
-  FPRINTF(fp,"\t0");
+      } else {
+	fprintf(stderr,"Don't recognize type %c\n",type);
+	abort();
+      }
+    }
 
-  /* 6. CIGAR */
-  FPRINTF(fp,"\t*");
+    if (nmatches > 0) {
+      sprintf(token,"%d",nmatches);
+      md_tokens = push_token(md_tokens,token);
+    }
 
-  /* 7. MRNM: Mate chr */
-  /* 8. MPOS: Mate chrpos */
-  /* 9. ISIZE: Insert size */
-  FPRINTF(fp,"\t*\t0\t0\t");
+    /* Restore cigar_tokens */
+    cigar_tokens = List_reverse(cigar_tokens);
+  }
 
-  /* 10. SEQ: queryseq and 11. QUAL: quality scores */
-  print_chopped(fp,queryseq_ptr,querylength,/*hardclip_start*/0,/*hardclip_end*/0);
-  FPRINTF(fp,"\t");
-  print_quality(fp,quality_string,querylength,/*hardclip_start*/0,/*hardclip_end*/0,
-		quality_shift);
+  assert(k == npairs);
 
-  /* 12. TAGS: RG */
-  if (sam_read_group_id != NULL) {
-    FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
+  /* Insert initial 0 token if necessary */
+  if (md_tokens != NULL) {
+    first_token = (char *) List_head(md_tokens);
+    if (!isdigit(first_token[0])) {
+      md_tokens = push_token(md_tokens,"0");
+    }
   }
-  
-  /* 12. TAGS: XO */
-  FPRINTF(fp,"\tXO:Z:%s",abbrev);
 
-  FPRINTF(fp,"\n");
-
-  return;
+  return md_tokens;
 }
 
 
+
 Uintlist_T
 Pair_exonbounds (struct T *pairs, int npairs, Univcoord_T chroffset) {
   Uintlist_T exonbounds = NULL;
@@ -8695,108 +8993,410 @@ Pair_print_protein_genomic (Filestring_T fp, struct T *ptr, int npairs, int wrap
 	  PUTC(this->aa_g,fp);
 	  xpos++;
 	}
-#endif
+#endif
+      }
+    }
+    PUTC('\n',fp);
+
+  }
+
+  return;
+}
+
+#ifdef PMAP
+void
+Pair_print_nucleotide_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength) {
+  struct T *this;
+  int xpos = 0, i;
+
+  for (i = 0; i < npairs; i++) {
+    this = ptr++;
+    if (this->cdna != ' ') {
+      if (xpos == wraplength) {
+	PUTC('\n',fp);
+	xpos = 0;
+      }
+      PUTC(this->cdna,fp);
+      xpos++;
+    }
+  }
+  PUTC('\n',fp);
+  return;
+}
+#else
+void
+Pair_print_protein_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp) {
+  struct T *this;
+  int xpos = 0, i;
+
+  if (forwardp == true) {
+    for (i = 0; i < npairs; i++) {
+      this = ptr++;
+      if (this->aa_e != ' ') {
+	if (xpos == wraplength) {
+	  PUTC('\n',fp);
+	  xpos = 0;
+	}
+	if (this->aa_e != '*') {
+	  PUTC(this->aa_e,fp);
+	  xpos++;
+	}
+      }
+    }
+    PUTC('\n',fp);
+
+  } else {
+    for (i = npairs-1; i >= 0; i--) {
+      this = ptr--;
+      if (this->aa_e != ' ') {
+	if (xpos == wraplength) {
+	  PUTC('\n',fp);
+	  xpos = 0;
+	}
+	if (this->aa_e != '*') {
+	  PUTC(this->aa_e,fp);
+	  xpos++;
+	}
+      }
+    }
+    PUTC('\n',fp);
+  }
+
+  return;
+}
+#endif
+
+
+#if 0
+void
+Pair_print_compressed_old (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
+			   Sequence_T usersegment, int nexons, double fracidentity,
+			   struct T *pairs, int npairs, Chrnum_T chrnum,
+			   Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
+			   int skiplength, int trim_start, int trim_end, bool checksump,
+			   int chimerapos, int chimeraequivpos, double donor_prob, double acceptor_prob,
+			   int chimera_cdna_direction, char *strain, bool watsonp, int cdna_direction) {
+  Chrpos_T chrpos1, chrpos2;
+  Univcoord_T position1, position2;
+
+  bool in_exon = false;
+  List_T tokens = NULL;
+  struct T *ptr = pairs, *this = NULL;
+  int querypos1, querypos2;
+  int exon_querystart = -1, exon_queryend;
+  Chrpos_T exon_genomestart = -1, exon_genomeend, intron_start, intron_end;
+  int num = 0, den = 0, runlength = 0, i;
+  int print_dinucleotide_p;
+  char token[11], donor[3], acceptor[3], *chr;
+  double coverage;
+  /* double trimmed_coverage; */
+  int last_querypos = -1;
+  Chrpos_T last_genomepos = -1U;
+
+  donor[0] = donor[1] = donor[2] = '\0';
+  acceptor[0] = acceptor[1] = acceptor[2] = '\0';
+
+  querypos1 = start->querypos;
+  querypos2 = end->querypos;
+
+  FPRINTF(fp,">%s ",Sequence_accession(queryseq));
+  if (dbversion != NULL) {
+    FPRINTF(fp,"%s ",dbversion);
+  } else if (usersegment != NULL && Sequence_accession(usersegment) != NULL) {
+    FPRINTF(fp,"%s ",Sequence_accession(usersegment));
+  } else {
+    FPRINTF(fp,"user-provided ");
+  }
+#ifdef PMAP
+  FPRINTF(fp,"%d/%d %d %d",pathnum,npaths,(querylength_given+skiplength)*3,nexons);
+  coverage = (double) (querypos2 - querypos1 + 1)/(double) ((querylength_given+skiplength)*3);
+  FPRINTF(fp," %.1f",((double) rint(1000.0*coverage)));
+#else
+  coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given+skiplength);
+  if (end->querypos + 1 > trim_end) {
+    trim_end = end->querypos + 1;
+  }
+  if (start->querypos < trim_start) {
+    trim_start = start->querypos;
+  }
+  /*
+  trimmed_coverage = (double) (end->querypos - start->querypos + 1)/(double) (trim_end - trim_start + skiplength);
+  FPRINTF(fp,">%s %s %d/%d %d(%d) %d",
+	 Sequence_accession(queryseq),dbversion,pathnum,npaths,
+	 querylength_given+skiplength,trim_end-trim_start,nexons);
+  FPRINTF(fp," %.1f(%.1f)",((double) rint(1000.0*coverage))/10.0,((double) rint(1000.0*trimmed_coverage))/10.0);
+  */
+  FPRINTF(fp,"%d/%d %d %d",pathnum,npaths,querylength_given+skiplength,nexons);
+  FPRINTF(fp," %.1f",((double) rint(1000.0*coverage))/10.0);
+#endif
+  FPRINTF(fp," %.1f",((double) rint(1000.0*fracidentity))/10.0);
+
+  start = &(pairs[0]);
+  end = &(pairs[npairs-1]);
+  FPRINTF(fp," %d%s%d",start->querypos + ONEBASEDP,"..",end->querypos + ONEBASEDP);
+
+  chrpos1 = start->genomepos;
+  chrpos2 = end->genomepos;
+  position1 = chroffset + chrpos1;
+  position2 = chroffset + chrpos2;
+  FPRINTF(fp," %u%s%u",position1 + ONEBASEDP,"..",position2 + ONEBASEDP);
+
+  if (chrnum == 0) {
+    FPRINTF(fp," %u%s%u",chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
+  } else {
+    chr = Chrnum_to_string(chrnum,chromosome_iit);
+    FPRINTF(fp," %s:%u%s%u",chr,chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
+    FREE(chr);
+  }
+
+  if (chrpos1 <= chrpos2) {
+    FPRINTF(fp," +");
+  } else {
+    FPRINTF(fp," -");
+  }
+
+  if (cdna_direction > 0) {
+    FPRINTF(fp," dir:sense");
+  } else if (cdna_direction < 0) {
+    FPRINTF(fp," dir:antisense");
+  } else {
+    FPRINTF(fp," dir:indet");
+  }
+
+  if (checksump == true) {
+    FPRINTF(fp," md5:");
+    Sequence_print_digest(fp,queryseq);
+  }
+
+  if (chimerapos >= 0) {
+    if (chimeraequivpos == chimerapos) {
+      if (donor_prob > 0.0 && acceptor_prob > 0.0) {
+	if (chimera_cdna_direction >= 0) {
+	  FPRINTF(fp," chimera:%d(>)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
+	} else {
+	  FPRINTF(fp," chimera:%d(<)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
+	}
+      } else {
+	FPRINTF(fp," chimera:%d",chimerapos + ONEBASEDP);
       }
+    } else {
+      FPRINTF(fp," chimera:%d..%d",chimerapos + ONEBASEDP,chimeraequivpos + ONEBASEDP);
     }
-    PUTC('\n',fp);
-
   }
 
-  return;
-}
+  if (strain != NULL) {
+    FPRINTF(fp," strain:%s",strain);
+  }
 
-#ifdef PMAP
-void
-Pair_print_nucleotide_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength) {
-  struct T *this;
-  int xpos = 0, i;
+  PUTC('\n',fp);
 
   for (i = 0; i < npairs; i++) {
+    /* prev = this; */
     this = ptr++;
-    if (this->cdna != ' ') {
-      if (xpos == wraplength) {
-	PUTC('\n',fp);
-	xpos = 0;
-      }
-      PUTC(this->cdna,fp);
-      xpos++;
-    }
-  }
-  PUTC('\n',fp);
-  return;
-}
-#else
-void
-Pair_print_protein_cdna (Filestring_T fp, struct T *ptr, int npairs, int wraplength, bool forwardp) {
-  struct T *this;
-  int xpos = 0, i;
 
-  if (forwardp == true) {
-    for (i = 0; i < npairs; i++) {
-      this = ptr++;
-      if (this->aa_e != ' ') {
-	if (xpos == wraplength) {
-	  PUTC('\n',fp);
-	  xpos = 0;
+    if (this->gapp) {
+      if (in_exon == true) {
+	/* Beginning of gap */
+	exon_queryend = last_querypos + ONEBASEDP;
+	exon_genomeend = last_genomepos + ONEBASEDP;
+	if (watsonp) {
+	  intron_start = exon_genomeend + 1;
+	} else {
+	  intron_start = exon_genomeend - 1;
 	}
-	if (this->aa_e != '*') {
-	  PUTC(this->aa_e,fp);
-	  xpos++;
+
+	FPRINTF(fp,"\t%u %u",exon_genomestart,exon_genomeend);
+	FPRINTF(fp," %d %d",exon_querystart,exon_queryend);
+	if (den == 0) {
+	  FPRINTF(fp," 100");
+	} else {
+	  FPRINTF(fp," %d",(int) floor(100.0*(double) num/(double) den));
 	}
-      }
-    }
-    PUTC('\n',fp);
+	print_dinucleotide_p = 1;
+	if (this->comp == FWD_CANONICAL_INTRON_COMP) {
+	  sprintf(token,"%d>",runlength);
+	} else if (this->comp == REV_CANONICAL_INTRON_COMP) {
+	  sprintf(token,"%d<",runlength);
+	  print_dinucleotide_p = -1;
+	} else if (this->comp == NONINTRON_COMP) {
+	  sprintf(token,"%d=",runlength);
+	} else if (this->comp == FWD_GCAG_INTRON_COMP) {
+	  sprintf(token,"%d)",runlength);
+	} else if (this->comp == REV_GCAG_INTRON_COMP) {
+	  sprintf(token,"%d(",runlength);
+	  print_dinucleotide_p = -1;
+	} else if (this->comp == FWD_ATAC_INTRON_COMP) {
+	  sprintf(token,"%d]",runlength);
+	} else if (this->comp == REV_ATAC_INTRON_COMP) {
+	  sprintf(token,"%d[",runlength);
+	  print_dinucleotide_p = -1;
+	} else if (this->comp == DUALBREAK_COMP) {
+	  sprintf(token,"%d#",runlength);
+	  print_dinucleotide_p = 0;
+	} else if (this->comp == EXTRAEXON_COMP) {
+	  sprintf(token,"%d#",runlength);
+	  print_dinucleotide_p = 0;
+	} else {
+	  fprintf(stderr,"Can't parse comp '%c' in compression for %s\n",
+		  this->comp,Sequence_accession(queryseq));
+	  abort();
+	}
+	tokens = push_token(tokens,token);
+	tokens = List_reverse(tokens);
+	print_tokens_compressed(fp,tokens);
+	List_free_out(&tokens);
+	FPRINTF(fp,"\t%d",exon_queryend - exon_querystart + 1);
 
-  } else {
-    for (i = npairs-1; i >= 0; i--) {
-      this = ptr--;
-      if (this->aa_e != ' ') {
-	if (xpos == wraplength) {
+	runlength = 0;
+	donor[0] = this->genome;
+	donor[1] = '\0';
+	in_exon = false;
+      } else if (donor[1] == '\0') {
+	donor[1] = this->genome;
+      } else {
+	acceptor[0] = acceptor[1];
+	acceptor[1] = this->genome;
+      }
+    } else if (this->comp == INTRONGAP_COMP) {
+      /* Do nothing */
+    } else {
+      /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP, 
+	 SHORTGAP_COMP, or MISMATCH_COMP */
+      if (in_exon == false) {
+	exon_querystart = this->querypos + ONEBASEDP;
+	exon_genomestart = this->genomepos + ONEBASEDP;
+	if (watsonp) {
+	  intron_end = exon_genomestart - 1;
+	} else {
+	  intron_end = exon_genomestart + 1;
+	}
+	if (i > 0) {
+	  if (intron_end > intron_start) {
+	    FPRINTF(fp,"\t%d",intron_end - intron_start + 1);
+	  } else {
+	    FPRINTF(fp,"\t%d",intron_start - intron_end + 1);
+	  }
+	  if (print_dinucleotide_p == -1) {
+	    invert_intron(donor,acceptor);
+	  }
+	  if (print_dinucleotide_p != 0) {
+	    if ((donor[0] == 'G' || donor[0] == 'g') &&
+		(donor[1] == 'T' || donor[1] == 't') &&
+		(acceptor[0] == 'A' || acceptor[0] == 'a') &&
+		(acceptor[1] == 'G' || acceptor[1] == 'g')) {
+	      /* Do nothing */
+	    } else {
+	      FPRINTF(fp,"\t%c%c-%c%c",toupper(donor[0]),toupper(donor[1]),toupper(acceptor[0]),toupper(acceptor[1]));
+	    }
+	  }
+#if 0
+	  if (exon_querystart > exon_queryend + 1) {
+	    FPRINTF(fp,"***");
+	  }
+#endif
 	  PUTC('\n',fp);
-	  xpos = 0;
 	}
-	if (this->aa_e != '*') {
-	  PUTC(this->aa_e,fp);
-	  xpos++;
+
+	num = den = 0;
+	in_exon = true;
+      }
+      if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
+	/* Gap in upper or lower sequence */
+	if (this->genome == ' ') {
+	  sprintf(token,"%d^%c",runlength,this->cdna);
+	} else if (this->cdna == ' ') {
+	  sprintf(token,"%dv",runlength);
+	} else {
+	  fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
+	  exit(9);
+	}
+	tokens = push_token(tokens,token);
+	runlength = 0;
+	/* Don't increment den */
+
+      } else if (this->comp == MISMATCH_COMP) {
+	sprintf(token,"%dx%c",runlength,this->cdna);
+	tokens = push_token(tokens,token);
+	runlength = 0;
+	den++;
+
+#ifndef PMAP
+      } else if (this->comp == AMBIGUOUS_COMP) {
+	sprintf(token,"%d:%c",runlength,this->cdna);
+	tokens = push_token(tokens,token);
+	runlength = 0;
+	den++;
+	num++;
+#endif
+
+      } else {
+	runlength++;
+	den++;
+	if (this->comp == MATCH_COMP || this->comp == DYNPROG_MATCH_COMP) {
+	  /* AMBIGUOUS_COMP handled above */
+	  num++;
 	}
       }
     }
-    PUTC('\n',fp);
+
+    if (this->cdna != ' ') {
+      last_querypos = this->querypos;
+    }
+    if (this->genome != ' ') {
+      last_genomepos = this->genomepos;
+    }
+  }
+  
+  /* prev = this; */
+  exon_queryend = last_querypos + ONEBASEDP;
+  exon_genomeend = last_genomepos + ONEBASEDP;
+  
+  FPRINTF(fp,"\t%d %d",exon_genomestart,exon_genomeend);
+  FPRINTF(fp," %d %d",exon_querystart,exon_queryend);
+  if (den == 0) {
+    FPRINTF(fp," 100");
+  } else {
+    FPRINTF(fp," %d",(int) floor(100.0*(double) num/(double) den));
   }
 
+  sprintf(token,"%d*",runlength);
+  tokens = push_token(tokens,token);
+  tokens = List_reverse(tokens);
+  print_tokens_compressed(fp,tokens);
+  List_free_out(&tokens);
+
+  FPRINTF(fp,"\t%d",exon_queryend - exon_querystart + 1);
+  PUTC('\n',fp);
+
   return;
 }
 #endif
 
-
+#if 0
 void
-Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
-		       Sequence_T usersegment, int nexons, double fracidentity,
-		       struct T *pairs, int npairs, Chrnum_T chrnum,
-		       Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
-		       int skiplength, int trim_start, int trim_end, bool checksump,
-		       int chimerapos, int chimeraequivpos, double donor_prob, double acceptor_prob,
-		       int chimera_cdna_direction, char *strain, bool watsonp, int cdna_direction) {
+Pair_print_compressed_byexons (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
+			       Sequence_T usersegment, int nexons, double fracidentity,
+			       struct T *pairs, int npairs, Chrnum_T chrnum,
+			       Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
+			       int skiplength, int trim_start, int trim_end, bool checksump,
+			       int chimerapos, int chimeraequivpos, double donor_prob, double acceptor_prob,
+			       int chimera_cdna_direction, char *strain, bool watsonp, int cdna_direction) {
   Chrpos_T chrpos1, chrpos2;
   Univcoord_T position1, position2;
 
   bool in_exon = false;
-  List_T tokens = NULL;
   struct T *ptr = pairs, *this = NULL;
   int querypos1, querypos2;
   int exon_querystart = -1, exon_queryend;
-  Chrpos_T exon_genomestart = -1, exon_genomeend, intron_start, intron_end;
-  int num = 0, den = 0, runlength = 0, i;
-  int print_dinucleotide_p;
-  char token[11], donor[3], acceptor[3], *chr;
+  int exon_pairi_start, exon_pairi_end;
+  Chrpos_T exon_genomestart = -1, exon_genomeend;
+  int i, k;
+  char *chr, c;
   double coverage;
   /* double trimmed_coverage; */
   int last_querypos = -1;
   Chrpos_T last_genomepos = -1U;
 
-  donor[0] = donor[1] = donor[2] = '\0';
-  acceptor[0] = acceptor[1] = acceptor[2] = '\0';
-
   querypos1 = start->querypos;
   querypos2 = end->querypos;
 
@@ -8891,6 +9491,7 @@ Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end,
 
   PUTC('\n',fp);
 
+  exon_pairi_start = 0;
   for (i = 0; i < npairs; i++) {
     /* prev = this; */
     this = ptr++;
@@ -8900,144 +9501,36 @@ Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end,
 	/* Beginning of gap */
 	exon_queryend = last_querypos + ONEBASEDP;
 	exon_genomeend = last_genomepos + ONEBASEDP;
-	if (watsonp) {
-	  intron_start = exon_genomeend + 1;
-	} else {
-	  intron_start = exon_genomeend - 1;
-	}
+	exon_pairi_end = i;
 
 	FPRINTF(fp,"\t%u %u",exon_genomestart,exon_genomeend);
 	FPRINTF(fp," %d %d",exon_querystart,exon_queryend);
-	if (den == 0) {
-	  FPRINTF(fp," 100");
-	} else {
-	  FPRINTF(fp," %d",(int) floor(100.0*(double) num/(double) den));
-	}
-	print_dinucleotide_p = 1;
-	if (this->comp == FWD_CANONICAL_INTRON_COMP) {
-	  sprintf(token,"%d>",runlength);
-	} else if (this->comp == REV_CANONICAL_INTRON_COMP) {
-	  sprintf(token,"%d<",runlength);
-	  print_dinucleotide_p = -1;
-	} else if (this->comp == NONINTRON_COMP) {
-	  sprintf(token,"%d=",runlength);
-	} else if (this->comp == FWD_GCAG_INTRON_COMP) {
-	  sprintf(token,"%d)",runlength);
-	} else if (this->comp == REV_GCAG_INTRON_COMP) {
-	  sprintf(token,"%d(",runlength);
-	  print_dinucleotide_p = -1;
-	} else if (this->comp == FWD_ATAC_INTRON_COMP) {
-	  sprintf(token,"%d]",runlength);
-	} else if (this->comp == REV_ATAC_INTRON_COMP) {
-	  sprintf(token,"%d[",runlength);
-	  print_dinucleotide_p = -1;
-	} else if (this->comp == DUALBREAK_COMP) {
-	  sprintf(token,"%d#",runlength);
-	  print_dinucleotide_p = 0;
-	} else if (this->comp == EXTRAEXON_COMP) {
-	  sprintf(token,"%d#",runlength);
-	  print_dinucleotide_p = 0;
-	} else {
-	  fprintf(stderr,"Can't parse comp '%c' in compression for %s\n",
-		  this->comp,Sequence_accession(queryseq));
-	  abort();
+	PUTC('\t',fp);
+	for (k = exon_pairi_start; k < exon_pairi_end; k++) {
+	  if ((c = pairs[k].cdna) != ' ') {
+	    PUTC(c,fp);
+	  }
 	}
-	tokens = push_token(tokens,token);
-	tokens = List_reverse(tokens);
-	print_tokens_compressed(fp,tokens);
-	List_free(&tokens);
-	FPRINTF(fp,"\t%d",exon_queryend - exon_querystart + 1);
 
-	runlength = 0;
-	donor[0] = this->genome;
-	donor[1] = '\0';
 	in_exon = false;
-      } else if (donor[1] == '\0') {
-	donor[1] = this->genome;
-      } else {
-	acceptor[0] = acceptor[1];
-	acceptor[1] = this->genome;
       }
+
     } else if (this->comp == INTRONGAP_COMP) {
       /* Do nothing */
+
     } else {
       /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP, 
 	 SHORTGAP_COMP, or MISMATCH_COMP */
       if (in_exon == false) {
 	exon_querystart = this->querypos + ONEBASEDP;
 	exon_genomestart = this->genomepos + ONEBASEDP;
-	if (watsonp) {
-	  intron_end = exon_genomestart - 1;
-	} else {
-	  intron_end = exon_genomestart + 1;
-	}
+	exon_pairi_start = i;
 	if (i > 0) {
-	  if (intron_end > intron_start) {
-	    FPRINTF(fp,"\t%d",intron_end - intron_start + 1);
-	  } else {
-	    FPRINTF(fp,"\t%d",intron_start - intron_end + 1);
-	  }
-	  if (print_dinucleotide_p == -1) {
-	    invert_intron(donor,acceptor);
-	  }
-	  if (print_dinucleotide_p != 0) {
-	    if ((donor[0] == 'G' || donor[0] == 'g') &&
-		(donor[1] == 'T' || donor[1] == 't') &&
-		(acceptor[0] == 'A' || acceptor[0] == 'a') &&
-		(acceptor[1] == 'G' || acceptor[1] == 'g')) {
-	      /* Do nothing */
-	    } else {
-	      FPRINTF(fp,"\t%c%c-%c%c",toupper(donor[0]),toupper(donor[1]),toupper(acceptor[0]),toupper(acceptor[1]));
-	    }
-	  }
-#if 0
-	  if (exon_querystart > exon_queryend + 1) {
-	    FPRINTF(fp,"***");
-	  }
-#endif
 	  PUTC('\n',fp);
 	}
 
-	num = den = 0;
 	in_exon = true;
       }
-      if (this->comp == INDEL_COMP || this->comp == SHORTGAP_COMP) {
-	/* Gap in upper or lower sequence */
-	if (this->genome == ' ') {
-	  sprintf(token,"%d^%c",runlength,this->cdna);
-	} else if (this->cdna == ' ') {
-	  sprintf(token,"%dv",runlength);
-	} else {
-	  fprintf(stderr,"Error at %c%c%c\n",this->genome,this->comp,this->cdna);
-	  exit(9);
-	}
-	tokens = push_token(tokens,token);
-	runlength = 0;
-	/* Don't increment den */
-
-      } else if (this->comp == MISMATCH_COMP) {
-	sprintf(token,"%dx%c",runlength,this->cdna);
-	tokens = push_token(tokens,token);
-	runlength = 0;
-	den++;
-
-#ifndef PMAP
-      } else if (this->comp == AMBIGUOUS_COMP) {
-	sprintf(token,"%d:%c",runlength,this->cdna);
-	tokens = push_token(tokens,token);
-	runlength = 0;
-	den++;
-	num++;
-#endif
-
-      } else {
-	runlength++;
-	den++;
-	if (this->comp == MATCH_COMP || this->comp == DYNPROG_MATCH_COMP) {
-	  /* AMBIGUOUS_COMP handled above */
-	  num++;
-	}
-      }
     }
 
     if (this->cdna != ' ') {
@@ -9051,24 +9544,150 @@ Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end,
   /* prev = this; */
   exon_queryend = last_querypos + ONEBASEDP;
   exon_genomeend = last_genomepos + ONEBASEDP;
+  exon_pairi_end = i;
   
   FPRINTF(fp,"\t%d %d",exon_genomestart,exon_genomeend);
   FPRINTF(fp," %d %d",exon_querystart,exon_queryend);
-  if (den == 0) {
-    FPRINTF(fp," 100");
+  PUTC('\t',fp);
+  for (k = exon_pairi_start; k < exon_pairi_end; k++) {
+    if ((c = pairs[k].cdna) != ' ') {
+      PUTC(c,fp);
+    }
+  }
+  PUTC('\n',fp);
+  
+  return;
+}
+#endif
+
+
+void
+Pair_print_compressed (Filestring_T fp, int pathnum, int npaths, T start, T end, Sequence_T queryseq, char *dbversion,
+		       Sequence_T usersegment, int nexons, double fracidentity,
+		       struct T *pairs, int npairs, Chrnum_T chrnum,
+		       Univcoord_T chroffset, Univ_IIT_T chromosome_iit, int querylength_given,
+		       int skiplength, int trim_start, int trim_end, bool checksump,
+		       int chimerapos, int chimeraequivpos, double donor_prob, double acceptor_prob,
+		       int chimera_cdna_direction, char *strain, bool watsonp, int cdna_direction) {
+  Chrpos_T chrpos1, chrpos2;
+  Univcoord_T position1, position2;
+
+  bool in_exon = false;
+  struct T *ptr = pairs, *this = NULL;
+  int querypos1, querypos2;
+  int exon_querystart = -1, exon_queryend;
+  Chrpos_T exon_genomestart = -1, exon_genomeend;
+  int i, k;
+  char *chr, c;
+  double coverage;
+  /* double trimmed_coverage; */
+  int last_querypos = -1;
+  Chrpos_T last_genomepos = -1U;
+
+  querypos1 = start->querypos;
+  querypos2 = end->querypos;
+
+  FPRINTF(fp,">%s ",Sequence_accession(queryseq));
+  if (dbversion != NULL) {
+    FPRINTF(fp,"%s ",dbversion);
+  } else if (usersegment != NULL && Sequence_accession(usersegment) != NULL) {
+    FPRINTF(fp,"%s ",Sequence_accession(usersegment));
   } else {
-    FPRINTF(fp," %d",(int) floor(100.0*(double) num/(double) den));
+    FPRINTF(fp,"user-provided ");
+  }
+#ifdef PMAP
+  FPRINTF(fp,"%d/%d %d %d",pathnum,npaths,(querylength_given+skiplength)*3,nexons);
+  coverage = (double) (querypos2 - querypos1 + 1)/(double) ((querylength_given+skiplength)*3);
+  FPRINTF(fp," %.1f",((double) rint(1000.0*coverage)));
+#else
+  coverage = (double) (querypos2 - querypos1 + 1)/(double) (querylength_given+skiplength);
+  if (end->querypos + 1 > trim_end) {
+    trim_end = end->querypos + 1;
+  }
+  if (start->querypos < trim_start) {
+    trim_start = start->querypos;
   }
+  /*
+  trimmed_coverage = (double) (end->querypos - start->querypos + 1)/(double) (trim_end - trim_start + skiplength);
+  FPRINTF(fp,">%s %s %d/%d %d(%d) %d",
+	 Sequence_accession(queryseq),dbversion,pathnum,npaths,
+	 querylength_given+skiplength,trim_end-trim_start,nexons);
+  FPRINTF(fp," %.1f(%.1f)",((double) rint(1000.0*coverage))/10.0,((double) rint(1000.0*trimmed_coverage))/10.0);
+  */
+  FPRINTF(fp,"%d/%d %d %d",pathnum,npaths,querylength_given+skiplength,nexons);
+  FPRINTF(fp," %.1f",((double) rint(1000.0*coverage))/10.0);
+#endif
+  FPRINTF(fp," %.1f",((double) rint(1000.0*fracidentity))/10.0);
 
-  sprintf(token,"%d*",runlength);
-  tokens = push_token(tokens,token);
-  tokens = List_reverse(tokens);
-  print_tokens_compressed(fp,tokens);
-  List_free(&tokens);
+  start = &(pairs[0]);
+  end = &(pairs[npairs-1]);
+  FPRINTF(fp," %d%s%d",start->querypos + ONEBASEDP,"..",end->querypos + ONEBASEDP);
+
+  chrpos1 = start->genomepos;
+  chrpos2 = end->genomepos;
+  position1 = chroffset + chrpos1;
+  position2 = chroffset + chrpos2;
+  FPRINTF(fp," %u%s%u",position1 + ONEBASEDP,"..",position2 + ONEBASEDP);
+
+  if (chrnum == 0) {
+    FPRINTF(fp," %u%s%u",chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
+  } else {
+    chr = Chrnum_to_string(chrnum,chromosome_iit);
+    FPRINTF(fp," %s:%u%s%u",chr,chrpos1 + ONEBASEDP,"..",chrpos2 + ONEBASEDP);
+    FREE(chr);
+  }
+
+  if (chrpos1 <= chrpos2) {
+    FPRINTF(fp," +");
+  } else {
+    FPRINTF(fp," -");
+  }
+
+  if (cdna_direction > 0) {
+    FPRINTF(fp," dir:sense");
+  } else if (cdna_direction < 0) {
+    FPRINTF(fp," dir:antisense");
+  } else {
+    FPRINTF(fp," dir:indet");
+  }
+
+  if (checksump == true) {
+    FPRINTF(fp," md5:");
+    Sequence_print_digest(fp,queryseq);
+  }
+
+  if (chimerapos >= 0) {
+    if (chimeraequivpos == chimerapos) {
+      if (donor_prob > 0.0 && acceptor_prob > 0.0) {
+	if (chimera_cdna_direction >= 0) {
+	  FPRINTF(fp," chimera:%d(>)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
+	} else {
+	  FPRINTF(fp," chimera:%d(<)/%.3f/%.3f",chimerapos + ONEBASEDP,donor_prob,acceptor_prob);
+	}
+      } else {
+	FPRINTF(fp," chimera:%d",chimerapos + ONEBASEDP);
+      }
+    } else {
+      FPRINTF(fp," chimera:%d..%d",chimerapos + ONEBASEDP,chimeraequivpos + ONEBASEDP);
+    }
+  }
+
+  if (strain != NULL) {
+    FPRINTF(fp," strain:%s",strain);
+  }
 
-  FPRINTF(fp,"\t%d",exon_queryend - exon_querystart + 1);
   PUTC('\n',fp);
 
+  for (i = 0; i < npairs; i++) {
+    /* prev = this; */
+    this = ptr++;
+    if (this->cdna != ' ') {
+      PUTC(this->cdna,fp);
+    }
+  }
+
+  PUTC('\n',fp);
+  
   return;
 }
 
@@ -9448,11 +10067,12 @@ Pairarray_lookup (struct T *pairarray, int npairs, int querypos) {
 
 
 Chrpos_T
-Pair_genomicpos_low (int hardclip_low, int hardclip_high, struct T *pairarray, int npairs, int querylength,
+Pair_genomicpos_low (int hardclip_low, int hardclip_high,
+		     struct T *pairarray, int npairs, int querylength,
 		     bool watsonp, bool hide_soft_clips_p) {
   struct T *clipped_pairs;
   int clipped_npairs;
-  T pair;
+  T lowpair, highpair;
 
 #if 0
   if (clipdir >= 0) {
@@ -9495,25 +10115,33 @@ Pair_genomicpos_low (int hardclip_low, int hardclip_high, struct T *pairarray, i
 #endif
 
   if (watsonp == true) {
-    clipped_pairs = hardclip_pairs(&clipped_npairs,hardclip_low,hardclip_high,
+    clipped_pairs = Pair_hardclip(&clipped_npairs,hardclip_low,hardclip_high,
 				   pairarray,npairs,querylength);
-    pair = &(clipped_pairs[0]);
+    lowpair = &(clipped_pairs[0]);
+    highpair = &(clipped_pairs[clipped_npairs-1]);
     if (hide_soft_clips_p == true) {
-      assert(pair->querypos == 0);
-      return pair->genomepos + 1U - pair->querypos;
+      assert(lowpair->querypos == 0);
+      assert(highpair->querypos == querylength - 1);
+      /* *chrpos_high = highpair->genomepos + 1U - (querylength - 1 - highpair->querypos); */
+      return lowpair->genomepos + 1U - lowpair->querypos;
     } else {
-      return pair->genomepos + 1U;
+      /* *chrpos_high = highpair->genomepos + 1U; */
+      return lowpair->genomepos + 1U;
     }
   } else {
     /* Swap hardclip_low and hardclip_high */
-    clipped_pairs = hardclip_pairs(&clipped_npairs,hardclip_high,hardclip_low,
+    clipped_pairs = Pair_hardclip(&clipped_npairs,hardclip_high,hardclip_low,
 				   pairarray,npairs,querylength);
-    pair = &(clipped_pairs[clipped_npairs-1]);
+    lowpair = &(clipped_pairs[clipped_npairs-1]);
+    highpair = &(clipped_pairs[0]);
     if (hide_soft_clips_p == true) {
-      assert(pair->querypos == querylength - 1);
-      return pair->genomepos + 1U + (querylength - 1 - pair->querypos);
+      assert(lowpair->querypos == querylength - 1);
+      assert(highpair->querypos == 0);
+      /* *chrpos_high = highpair->genomepos + 1U - highpair->querypos; */
+      return lowpair->genomepos + 1U + (querylength - 1 - lowpair->querypos);
     } else {
-      return pair->genomepos + 1U;
+      /* *chrpos_high = highpair->genomepos + 1U; */
+      return lowpair->genomepos + 1U;
     }
   }
 }
diff --git a/src/pair.h b/src/pair.h
index f461b2f..e9e110b 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 200236 2016-11-08 00:58:17Z twu $ */
+/* $Id: pair.h 207201 2017-06-12 18:40:57Z twu $ */
 #ifndef PAIR_INCLUDED
 #define PAIR_INCLUDED
 
@@ -22,6 +22,7 @@ typedef struct Pair_T *Pair_T;
 
 #ifdef GSNAP
 #include "resulthr.h"		/* For Resulttype_T.  Don't call for GMAP, because result.h conflicts */
+#include "stage3hr.h"
 #endif
 
 #define MATCHESPERGAP 3
@@ -124,19 +125,28 @@ Pair_genomicpos (struct T *pairs, int npairs, int querypos, bool headp);
 extern int
 Pair_codon_changepos (struct T *pairs, int npairs, int aapos, int cdna_direction);
 
-
+extern bool
+Pair_identical_p (List_T pairs1, List_T pairs2);
 extern void
 Pair_check_list (List_T pairs);
 extern bool
 Pair_check_array (struct T *pairs, int npairs);
 extern List_T
-Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
-			     int hardclip_low, int hardclip_high, int queryseq_offset);
+Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp,
+			     Chrpos_T chrlength, Pairpool_T pairpool);
+extern List_T
+Pair_convert_array_to_pairs_out (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength,
+				 int hardclip_low, int hardclip_high, int queryseq_offset);
 
 extern void
 Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum,
 			Univcoord_T chroffset, Genome_T genome, Univ_IIT_T chromosome_iit,
 			bool watsonp, int cdna_direction, bool genomefirstp, int invertmode);
+
+extern int
+Pair_cigar_length (List_T tokens);
+extern void
+Pair_print_tokens (Filestring_T fp, List_T tokens);
 extern void
 Pair_tokens_free (List_T *tokens);
 extern List_T
@@ -167,6 +177,13 @@ Pair_print_gsnap (Filestring_T fp, struct T *pairs, int npairs, int nsegments, b
 		  int *splicesites_divint_crosstable, int donor_typeint, int acceptor_typeint,
 		  bool pairedp, GMAP_source_T gmap_source);
 
+#ifndef PMAP
+extern void
+Pair_print_bedpe (Filestring_T fp, struct T *pairs_querydir, int npairs,
+		  Chrnum_T chrnum, int querylength, bool watsonp, int cdna_direction,
+		  Univ_IIT_T chromosome_iit);
+#endif
+
 extern void
 Pair_fix_cdna_direction_array (struct T *pairs_querydir, int npairs, int cdna_direction);
 extern int
@@ -189,6 +206,15 @@ Pair_alias_circular (struct T *pairs, int npairs, Chrpos_T chrlength);
 extern void
 Pair_unalias_circular (struct T *pairs, int npairs, Chrpos_T chrlength);
 
+extern void
+Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
+			  char *quality_string, int querylength, int quality_shift,
+			  bool first_read_p, bool sam_paired_p, char *sam_read_group_id);
+
+extern struct T *
+Pair_hardclip (int *clipped_npairs, int hardclip_start, int hardclip_end,
+	       struct T *pairs, int npairs, int querylength);
+
 extern List_T
 Pair_clean_cigar (List_T tokens, bool watsonp);
 extern List_T
@@ -196,28 +222,26 @@ Pair_compute_cigar (bool *intronp, int *hardclip_start, int *hardclip_end, struc
 		    bool watsonp, int sensedir, int chimera_part);
 
 extern void
-Pair_print_sam (Filestring_T fp, char *abbrev, struct T *pairs, int npairs, List_T cigar_tokens, bool intronp,
+Pair_print_sam (Filestring_T fp, char *abbrev, struct Pair_T *pairarray, int npairs, List_T cigar_tokens, bool intronp,
 		char *acc1, char *acc2, Chrnum_T chrnum, Univ_IIT_T chromosome_iit, Sequence_T usersegment,
 		char *queryseq_ptr, char *quality_string,
-		int hardclip_low, int hardclip_high, int querylength_given,
+		int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high, int querylength_given,
 		bool watsonp, int sensedir, int chimera_part, Chimera_T chimera,
 		int quality_shift, bool first_read_p, int pathnum, int npaths_primary, int npaths_altloc,
 		int absmq_score, int second_absmq, Chrpos_T chrpos, Chrpos_T chrlength,
 #ifdef GSNAP
-		Shortread_T queryseq, Resulttype_T resulttype, unsigned int flag,
+		Shortread_T queryseq, Shortread_T queryseq_mate, Resulttype_T resulttype, unsigned int flag,
 		int pair_mapq_score, int end_mapq_score,
-		Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
-		Chrpos_T mate_chrpos, Chrpos_T mate_chrlength,
-		int mate_sensedir, int pairedlength,
+		Stage3end_T mate, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		Chrpos_T mate_chrlength, int mate_sensedir, int pairedlength,
 #else
 		int mapq_score, bool sam_paired_p,
 #endif
 		char *sam_read_group_id, bool invertp, bool merged_overlap_p, bool sarrayp);
 
-extern void
-Pair_print_sam_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
-			  char *quality_string, int querylength, int quality_shift,
-			  bool first_read_p, bool sam_paired_p, char *sam_read_group_id);
+extern List_T
+Pair_compute_md_string (int *nmismatches_refdiff, int *nmismatches_bothdiff, int *nindels,
+			struct T *pairs, int npairs, bool watsonp, List_T cigar_tokens);
 
 extern Uintlist_T
 Pair_exonbounds (struct T *pairs, int npairs, Univcoord_T chroffset);
@@ -333,7 +357,8 @@ Pairarray_contains_p (struct T *pairarray, int npairs, int querypos);
 extern Chrpos_T
 Pairarray_lookup (struct T *pairarray, int npairs, int querypos);
 extern Chrpos_T
-Pair_genomicpos_low (int hardclip_low, int hardclip_high, struct T *pairarray, int npairs, int querylength,
+Pair_genomicpos_low (int hardclip_low, int hardclip_high,
+		     struct T *pairarray, int npairs, int querylength,
 		     bool watsonp, bool hide_soft_clips_p);
 #endif
 
diff --git a/src/pairpool.c b/src/pairpool.c
index 04aa5c5..8078f6a 100644
--- a/src/pairpool.c
+++ b/src/pairpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pairpool.c 195548 2016-08-02 17:18:50Z twu $";
+static char rcsid[] = "$Id: pairpool.c 203955 2017-03-03 00:28:52Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -199,8 +199,12 @@ Pairpool_push (List_T list, T this, int querypos, int genomepos, char cdna, char
   List_T p;
   int n;
 
-  assert(querypos >= 0);
-  assert(genomepos >= 0);
+  /* assert(querypos >= 0); */
+  /* assert(genomepos >= 0); */
+
+  if (querypos < 0 || genomepos < 0) {
+    return list;
+  }
 
   if (this->pairctr >= this->npairs) {
     this->pairptr = add_new_pairchunk(this);
diff --git a/src/parserange.c b/src/parserange.c
index c66fcf2..afe9b86 100644
--- a/src/parserange.c
+++ b/src/parserange.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: parserange.c 184170 2016-02-12 19:37:19Z twu $";
+static char rcsid[] = "$Id: parserange.c 206103 2017-05-10 18:56:07Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -181,7 +181,7 @@ translate_chromosomepos_universal (Univcoord_T *genomicstart, Chrpos_T *genomicl
     debug(printf("chromosome %s => index %d\n",chromosome,index));
     interval = Univ_IIT_interval(chromosome_iit,index);
     debug(printf("  => label %s with interval low %u\n",
-		 Univ_IIT_label(chromosome_iit,index,&allocp),Interval_low(interval)));
+		 Univ_IIT_label(chromosome_iit,index,&allocp),Univinterval_low(interval)));
     *genomicstart = Univinterval_low(interval)+left;
     if (*genomicstart < Univinterval_low(interval)) {
       fprintf(stderr,"%llu + %llu = %llu (exceeds a 32-bit unsigned int)\n",
@@ -315,9 +315,10 @@ Parserange_query (char **divstring, Univcoord_T *coordstart, Univcoord_T *coorde
     /* Query may have a div */
     *divstring = (char *) MALLOC((div_strlen+1) * sizeof(char)); /* Return value */
     strncpy(*divstring,query,div_strlen);
+    (*divstring)[div_strlen] = '\0';
 
-    debug(printf("Parsed query %s into divstring %s and coords %s\n",
-		 query,*divstring,coords));
+    debug(printf("Parsed query %s into divstring %s (length %d) and coords %s\n",
+		 query,*divstring,div_strlen,coords));
 
     if (IIT_read_divint(filename,*divstring,/*add_iit_p*/true) < 0) {
       fprintf(stderr,"Chromosome %s not found in IIT file\n",*divstring);
diff --git a/src/popcount.c b/src/popcount.c
index 7ce7cc5..ced2107 100644
--- a/src/popcount.c
+++ b/src/popcount.c
@@ -1,10 +1,10 @@
-static char rcsid[] = "$Id: popcount.c 171614 2015-08-10 23:27:29Z twu $";
+static char rcsid[] = "$Id: popcount.c 207320 2017-06-14 19:37:19Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 
-#if !defined(HAVE_SSE4_2) || !defined(HAVE_BUILTIN_CTZ)
+#if !defined(HAVE_SSE4_2) || (!defined(HAVE_TZCNT) && !defined(HAVE_BUILTIN_CTZ))
 const int mod_37_bit_position[] = 
   {
     32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4,
@@ -13,7 +13,7 @@ const int mod_37_bit_position[] =
   };
 #endif
 
-#if !defined(HAVE_SSE4_2) || !defined(HAVE_BUILTIN_POPCOUNT)
+#if !defined(HAVE_SSE4_2) || (!defined(HAVE_POPCNT) && !defined(HAVE_MM_POPCNT) && !defined(HAVE_BUILTIN_POPCOUNT))
 const int count_bits[] =
 { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
   1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
@@ -2066,7 +2066,7 @@ const int count_bits[] =
  };
 #endif
 
-#if !defined(HAVE_SSE4_2) || !defined(HAVE_BUILTIN_CLZ)
+#if !defined(HAVE_SSE4_2) || (!defined(HAVE_LZCNT) && !defined(HAVE_BUILTIN_CLZ))
 const int clz_table[] =
 {16,15,14,14,13,13,13,13,12,12,12,12,12,12,12,12,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
  10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
diff --git a/src/popcount.h b/src/popcount.h
index b8ceb05..68672fd 100644
--- a/src/popcount.h
+++ b/src/popcount.h
@@ -1,19 +1,19 @@
-/* $Id: popcount.h 171614 2015-08-10 23:27:29Z twu $ */
+/* $Id: popcount.h 207320 2017-06-14 19:37:19Z twu $ */
 #ifndef POPCOUNT_INCLUDED
 #define POPCOUNT_INCLUDED
 #ifdef HAVE_CONFIG_H
 #include <config.h>		/* For HAVE_BUILTIN_CTZ, HAVE_BUILTIN_POPCOUNT, HAVE_BUILTIN_CLZ */
 #endif
 
-#if !defined(HAVE_SSE4_2) || !defined(HAVE_BUILTIN_CTZ)
+#if !defined(HAVE_SSE4_2) || (!defined(HAVE_TZCNT) && !defined(HAVE_BUILTIN_CTZ))
 extern const int mod_37_bit_position[];
 #endif
 
-#if !defined(HAVE_SSE4_2) || !defined(HAVE_BUILTIN_POPCOUNT)
+#if !defined(HAVE_SSE4_2) || (!defined(HAVE_POPCNT) && !defined(HAVE_MM_POPCNT) && !defined(HAVE_BUILTIN_POPCOUNT))
 extern const int count_bits[];
 #endif
 
-#if !defined(HAVE_SSE4_2) || !defined(HAVE_BUILTIN_CLZ)
+#if !defined(HAVE_SSE4_2) || (!defined(HAVE_LZCNT) && !defined(HAVE_BUILTIN_CLZ))
 extern const int clz_table[];
 #endif
 
diff --git a/src/samflags.h b/src/samflags.h
index d7d626f..ef1984a 100644
--- a/src/samflags.h
+++ b/src/samflags.h
@@ -1,4 +1,4 @@
-/* $Id: samflags.h 155282 2014-12-12 19:42:54Z twu $ */
+/* $Id: samflags.h 205263 2017-04-13 00:02:34Z twu $ */
 #ifndef SAMFLAGS_INCLUDED
 #define SAMFLAGS_INCLUDED
 
@@ -13,6 +13,7 @@
 #define NOT_PRIMARY        0x0100 /* 256 */
 #define BAD_READ_QUALITY   0x0200 /* 512 */
 #define DUPLICATE_READ     0x0400 /* 1024 */
+#define SUPPLEMENTARY      0x0800 /* 2048 */
 
 
 /* 83 = first read, minus strand for paired */
diff --git a/src/samheader.c b/src/samheader.c
index 6575386..9b63bc3 100644
--- a/src/samheader.c
+++ b/src/samheader.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samheader.c 157094 2015-01-21 00:33:35Z twu $";
+static char rcsid[] = "$Id: samheader.c 207321 2017-06-14 19:37:40Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -27,6 +27,8 @@ SAM_header_open_file (SAM_split_output_type split_output, char *split_output_roo
   char *filename, *suffix;
 
   if (split_output == OUTPUT_NONE) {
+    /* Don't open a file */
+    return (FILE *) NULL;
 
 #ifdef USE_MPI
     /* output file name is passed in through split_output_root */
diff --git a/src/samprint.c b/src/samprint.c
index a377944..2619c88 100644
--- a/src/samprint.c
+++ b/src/samprint.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: samprint.c 200237 2016-11-08 00:58:55Z twu $";
+static char rcsid[] = "$Id: samprint.c 207409 2017-06-16 00:45:35Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -13,6 +13,7 @@ static char rcsid[] = "$Id: samprint.c 200237 2016-11-08 00:58:55Z twu $";
 #include "complement.h"
 #include "mapq.h"
 #include "assert.h"
+#include "cigar.h"
 
 
 #define SANGER_ILLUMINA_DIFF 31
@@ -71,36 +72,61 @@ static bool cigar_extended_p = false;
 
 static bool clip_overlap_p;
 static bool merge_overlap_p;
+static bool merge_samechr_p;
 
 static bool sam_multiple_primaries_p;
 static bool force_xs_direction_p;
 static bool md_lowercase_variant_p;
 static IIT_T snps_iit;
 
+static bool omit_concordant_uniq_p = false;
+static bool omit_concordant_mult_p = false;
+
+static bool find_dna_chimeras_p;
+static IIT_T splicing_iit = NULL;
+static int donor_typeint;
+static int acceptor_typeint;
+static bool transcript_splicing_p = false;
+static IIT_T genestruct_iit = NULL;
+
 static Univ_IIT_T chromosome_iit;
 static Genome_T genome;
 
 void
 SAM_setup (bool add_paired_nomappers_p_in, bool paired_flag_means_concordant_p_in,
+	   bool omit_concordant_uniq_p_in, bool omit_concordant_mult_p_in, 
 	   bool quiet_if_excessive_p_in, int maxpaths_report_in,
 	   char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
-	   bool clip_overlap_p_in, bool merge_overlap_p_in, bool sam_multiple_primaries_p_in,
+	   bool clip_overlap_p_in, bool merge_overlap_p_in, bool merge_samechr_p_in, bool sam_multiple_primaries_p_in,
 	   bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in,
-	   Univ_IIT_T chromosome_iit_in, Genome_T genome_in) {
+	   bool find_dna_chimeras_p_in, IIT_T splicing_iit_in, int donor_typeint_in, int acceptor_typeint_in,
+	   bool transcript_splicing_p_in, IIT_T genestruct_iit_in,  Univ_IIT_T chromosome_iit_in, Genome_T genome_in) {
   add_paired_nomappers_p = add_paired_nomappers_p_in;
   paired_flag_means_concordant_p = paired_flag_means_concordant_p_in;
+
+  omit_concordant_uniq_p = omit_concordant_uniq_p_in;
+  omit_concordant_mult_p = omit_concordant_mult_p_in;
+
   quiet_if_excessive_p = quiet_if_excessive_p_in;
   failedinput_root = failedinput_root_in;
   fastq_format_p = fastq_format_p_in;
   hide_soft_clips_p = hide_soft_clips_p_in;
   clip_overlap_p = clip_overlap_p_in;
   merge_overlap_p = merge_overlap_p_in;
+  merge_samechr_p = merge_samechr_p_in;
   maxpaths_report = maxpaths_report_in;
   sam_multiple_primaries_p = sam_multiple_primaries_p_in;
   force_xs_direction_p = force_xs_direction_p_in;
   md_lowercase_variant_p = md_lowercase_variant_p_in;
   snps_iit = snps_iit_in;
 
+  find_dna_chimeras_p = find_dna_chimeras_p_in;
+  splicing_iit = splicing_iit_in;
+  donor_typeint = donor_typeint_in;
+  acceptor_typeint = acceptor_typeint_in;
+  transcript_splicing_p = transcript_splicing_p_in;
+  genestruct_iit = genestruct_iit_in;
+
   chromosome_iit = chromosome_iit_in;
   genome = genome_in;
 
@@ -111,7 +137,8 @@ SAM_setup (bool add_paired_nomappers_p_in, bool paired_flag_means_concordant_p_i
 unsigned int
 SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
 		  bool first_read_p, int pathnum, int npaths, bool artificial_mate_p, int npaths_mate,
-		  int absmq_score, int first_absmq, bool invertp, bool invert_mate_p) {
+		  int absmq_score, int first_absmq, bool invertp, bool invert_mate_p,
+		  bool supplementaryp) {
   unsigned int flag = 0U;
 
   debug(printf("Resulttype: %s\n",Resulttype_string(resulttype)));
@@ -211,25 +238,32 @@ SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
     }
   }
 
+  if (supplementaryp == true) {
+    flag |= SUPPLEMENTARY;
+  }
+
   return flag;
 }
 
 
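+/* Returns chrpos_low and stores the chromosome number for that position in *chrnum (returns 0U with *chrnum = 0 when this is NULL) */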
 Chrpos_T
-SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength,
-		    bool first_read_p) {
-  Substring_T substring;
+SAM_compute_chrpos (Chrnum_T *chrnum, int hardclip_low, int hardclip_high,
+		    Stage3end_T this, int querylength, bool first_read_p) {
+  Substring_T low_substring, substring;
   Hittype_T hittype;
 
   if (this == NULL) {
+    *chrnum = 0;
     return 0U;
 
   } else if ((hittype = Stage3end_hittype(this)) == GMAP) {
+    *chrnum = Stage3end_chrnum(this);
     return Pair_genomicpos_low(hardclip_low,hardclip_high,Stage3end_pairarray(this),Stage3end_npairs(this),
 			       querylength,/*watsonp*/Stage3end_plusp(this),hide_soft_clips_p);
 
   } else if (hittype == SAMECHR_SPLICE || hittype == TRANSLOC_SPLICE) {
-    /* Want concordant substring */
+    /* Want concordant substring for both chrpos_low and chrpos_high */
     if (Stage3end_plusp(this) == true) {
       if (first_read_p == true) {
 	substring = Stage3end_substringN(this);
@@ -243,12 +277,16 @@ SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int q
 	substring = Stage3end_substringN(this);
       }
     }
-    return Substring_compute_chrpos(substring,hardclip_low,hide_soft_clips_p);
+    *chrnum = Substring_chrnum(substring);
+    return Substring_compute_chrpos(substring,hardclip_low,hardclip_high,hide_soft_clips_p);
 
   } else {
     /* Want low substring */
-    substring = Stage3end_substring_low(this,hardclip_low);
-    return Substring_compute_chrpos(substring,hardclip_low,hide_soft_clips_p);
+    low_substring = Stage3end_substring_low(this,hardclip_low);
+    /* high_substring = Stage3end_substring_high(this,hardclip_high); */
+    *chrnum = Substring_chrnum(low_substring);
+    /* Substring_compute_chrpos(high_substring,hardclip_low,hardclip_high,hide_soft_clips_p); */
+    return Substring_compute_chrpos(low_substring,hardclip_low,hardclip_high,hide_soft_clips_p);
   }
 }
 
@@ -289,77 +327,273 @@ print_chromosomal_pos (Filestring_T fp, Chrnum_T chrnum, Chrpos_T chrpos, Chrpos
   }
 }
 
+/* Prints the mate's chromosome and position as two tab-prefixed fields.  Original remark: "first_read_p here is that of the printed end, not the mate" -- NOTE(review): this function has no first_read_p parameter; confirm which caller the remark refers to. */
 static void
-print_mate_chromosomal_pos (Filestring_T fp, Chrnum_T mate_chrnum, Chrnum_T mate_effective_chrnum,
-			    Chrpos_T mate_chrpos, Chrpos_T mate_chrlength, Chrnum_T anchor_chrnum, Chrpos_T anchor_chrpos,
+print_mate_chromosomal_pos (Filestring_T fp, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+			    Chrpos_T mate_chrlength, Chrnum_T anchor_chrnum, Chrpos_T anchor_chrpos,
 			    Univ_IIT_T chromosome_iit) {
   bool allocp;
   char *chr;
 
-  if (mate_chrpos == 0U) {
+  if (mate_chrpos_low == 0U) { /* position 0: print "*" and 0 for the two mate fields */
     FPRINTF(fp,"\t*\t0");
     return;
 
-  } else {
-    if (mate_chrnum == 0) {
-      /* Interchromosomal splice.  Choose effective chrnum. */
-      mate_chrnum = mate_effective_chrnum;
-    }
-      
-    if (anchor_chrpos > 0U && anchor_chrnum > 0 && mate_chrnum == anchor_chrnum) {
-      /* chrpos already in 1-based coordinates */
-      if (mate_chrpos > mate_chrlength) {
-	FPRINTF(fp,"\t=\t%u",mate_chrpos - mate_chrlength /*+1U*/);
-      } else {
-	FPRINTF(fp,"\t=\t%u",mate_chrpos /*+1U*/);
-      }
+  } else if (mate_chrnum == 0) { /* non-zero position without a chromosome number is treated as a caller error */
+    /* Abort because chrpos should have been 0 */
+    abort();
 
+  } else if (anchor_chrpos > 0U && anchor_chrnum > 0 && mate_chrnum == anchor_chrnum) { /* mate is on the anchor's chromosome: print "=" instead of the label */
+    /* chrpos already in 1-based coordinates */
+    if (mate_chrpos_low > mate_chrlength) { /* position beyond chrlength is reported minus chrlength (presumably a wrapped/circular chromosome -- TODO confirm) */
+      FPRINTF(fp,"\t=\t%u",mate_chrpos_low - mate_chrlength /*+1U*/);
     } else {
-      chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
+      FPRINTF(fp,"\t=\t%u",mate_chrpos_low /*+1U*/);
+    }
 
-      /* chrpos already in 1-based coordinates */
-      if (mate_chrpos > mate_chrlength) {
-	FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos - mate_chrlength /*+1U*/);
-      } else {
-	FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos /*+1U*/);
-      }
+    return;
 
-      if (allocp == true) {
-	FREE(chr);
-      }
-    }
+  } else { /* different chromosome, or no anchor supplied: print the chromosome label */
+    chr = Univ_IIT_label(chromosome_iit,mate_chrnum,&allocp);
     
     /* chrpos already in 1-based coordinates */
+    if (mate_chrpos_low > mate_chrlength) {
+      FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low - mate_chrlength /*+1U*/);
+    } else {
+      FPRINTF(fp,"\t%s\t%u",chr,mate_chrpos_low /*+1U*/);
+    }
+    
+    if (allocp == true) { /* allocp was filled in by Univ_IIT_label; chr is freed only when it is true */
+      FREE(chr);
+    }
+
     return;
   }
 }
 
 
 
+static int /* Writes one segment's contribution to the MD string to fp; returns the match run still pending, which is always 0 when lastp is true */
+print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdiff, /* *printp is set true whenever something is written; both counters are only ever incremented */
+		 Filestring_T fp, int matchlength, char *genomicfwd_refdiff, char *genomicfwd_bothdiff, /* matchlength: run of matches so far, presumably the value returned by the previous call -- confirm against callers */
+		 int stringlength, int querypos, int querylength,
+		 int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
+  int starti, endi, i;
+  int local_nmismatches = 0; /* mismatches found during this call only */
+  bool hardclip_end_p = false; /* while true, the next mismatch is preceded by a count even if that count is 0 */
 
+  if (plusp == true) {
+    debug2(printf("\nEntering md_string with matchlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref, %s both\n",
+		  matchlength,querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
+    if (hardclip_low == 0) {
+      starti = 0;
+      hardclip_end_p = true;
+    } else if (hardclip_low > querypos) { /* segment begins inside the low hard clip: skip the clipped prefix */
+      /* startpos = hardclip_low; */
+      starti = hardclip_low - querypos;
+      hardclip_end_p = true;
+      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
+		    starti,hardclip_low,querypos));
+    } else {
+      /* startpos = querypos; */
+      starti = 0;
+    }
 
-static char complCode[128] = COMPLEMENT_LC;
+    if (querylength - hardclip_high < querypos + stringlength) { /* segment runs into the high hard clip: stop at the clip boundary */
+      /* endpos = querylength - hardclip_high; */
+      endi = (querylength - hardclip_high) - querypos;
+      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+		    endi,querylength,hardclip_high,querypos));
+    } else {
+      /* endpos = querypos + stringlength; */
+      endi = stringlength;
+    }
 
-static void
-make_complement_buffered (char *complement, char *sequence, unsigned int length) {
-  int i, j;
+    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
 
-  /* complement = (char *) CALLOC(length+1,sizeof(char)); */
-  for (i = length-1, j = 0; i >= 0; i--, j++) {
-    complement[j] = complCode[(int) sequence[i]];
+    if (genomicfwd_refdiff == NULL) { /* no genomic string given: the whole unclipped span [starti,endi) counts as matches */
+      if (endi > starti) {
+	matchlength += (endi - starti);
+      }
+
+    } else if (md_lowercase_variant_p == false) { /* uppercase = match; anything else is printed uppercased as a mismatch */
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) { /* NOTE(review): plain char passed to isupper(); bytes outside unsigned char range would need a cast */
+	  matchlength++;
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (matchlength > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d",matchlength);
+	    *printp = true;
+	    hardclip_end_p = false;
+	  }
+	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
+	  *printp = true;
+	  local_nmismatches += 1;
+	  matchlength = 0;
+	}
+      }
+      *nmismatches_refdiff += local_nmismatches;
+
+    } else { /* md_lowercase_variant_p is true: reference-only mismatches keep their lowercase letter */
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  matchlength++;
+
+	} else if (isupper(genomicfwd_bothdiff[i])) {
+	  /* A mismatch against the reference only => alternate variant */
+	  if (matchlength > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d",matchlength);
+	    *printp = true;
+	    hardclip_end_p = false;
+	  }
+	  FPRINTF(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
+	  *printp = true;
+	  local_nmismatches += 1;
+	  matchlength = 0;
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (matchlength > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d",matchlength);
+	    *printp = true;
+	    hardclip_end_p = false;
+	  }
+	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
+	  *printp = true;
+	  local_nmismatches += 1;
+	  matchlength = 0;
+	}
+      }
+      *nmismatches_refdiff += local_nmismatches;
+    }
+
+  } else { /* plusp is false: same procedure after re-expressing querypos from the other end of the query */
+    debug2(printf("\nEntering md_string with matchlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref, %s both\n",
+		  matchlength,querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
+    querypos = querylength - querypos - stringlength;
+    debug2(printf("  Revising querypos to be %d\n",querypos));
+
+    if (hardclip_low == 0) {
+      starti = 0;
+      hardclip_end_p = true;
+    } else if (hardclip_low > querypos) {
+      /* startpos = hardclip_low; */
+      starti = hardclip_low - querypos;
+      hardclip_end_p = true;
+      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
+		    starti,hardclip_low,querypos));
+    } else {
+      /* startpos = querypos; */
+      starti = 0;
+    }
+
+    if (querylength - hardclip_high < querypos + stringlength) {
+      /* endpos = querylength - hardclip_high; */
+      endi = (querylength - hardclip_high) - querypos;
+      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
+		    endi,querylength,hardclip_high,querypos));
+    } else {
+      /* endpos = querypos + stringlength; */
+      endi = stringlength;
+    }
+
+    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
+
+    if (genomicfwd_refdiff == NULL) {
+      if (endi > starti) {
+	matchlength += (endi - starti);
+      }
+
+    } else if (md_lowercase_variant_p == false) {
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  matchlength++;
+
+	} else {
+	  if (matchlength > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d",matchlength);
+	    *printp = true;
+	    hardclip_end_p = false;
+	  }
+	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
+	  *printp = true;
+	  local_nmismatches += 1;
+	  matchlength = 0;
+	}
+      }
+      *nmismatches_refdiff += local_nmismatches;
+
+    } else {
+      for (i = starti; i < endi; i++) {
+	if (isupper(genomicfwd_refdiff[i])) {
+	  matchlength++;
+
+	} else if (isupper(genomicfwd_bothdiff[i])) {
+	  /* A mismatch against the reference only => alternate variant */
+	  if (matchlength > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d",matchlength);
+	    *printp = true;
+	    hardclip_end_p = false;
+	  }
+	  FPRINTF(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
+	  *printp = true;
+	  local_nmismatches += 1;
+	  matchlength = 0;
+
+	} else {
+	  /* A true mismatch against both variants */
+	  if (matchlength > 0 || hardclip_end_p == true) {
+	    FPRINTF(fp,"%d",matchlength);
+	    *printp = true;
+	    hardclip_end_p = false;
+	  }
+	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
+	  *printp = true;
+	  local_nmismatches += 1;
+	  matchlength = 0;
+	}
+      }
+      *nmismatches_refdiff += local_nmismatches;
+    }
   }
-  complement[length] = '\0';
-  return;
-}
 
+  /* Update nmismatches_bothdiff */
+  if (genomicfwd_bothdiff == NULL) {
+    /* No change to nmismatches_bothdiff */
+  } else if (genomicfwd_bothdiff == genomicfwd_refdiff) { /* same buffer (pointer comparison), so this call's count applies to both */
+    *nmismatches_bothdiff += local_nmismatches;
+  } else { /* distinct buffers: count non-uppercase positions of bothdiff over the same [starti,endi) window */
+    for (i = starti; i < endi; i++) {
+      if (!isupper(genomicfwd_bothdiff[i])) {
+	*nmismatches_bothdiff += 1;
+      }
+    }
+  }
+
+  debug2(printf("  Ending with matchlength %d\n",matchlength));
+
+  if (lastp == false) { /* not the last segment: hand the pending run back to the caller instead of printing it */
+    return matchlength;
+  } else if (matchlength > 0) { /* last segment: flush the trailing match count */
+    FPRINTF(fp,"%d",matchlength);
+    *printp = true;
+    return 0;
+  } else {
+    return 0;
+  }
+}
 
 
 /* npaths could be non-zero, if user selected --quiet-if-excessive */
 void
-SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
+SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Shortread_T queryseq_mate,
+		     Stage3end_T mate, char *acc1, char *acc2,
 		     Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
 		     int pathnum, int npaths_primary, int npaths_altloc, bool artificial_mate_p, int npaths_mate,
-		     Chrpos_T mate_chrpos, int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
+		     Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low, int mate_hardclip_low, int mate_hardclip_high,
+		     int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
   unsigned int flag;
 
 
@@ -377,13 +611,13 @@ SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3
   /* 6. CIGAR */
   flag = SAM_compute_flag(/*plusp (NA)*/true,mate,resulttype,first_read_p,
 			  /*pathnum*/0,/*npaths*/0,artificial_mate_p,npaths_mate,
-			  /*absmq_score*/0,/*first_absmq*/0,invertp,invert_mate_p);
+			  /*absmq_score*/0,/*first_absmq*/0,invertp,invert_mate_p,
+			  /*supplementaryp*/false);
   FPRINTF(fp,"\t%u\t*\t0\t0\t*",flag);
 
   /* 7. MRNM: Mate chr */
   /* 8. MPOS: Mate chrpos */
-  print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-			     mate_chrpos,Stage3end_chrlength(mate),
+  print_mate_chromosomal_pos(fp,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
 			     /*anchor_chrnum*/0,/*anchor_chrpos*/0U,chromosome_iit);
 
 
@@ -402,6 +636,14 @@ SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3
 				    quality_shift,/*show_chopped_p*/false);
   }
 
+  /* 12. TAGS: XM */
+  if (queryseq_mate == NULL) {
+    /* Unpaired alignment.  Don't print XM. */
+  } else {
+    FPRINTF(fp,"\tXM:Z:");
+    Cigar_print_mate(fp,mate,Shortread_fulllength(queryseq_mate),mate_hardclip_low,mate_hardclip_high);
+  }
+
   /* 12. TAGS: RG */
   if (sam_read_group_id != NULL) {
     FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
@@ -430,1016 +672,39 @@ SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3
 }
 
 
-#if 0
-/* Derived from print_tokens_gff3 */
-static void
-print_tokens_sam (Filestring_T fp, List_T tokens) {
-  List_T p;
-  char *token;
-  
-  for (p = tokens; p != NULL; p = List_next(p)) {
-    token = (char *) List_head(p);
-    FPRINTF(fp,"%s",token);
-    FREE(token);
-  }
 
-  return;
-}
-#endif
-
-#if 0
-static List_T
-push_token (List_T tokens, char *token) {
-  char *copy;
-
-  copy = (char *) CALLOC(strlen(token)+1,sizeof(char));
-  strcpy(copy,token);
-  return List_push(tokens,(void *) copy);
-}
-#endif
-
-
-#if 0
-/* Currently used for insertions and deletions */
-static List_T
-compute_cigar_old (List_T tokens, char type, int stringlength, int querypos, int querylength,
-		   int hardclip_low, int hardclip_high, bool plusp, bool firstp, bool lastp) {
-  char token[10];
-  
-  debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plusp %d\n",
-		type,stringlength,querypos,querylength,hardclip_low,hardclip_high,plusp));
-
-  if (firstp == true) {
-    debug1(printf("firstp is true\n"));
-    if (plusp == true) {
-      if (hardclip_low > 0) {
-	sprintf(token,"%dH",hardclip_low);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-      if (querypos > hardclip_low) {
-	sprintf(token,"%dS",querypos - hardclip_low);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    } else {
-      if (hardclip_high > 0) {
-	sprintf(token,"%dH",hardclip_high);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-      if (querypos < querylength - hardclip_high) {
-	sprintf(token,"%dS",querypos - hardclip_high);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    }
-  }
-
-  if (type == 'D' || type == 'N') {
-    if (querypos < hardclip_low || querypos >= querylength - hardclip_high) {
-      stringlength = 0;
-    }
-
-  } else if (plusp == true) {
-    debug1(printf("Comparing querypos %d..%d against %d..%d\n",
-		  querypos,querypos + stringlength,hardclip_low,querylength - hardclip_high));
-    if (/* querypos < hardclip_low && */querypos + stringlength < hardclip_low) {
-      /* Print nothing */
-      stringlength = 0;
-      debug1(printf("Case 1: stringlength 0\n"));
-    } else if (querypos < hardclip_low) {
-      if (querypos + stringlength < querylength - hardclip_high) {
-	/* Print part after hardclip_low */
-	stringlength = (querypos + stringlength) - hardclip_low;
-	debug1(printf("Case 2: stringlength %d\n",stringlength));
-      } else {
-	/* Print part between hardclip_low and hardclip_high */
-	stringlength = (querylength - hardclip_high) - hardclip_low;
-	debug1(printf("Case 3: stringlength %d\n",stringlength));
-      }
-    } else if (querypos < querylength - hardclip_high) {
-      if (querypos + stringlength >= querylength - hardclip_high) {
-	/* Print up to hardclip_high */
-	stringlength = (querylength - hardclip_high) - querypos;
-	debug1(printf("Case 4: stringlength %d\n",stringlength));
-      } else {
-	/* Print full stringlength */
-	debug1(printf("Case 5: stringlength %d\n",stringlength));
-      }
-    } else {
-      /* Print nothing */
-      stringlength = 0;
-      debug1(printf("Case 6: stringlength 0\n"));
-    }
-
-  } else {
-    debug1(printf("Comparing querypos %d..%d against %d..%d\n",
-		  querypos,querypos - stringlength,hardclip_low,querylength - hardclip_high));
-    if (/* querypos >= querylength - hardclip_high && */ querypos - stringlength >= querylength - hardclip_high) {
-      /* Print nothing */
-      stringlength = 0;
-      debug1(printf("Case 1: stringlength 0\n"));
-    } else if (querypos >= querylength - hardclip_high) {
-      if (querypos - stringlength >= hardclip_low) {
-	/* Print part after hardclip_high */
-	stringlength = (querylength - hardclip_high) - (querypos - stringlength);
-	debug1(printf("Case 2: stringlength %d\n",stringlength));
-      } else {
-	/* Print part between hardclip_low and hardclip_high */
-	stringlength = (querylength - hardclip_high) - hardclip_low;
-	debug1(printf("Case 3: stringlength %d\n",stringlength));
-      }
-    } else if (querypos >= hardclip_low) {
-      if (querypos - stringlength < hardclip_low) {
-	/* Print up to hardclip_low */
-	stringlength = querypos - hardclip_low;
-	debug1(printf("Case 4: stringlength %d\n",stringlength));
-      } else {
-	/* Print full stringlength */
-	debug1(printf("Case 5: stringlength %d\n",stringlength));
-      }
-    } else {
-      /* Print nothing */
-      stringlength = 0;
-      debug1(printf("Case 5: stringlength 0\n"));
-    }
-  }
-
-  if (stringlength > 0) {
-    sprintf(token,"%d%c",stringlength,type);
-    debug1(printf("Pushing token %s\n",token));
-    tokens = push_token(tokens,token);
-  }
-
-  if (lastp == true) {
-    debug1(printf("lastp is true\n"));
-    if (plusp == true) {
-      querypos += stringlength;
-      if (querypos < querylength - 1 - hardclip_high) {
-	sprintf(token,"%dS",querylength - 1 - hardclip_high - querypos);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-      if (hardclip_high > 0) {
-	sprintf(token,"%dH",hardclip_high);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    } else {
-      querypos -= stringlength;
-      if (querypos > hardclip_low) {
-	sprintf(token,"%dS",hardclip_low - querypos);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-      if (hardclip_low > 0) {
-	sprintf(token,"%dH",hardclip_low);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    }
-  }
-
-  return tokens;
-}
-#endif
-
-
-#if 0
-/* Currently used for insertions and deletions */
-static List_T
-compute_cigar (List_T tokens, char type, int stringlength, int querypos, int querylength,
-	       int hardclip_low, int hardclip_high, bool plusp, int lastp) {
-  int matchlength = 0;
-  int startpos, endpos;
-  int cliplength = 0;
-  char token[10];
-  
-  if (plusp == true) {
-    debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
-		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
-    if (hardclip_low > querypos) { /* > not >= */
-      startpos = hardclip_low;
-      cliplength = hardclip_low;
-    } else {
-      startpos = querypos;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      endpos = querylength - hardclip_high;
-      debug1(printf("  endpos %d = querylength %d - hardclip_high %d\n",endpos,querylength,hardclip_high));
-    } else {
-      endpos = querypos + stringlength;
-      debug1(printf("  endpos %d = querypos %d + stringlength %d\n",endpos,querypos,stringlength));
-    }
-
-    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
-
-    if (endpos >= startpos) {
-      if (cliplength > 0) {
-	debug1(printf("  Pushing initial %dH\n",cliplength));
-	sprintf(token,"%dH",cliplength);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-      matchlength = endpos - startpos;
-      if (matchlength > 0) {
-	debug1(printf("  Pushing %d%c\n",matchlength,type));
-	sprintf(token,"%d%c",matchlength,type);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    }
-
-
-    if (lastp == true) {
-      /* cliplength = querypos + stringlength - endpos; */
-      cliplength = querylength - endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	sprintf(token,"%dH",cliplength);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    }
-
-  } else {
-    debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
-		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
-
-    if (querylength - hardclip_low < querypos) {
-      startpos = querylength - hardclip_low;
-      cliplength = hardclip_low;
-    } else {
-      startpos = querypos;
-    }
-
-    if (hardclip_high >= querypos - stringlength) {
-      endpos = hardclip_high;
-      debug1(printf("  endpos %d = hardclip_high %d\n",endpos,hardclip_high));
-    } else {
-      endpos = querypos - stringlength;
-      debug1(printf("  endpos %d = querypos %d - stringlength %d\n",endpos,querypos,stringlength));
-    }
-
-    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
-
-    if (endpos <= startpos) {
-      if (cliplength > 0) {
-	debug1(printf("  Pushing initial %dH\n",cliplength));
-	sprintf(token,"%dH",cliplength);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-      matchlength = startpos - endpos;
-      if (matchlength > 0) {
-	debug1(printf("  Pushing %d%c\n",matchlength,type));
-	sprintf(token,"%d%c",matchlength,type);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    }
-
-
-    if (lastp == true) {
-      cliplength = endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	sprintf(token,"%dH",cliplength);
-	debug1(printf("Pushing token %s\n",token));
-	tokens = push_token(tokens,token);
-      }
-    }
-  }
-
-  return tokens;
-}
-#endif
-
-
-#if 0
-/* Modified from compute_cigar */
-static Intlist_T
-compute_cigar_types_only (Intlist_T types, char type, int stringlength, int querypos, int querylength,
-			  int hardclip_low, int hardclip_high, bool plusp, int lastp) {
-  int matchlength = 0;
-  int startpos, endpos;
-  int cliplength = 0;
-  
-  if (plusp == true) {
-    debug1(printf("\nEntering compute_cigar_types_only with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
-		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
-    if (hardclip_low > querypos) { /* > not >= */
-      startpos = hardclip_low;
-      cliplength = hardclip_low;
-    } else {
-      startpos = querypos;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      endpos = querylength - hardclip_high;
-      debug1(printf("  endpos %d = querylength %d - hardclip_high %d\n",endpos,querylength,hardclip_high));
-    } else {
-      endpos = querypos + stringlength;
-      debug1(printf("  endpos %d = querypos %d + stringlength %d\n",endpos,querypos,stringlength));
-    }
-
-    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
-
-    if (endpos >= startpos) {
-      if (cliplength > 0) {
-	debug1(printf("  Pushing initial %dH\n",cliplength));
-	types = Intlist_push(types,'H');
-      }
-      matchlength = endpos - startpos;
-      if (matchlength > 0) {
-	debug1(printf("  Pushing %d%c\n",matchlength,type));
-	types = Intlist_push(types,type);
-      }
-    }
-
-
-    if (lastp == true) {
-      /* cliplength = querypos + stringlength - endpos; */
-      cliplength = querylength - endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	types = Intlist_push(types,'H');
-      }
-    }
-
-  } else {
-    debug1(printf("\nEntering compute_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
-		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
-
-    if (querylength - hardclip_low < querypos) {
-      startpos = querylength - hardclip_low;
-      cliplength = hardclip_low;
-    } else {
-      startpos = querypos;
-    }
-
-    if (hardclip_high >= querypos - stringlength) {
-      endpos = hardclip_high;
-      debug1(printf("  endpos %d = hardclip_high %d\n",endpos,hardclip_high));
-    } else {
-      endpos = querypos - stringlength;
-      debug1(printf("  endpos %d = querypos %d - stringlength %d\n",endpos,querypos,stringlength));
-    }
-
-    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
-
-    if (endpos <= startpos) {
-      if (cliplength > 0) {
-	debug1(printf("  Pushing initial %dH\n",cliplength));
-	types = Intlist_push(types,'H');
-      }
-      matchlength = startpos - endpos;
-      if (matchlength > 0) {
-	debug1(printf("  Pushing %d%c\n",matchlength,type));
-	types = Intlist_push(types,type);
-      }
-    }
-
-
-    if (lastp == true) {
-      cliplength = endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	types = Intlist_push(types,'H');
-      }
-    }
-  }
-
-  return types;
-}
-#endif
-
-
-static void
-print_cigar (Filestring_T fp, char type, int stringlength, int querypos, int querylength,
-	     int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
-  int matchlength = 0;
-  int startpos, endpos;
-  int cliplength = 0;
-  
-  if (plusp == true) {
-    debug1(printf("\nEntering print_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus\n",
-		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
-    if (hardclip_low > querypos) { /* > not >= */
-      startpos = hardclip_low;
-      cliplength = hardclip_low;
-    } else {
-      startpos = querypos;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      endpos = querylength - hardclip_high;
-      debug1(printf("  endpos %d = querylength %d - hardclip_high %d\n",endpos,querylength,hardclip_high));
-    } else {
-      endpos = querypos + stringlength;
-      debug1(printf("  endpos %d = querypos %d + stringlength %d\n",endpos,querypos,stringlength));
-    }
-
-    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
-
-    if (endpos >= startpos) {
-      if (cliplength > 0) {
-	debug1(printf("  Pushing initial %dH\n",cliplength));
-	FPRINTF(fp,"%dH",cliplength);
-      }
-      matchlength = endpos - startpos;
-      if (matchlength <= 0) {
-	/* Skip */
-      } else if (type != 'E') {
-	debug1(printf("  Pushing %d%c\n",matchlength,type));
-	FPRINTF(fp,"%d%c",matchlength,type);
-      } else if (matchlength == trimlength) {
-	debug1(printf("  Pushing %dS\n",matchlength));
-	FPRINTF(fp,"%dS",matchlength);
-      } else {
-	debug1(printf("  Pushing %dH because matchlength %d != trimlength %d\n",
-		      matchlength,matchlength,trimlength));
-	FPRINTF(fp,"%dH",matchlength);
-      }
-    }
-
-
-    if (lastp == true) {
-      /* cliplength = querypos + stringlength - endpos; */
-      cliplength = querylength - endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	FPRINTF(fp,"%dH",cliplength);
-      }
-    }
-
-  } else {
-    debug1(printf("\nEntering print_cigar with type %c, stringlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus\n",
-		  type,stringlength,querypos,querylength,hardclip_low,hardclip_high));
-
-    if (querylength - hardclip_low < querypos) {
-      startpos = querylength - hardclip_low;
-      cliplength = hardclip_low;
-    } else {
-      startpos = querypos;
-    }
-
-    if (hardclip_high >= querypos - stringlength) {
-      endpos = hardclip_high;
-      debug1(printf("  endpos %d = hardclip_high %d\n",endpos,hardclip_high));
-    } else {
-      endpos = querypos - stringlength;
-      debug1(printf("  endpos %d = querypos %d - stringlength %d\n",endpos,querypos,stringlength));
-    }
-
-    debug1(printf("  new startpos %d, endpos %d, cliplength %d\n",startpos,endpos,cliplength));
-
-    if (endpos <= startpos) {
-      if (cliplength > 0) {
-	debug1(printf("  Pushing initial %dH\n",cliplength));
-	FPRINTF(fp,"%dH",cliplength);
-      }
-      matchlength = startpos - endpos;
-      if (matchlength <= 0) {
-	/* Skip */
-      } else if (type != 'E') {
-	debug1(printf("  Pushing %d%c\n",matchlength,type));
-	FPRINTF(fp,"%d%c",matchlength,type);
-      } else if (matchlength == trimlength) {
-	debug1(printf("  Pushing %dS\n",matchlength));
-	FPRINTF(fp,"%dS",matchlength);
-      } else {
-	debug1(printf("  Pushing %dH because matchlength %d != trimlength %d\n",
-		      matchlength,matchlength,trimlength));
-	FPRINTF(fp,"%dH",matchlength);
-      }
-    }
-
-
-    if (lastp == true) {
-      cliplength = endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	FPRINTF(fp,"%dH",cliplength);
-      }
-    }
-  }
-
-  return;
-}
-
-
-static int
-print_md_string (bool *printp, int *nmismatches_refdiff, int *nmismatches_bothdiff,
-		 Filestring_T fp, int matchlength, char *genomicfwd_refdiff, char *genomicfwd_bothdiff,
-		 int stringlength, int querypos, int querylength,
-		 int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
-  int starti, endi, i;
-  int local_nmismatches = 0;
-  bool hardclip_end_p = false;
-
-  if (plusp == true) {
-    debug2(printf("\nEntering md_string with matchlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref, %s both\n",
-		  matchlength,querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
-    if (hardclip_low == 0) {
-      starti = 0;
-      hardclip_end_p = true;
-    } else if (hardclip_low > querypos) {
-      /* startpos = hardclip_low; */
-      starti = hardclip_low - querypos;
-      hardclip_end_p = true;
-      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
-		    starti,hardclip_low,querypos));
-    } else {
-      /* startpos = querypos; */
-      starti = 0;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      /* endpos = querylength - hardclip_high; */
-      endi = (querylength - hardclip_high) - querypos;
-      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
-		    endi,querylength,hardclip_high,querypos));
-    } else {
-      /* endpos = querypos + stringlength; */
-      endi = stringlength;
-    }
-
-    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
-
-    if (genomicfwd_refdiff == NULL) {
-      if (endi > starti) {
-	matchlength += (endi - starti);
-      }
-
-    } else if (md_lowercase_variant_p == false) {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  matchlength++;
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (matchlength > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d",matchlength);
-	    *printp = true;
-	    hardclip_end_p = false;
-	  }
-	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
-	  *printp = true;
-	  local_nmismatches += 1;
-	  matchlength = 0;
-	}
-      }
-      *nmismatches_refdiff += local_nmismatches;
-
-    } else {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  matchlength++;
-
-	} else if (isupper(genomicfwd_bothdiff[i])) {
-	  /* A mismatch against the reference only => alternate variant */
-	  if (matchlength > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d",matchlength);
-	    *printp = true;
-	    hardclip_end_p = false;
-	  }
-	  FPRINTF(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
-	  *printp = true;
-	  local_nmismatches += 1;
-	  matchlength = 0;
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (matchlength > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d",matchlength);
-	    *printp = true;
-	    hardclip_end_p = false;
-	  }
-	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
-	  *printp = true;
-	  local_nmismatches += 1;
-	  matchlength = 0;
-	}
-      }
-      *nmismatches_refdiff += local_nmismatches;
-    }
-
-  } else {
-    debug2(printf("\nEntering md_string with matchlength %d, querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref, %s both\n",
-		  matchlength,querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
-    querypos = querylength - querypos - stringlength;
-    debug2(printf("  Revising querypos to be %d\n",querypos));
-
-    if (hardclip_low == 0) {
-      starti = 0;
-      hardclip_end_p = true;
-    } else if (hardclip_low > querypos) {
-      /* startpos = hardclip_low; */
-      starti = hardclip_low - querypos;
-      hardclip_end_p = true;
-      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
-		    starti,hardclip_low,querypos));
-    } else {
-      /* startpos = querypos; */
-      starti = 0;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      /* endpos = querylength - hardclip_high; */
-      endi = (querylength - hardclip_high) - querypos;
-      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
-		    endi,querylength,hardclip_high,querypos));
-    } else {
-      /* endpos = querypos + stringlength; */
-      endi = stringlength;
-    }
-
-    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
-
-    if (genomicfwd_refdiff == NULL) {
-      if (endi > starti) {
-	matchlength += (endi - starti);
-      }
-
-    } else if (md_lowercase_variant_p == false) {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  matchlength++;
-
-	} else {
-	  if (matchlength > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d",matchlength);
-	    *printp = true;
-	    hardclip_end_p = false;
-	  }
-	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
-	  *printp = true;
-	  local_nmismatches += 1;
-	  matchlength = 0;
-	}
-      }
-      *nmismatches_refdiff += local_nmismatches;
-
-    } else {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  matchlength++;
-
-	} else if (isupper(genomicfwd_bothdiff[i])) {
-	  /* A mismatch against the reference only => alternate variant */
-	  if (matchlength > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d",matchlength);
-	    *printp = true;
-	    hardclip_end_p = false;
-	  }
-	  FPRINTF(fp,"%c",genomicfwd_refdiff[i]); /* Leave as lower case */
-	  *printp = true;
-	  local_nmismatches += 1;
-	  matchlength = 0;
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (matchlength > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d",matchlength);
-	    *printp = true;
-	    hardclip_end_p = false;
-	  }
-	  FPRINTF(fp,"%c",toupper(genomicfwd_refdiff[i]));
-	  *printp = true;
-	  local_nmismatches += 1;
-	  matchlength = 0;
-	}
-      }
-      *nmismatches_refdiff += local_nmismatches;
-    }
-  }
-
-  /* Update nmismatches_bothdiff */
-  if (genomicfwd_bothdiff == NULL) {
-    /* No change to nmismatches_bothdiff */
-  } else if (genomicfwd_bothdiff == genomicfwd_refdiff) {
-    *nmismatches_bothdiff += local_nmismatches;
-  } else {
-    for (i = starti; i < endi; i++) {
-      if (!isupper(genomicfwd_bothdiff[i])) {
-	*nmismatches_bothdiff += 1;
-      }
-    }
-  }
-
-  debug2(printf("  Ending with matchlength %d\n",matchlength));
-
-  if (lastp == false) {
-    return matchlength;
-  } else if (matchlength > 0) {
-    FPRINTF(fp,"%d",matchlength);
-    *printp = true;
-    return 0;
-  } else {
-    return 0;
-  }
-}
-
-
-/* Based on print_md_string */
-static void
-print_extended_cigar (Filestring_T fp, char *genomicfwd_refdiff,
-		      int stringlength, int querypos, int querylength,
-		      int hardclip_low, int hardclip_high, bool plusp, bool lastp) {
-  int nmatches = 0, nmismatches = 0;
-  int starti, endi, i;
-  bool hardclip_end_p = false;
-  int cliplength, endpos;
-
-  if (plusp == true) {
-    debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, plus: %s ref, %s both\n",
-		  querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
-    if (hardclip_low == 0) {
-      starti = 0;
-      hardclip_end_p = true;
-    } else if (hardclip_low > querypos) {
-      /* startpos = hardclip_low; */
-      starti = hardclip_low - querypos;
-      hardclip_end_p = true;
-      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
-		    starti,hardclip_low,querypos));
-    } else {
-      /* startpos = querypos; */
-      starti = 0;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      endpos = querylength - hardclip_high;
-      endi = (querylength - hardclip_high) - querypos;
-      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
-		    endi,querylength,hardclip_high,querypos));
-    } else {
-      endpos = querypos + stringlength;
-      endi = stringlength;
-    }
-
-    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
-
-    if (genomicfwd_refdiff == NULL) {
-      if (endi > starti) {
-	nmatches += (endi - starti);
-      }
-
-    } else if (md_lowercase_variant_p == false) {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  if (nmismatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%dX",nmismatches);
-	    nmismatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmatches++;
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (nmatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d=",nmatches);
-	    nmatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmismatches++;
-	}
-      }
-
-    } else {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  if (nmismatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%dX",nmismatches);
-	    nmismatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmatches++;
-
-#if 0
-	} else if (isupper(genomicfwd_bothdiff[i])) {
-	  /* A mismatch against the reference only => alternate variant */
-	  if (nmatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d=",nmatches);
-	    nmatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmismatches++;
-#endif
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (nmatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d=",nmatches);
-	    nmatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmismatches++;
-	}
-      }
-    }
-
-    if (nmatches > 0) {
-      FPRINTF(fp,"%d=",nmatches);
-    } else if (nmismatches > 0) {
-      FPRINTF(fp,"%dX",nmismatches);
-    }
-
-    if (lastp == true) {
-      /* cliplength = querypos + stringlength - endpos; */
-      cliplength = querylength - endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	FPRINTF(fp,"%dH",cliplength);
-      }
-    }
-
-  } else {
-    debug2(printf("\nEntering print_extended_cigar with querypos %d, querylength %d, hardclip_low %d, hardclip_high %d, minus: %s ref, %s both\n",
-		  querypos,querylength,hardclip_low,hardclip_high,genomicfwd_refdiff,genomicfwd_bothdiff));
-    querypos = querylength - querypos - stringlength;
-    debug2(printf("  Revising querypos to be %d\n",querypos));
-
-    if (hardclip_low == 0) {
-      starti = 0;
-      hardclip_end_p = true;
-    } else if (hardclip_low > querypos) {
-      /* startpos = hardclip_low; */
-      starti = hardclip_low - querypos;
-      hardclip_end_p = true;
-      debug2(printf("  Setting starti %d = hardclip_low %d - querypos %d\n",
-		    starti,hardclip_low,querypos));
-    } else {
-      /* startpos = querypos; */
-      starti = 0;
-    }
-
-    if (querylength - hardclip_high < querypos + stringlength) {
-      endpos = querylength - hardclip_high;
-      endi = (querylength - hardclip_high) - querypos;
-      debug2(printf("  Setting endi %d = (querylength %d - hardclip_high %d) - querypos %d\n",
-		    endi,querylength,hardclip_high,querypos));
-    } else {
-      endpos = querypos + stringlength;
-      endi = stringlength;
-    }
-
-    debug2(printf("  Counting matches from %d to %d\n",starti,endi));
-
-    if (genomicfwd_refdiff == NULL) {
-      if (endi > starti) {
-	nmatches += (endi - starti);
-      }
-
-    } else if (md_lowercase_variant_p == false) {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  if (nmismatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%dX",nmismatches);
-	    nmismatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmatches++;
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (nmatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d=",nmatches);
-	    nmatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmismatches++;
-	}
-      }
-
-    } else {
-      for (i = starti; i < endi; i++) {
-	if (isupper(genomicfwd_refdiff[i])) {
-	  if (nmismatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%dX",nmismatches);
-	    nmismatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmatches++;
-
-#if 0
-	} else if (isupper(genomicfwd_bothdiff[i])) {
-	  /* A mismatch against the reference only => alternate variant */
-	  if (nmatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d=",nmatches);
-	    nmatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmismatches++;
-#endif
-
-	} else {
-	  /* A true mismatch against both variants */
-	  if (nmatches > 0 || hardclip_end_p == true) {
-	    FPRINTF(fp,"%d=",nmatches);
-	    nmatches = 0;
-	    hardclip_end_p = false;
-	  }
-	  nmismatches++;
-	}
-      }
-    }
-
-    if (nmatches > 0) {
-      FPRINTF(fp,"%d=",nmatches);
-    } else if (nmismatches > 0) {
-      FPRINTF(fp,"%dX",nmismatches);
-    }
-
-    if (lastp == true) {
-      cliplength = endpos;
-      if (cliplength > 0) {
-	debug1(printf("  Pushing final %dH\n",cliplength));
-	FPRINTF(fp,"%dH",cliplength);
-      }
-    }
-  }
-
-  return;
-}
 
+static char complCode[128] = COMPLEMENT_LC;
 
 static void
-print_cigar_M (Filestring_T fp, Substring_T substring, int substring_length, int substring_start,
-	       int stringlength, int querypos, int querylength,
-	       int hardclip_low, int hardclip_high, bool plusp, bool lastp, int trimlength) {
-  char *genomicfwd_refdiff, *genomicdir_refdiff;
-  
-  if (cigar_extended_p == false) {
-    print_cigar(fp,/*type*/'M',stringlength,querypos,querylength,
-		hardclip_low,hardclip_high,plusp,lastp,trimlength);
-  } else if ((genomicdir_refdiff = Substring_genomic_refdiff(substring)) == NULL) {
-    print_extended_cigar(fp,/*genomicfwd_refdiff*/NULL,/*stringlength*/substring_length,
-			 /*querypos*/substring_start,querylength,
-			 hardclip_low,hardclip_high,plusp,lastp);
-  } else if (plusp == true) {
-    print_extended_cigar(fp,&(genomicdir_refdiff[substring_start]),/*stringlength*/substring_length,
-			 /*querypos*/substring_start,querylength,
-			 hardclip_low,hardclip_high,plusp,lastp);
-  } else {
-    genomicfwd_refdiff = (char *) MALLOCA((substring_length+1) * sizeof(char));
-    make_complement_buffered(genomicfwd_refdiff,&(genomicdir_refdiff[substring_start]),substring_length);
-    print_extended_cigar(fp,genomicfwd_refdiff,/*stringlength*/substring_length,
-			 /*querypos*/substring_start,querylength,
-			 hardclip_low,hardclip_high,plusp,lastp);
-    FREEA(genomicfwd_refdiff);
-  }
-}
-
-
-#if 0
-/* Copy also in pair.c for GMAP */
-static bool
-check_cigar_types (Intlist_T cigar_types) {
-  Intlist_T p;
-  int type;
-  bool M_present_p = false;
+make_complement_buffered (char *complement, char *sequence, unsigned int length) {
+  int i, j;
 
-  for (p = cigar_types; p != NULL; p = Intlist_next(p)) {
-    type = Intlist_head(p);
-    if (type == 'M') {
-      M_present_p = true;
-#if 0
-    } else if (type == 'H' && last_type == 'S') {
-      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
-      return false;
-    } else if (type == 'S' && last_type == 'H') {
-      debug1(printf("check_cigar_types detects adjacent S and H, so returning false\n"));
-      return false;
-#endif
-    }
+  /* complement = (char *) CALLOC(length+1,sizeof(char)); */
+  for (i = length-1, j = 0; i >= 0; i--, j++) {
+    complement[j] = complCode[(int) sequence[i]];
   }
-
-  return M_present_p;
+  complement[length] = '\0';
+  return;
 }
-#endif
-
 
 
 static void
 print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3end_T mate,
 		  char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
 		  int absmq_score, int first_absmq, int second_absmq, int mapq_score,
-		  Shortread_T queryseq, int pairedlength,
-		  Chrpos_T chrpos, Chrpos_T mate_chrpos, int hardclip_low, int hardclip_high,
+		  Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength, Chrpos_T chrpos,
+		  Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		  int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high,
 		  Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
 		  int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
-		  bool circularp) {
+		  bool circularp, bool supplementaryp) {
   unsigned int flag = 0U;
   Substring_T substring, substringL, substringH, substringM;
   Junction_T post_junction;
   int type;
-  int nindels = 0;
+  int nindels;
 
-  List_T substrings_LtoH, junctions_LtoH;
   List_T startp, endp, startq, prevp, finalp, nextp, p, q;
   int substring_start, substring_length, matchlength;
 
@@ -1460,404 +725,45 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
   plusp = Stage3end_plusp(stage3end);
 
 
-#if 1
-  if ((sensedir = Stage3end_sensedir(stage3end)) == SENSE_NULL && mate != NULL) {
-    sensedir = Stage3end_sensedir(mate);
-  }
-#else
-  /* If we use this, we need to change code in pair.c also */
-  sensedir = Stage3end_sensedir(stage3end);
-#endif
-  /* sensep = (sensedir == SENSE_ANTI) ? false : true; */
-
-  /* 1. QNAME */
-  if (acc2 == NULL) {
-    FPRINTF(fp,"%s",acc1);
-  } else {
-    FPRINTF(fp,"%s,%s",acc1,acc2);
-  }
-
-  /* 2. FLAG */
-  flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
-			  pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
-			  absmq_score,first_absmq,invertp,invert_mate_p);
-  FPRINTF(fp,"\t%u",flag);
-
-  /* 3. RNAME: chr */
-  /* 4. POS: chrpos */
-  print_chromosomal_pos(fp,Stage3end_chrnum(stage3end),chrpos,Stage3end_chrlength(stage3end),chromosome_iit);
-
-
-  /* 5. MAPQ: Mapping quality */
-  FPRINTF(fp,"\t%d\t",mapq_score);
-
-  /* 6. CIGAR */
-  substrings_LtoH = Stage3end_substrings_LtoH(stage3end);
-  junctions_LtoH = Stage3end_junctions_LtoH(stage3end);
-  substringL = (Substring_T) List_head(substrings_LtoH);
-  substringH = (Substring_T) List_last_value(substrings_LtoH);
-  if (Substring_ambiguous_p(substringL) == true) {
-    prevp = substrings_LtoH;
-    startp = List_next(substrings_LtoH);
-    startq = List_next(junctions_LtoH);
-  } else {
-    prevp = (List_T) NULL;
-    startp = substrings_LtoH;
-    startq = junctions_LtoH;
-  }
-  if (Substring_ambiguous_p(substringH) == true) {
-    endp = List_last_item(substrings_LtoH);
-  } else {
-    endp = (List_T) NULL;
-  }
-
-  debug(printf("End has %d substrings\n",List_length(substrings_LtoH)));
-
-  p = startp;
-  q = startq;
-  if (plusp == true) {
-    /* Plus */
-    while (p != endp && Substring_queryend((Substring_T) List_head(p)) < hardclip_low) {
-      /* Skip, because substring entirely in hard-clipped region */
-      debug(printf("Skipping %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		   Substring_queryend((Substring_T) List_head(p))));
-      prevp = p;
-      p = List_next(p);
-      q = List_next(q);
-    }
-
-    substring = (Substring_T) List_head(p);
-    if (List_next(p) == endp ||	Substring_queryend(substring) >= querylength - hardclip_high) {
-      /* Single substring */
-      debug(printf("Single substring %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		   Substring_queryend((Substring_T) List_head(p))));
-
-      if (hide_soft_clips_p == true) {
-	substring_start = Substring_querystart_orig(substring);
-	substring_length = Substring_match_length_orig(substring);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      Substring_querystart(substring) + Substring_match_length(substring) +
-		      (querylength - Substring_queryend(substring)),/*querypos*/0,querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
-      } else {
-	substring_start = Substring_querystart(substring);
-	substring_length = Substring_match_length(substring);
-	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      Substring_match_length(substring),
-		      /*querypos*/Substring_querystart(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
-		    /*querypos*/Substring_queryend(substring),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
-      }
-      finalp = p;
-      nextp = List_next(p);
-
-    } else {
-      /* First substring, plus */
-      debug(printf("First substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		   Substring_queryend((Substring_T) List_head(p))));
-
-      post_junction = (Junction_T) List_head(q);
-
-      if (hide_soft_clips_p == true) {
-	substring_start = Substring_querystart_orig(substring);
-	substring_length = Substring_match_length_orig(substring);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      Substring_querystart(substring) +
-		      Substring_match_length(substring),
-		      /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		      /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-      } else {
-	substring_start = Substring_querystart(substring);
-	substring_length = Substring_match_length(substring);
-	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      Substring_match_length(substring),
-		      /*querypos*/Substring_querystart(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-      }
-      p = List_next(p);
-      
-      while (p != endp && Substring_queryend((Substring_T) List_head(p)) < querylength - hardclip_high) {
-	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
-	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == INS_JUNCTION) {
-	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == SPLICE_JUNCTION) {
-	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
-	}
-	q = List_next(q);
-	if (q == NULL) {
-	} else {
-	  post_junction = (Junction_T) List_head(q);
-	}
-
-	substring = (Substring_T) List_head(p);
-	if (List_next(p) == endp) {
-	  /* Last substring, plus, not hard-clipped */
-	  debug(printf("Last substring, plus, not hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		       Substring_queryend((Substring_T) List_head(p))));
-	  
-	  if (hide_soft_clips_p == true) {
-	    substring_start = Substring_querystart_orig(substring);
-	    substring_length = Substring_match_length_orig(substring);
-	    print_cigar_M(fp,substring,substring_length,substring_start,
-			  Substring_match_length(substring) +
-			  (querylength - Substring_queryend(substring)),
-			  /*querypos*/Substring_querystart(substring),querylength,
-			  hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
-	  } else {
-	    substring_start = Substring_querystart(substring);
-	    substring_length = Substring_match_length(substring);
-	    print_cigar_M(fp,substring,substring_length,substring_start,Substring_match_length(substring),
-			  /*querypos*/Substring_querystart(substring),querylength,
-			  hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	    print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
-			/*querypos*/Substring_queryend(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
-	  }
-	  finalp = p;
-	  nextp = List_next(p);
-
-	} else {
-	  /* Middle substring, plus */
-	  debug(printf("Middle substring, plus %d..%d\n",Substring_querystart((Substring_T) List_head(p)), 
-		       Substring_queryend((Substring_T) List_head(p))));
-	  substring_start = Substring_querystart(substring);
-	  substring_length = Substring_match_length(substring);
-
-	  print_cigar_M(fp,substring,substring_length,substring_start,
-			Substring_match_length(substring),
-			/*querypos*/Substring_querystart(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	}
-	p = List_next(p);
-      }
-      
-      if (p != endp) {
-	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
-	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == INS_JUNCTION) {
-	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == SPLICE_JUNCTION) {
-	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
-	}
-
-	/* Last substring, plus, hard-clipped */
-	substring = (Substring_T) List_head(p);
-	debug(printf("Last substring, plus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		     Substring_queryend((Substring_T) List_head(p))));
-	if (hide_soft_clips_p == true) {
-	  substring_start = Substring_querystart_orig(substring);
-	  substring_length = Substring_match_length_orig(substring);
-	  print_cigar_M(fp,substring,substring_length,substring_start,
-			Substring_match_length(substring) +
-			(querylength - Substring_queryend(substring)),
-			/*querypos*/Substring_querystart(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
-	} else {
-	  substring_start = Substring_querystart(substring);
-	  substring_length = Substring_match_length(substring);
-	  print_cigar_M(fp,substring,substring_length,substring_start,
-			Substring_match_length(substring),
-			/*querypos*/Substring_querystart(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	  print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
-		      /*querypos*/Substring_queryend(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,/*trimlength*/0);
-	}
-	finalp = p;
-	nextp = List_next(p);
-
-      }
-    }
-
-  } else {
-    /* Minus */
-    while (p != endp && Substring_querystart((Substring_T) List_head(p)) >= querylength - hardclip_low) {
-      /* Skip, because substring entirely in hard-clipped region */
-      debug(printf("Skipping %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		   Substring_queryend((Substring_T) List_head(p))));
-      prevp = p;
-      p = List_next(p);
-      q = List_next(q);
-    }
-
-    substring = (Substring_T) List_head(p);
-    if (List_next(p) == endp || Substring_querystart(substring) < hardclip_high) {
-      /* Single substring */
-      debug(printf("Single substring %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		   Substring_queryend((Substring_T) List_head(p))));
-
-      if (hide_soft_clips_p == true) {
-	substring_start = Substring_querystart_orig(substring);
-	substring_length = Substring_match_length_orig(substring);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      (querylength - Substring_queryend(substring)) + 
-		      Substring_match_length(substring) + Substring_querystart(substring),
-		      /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
-		      /*plusp*/false,/*lastp*/true,/*trimlength*/0);
-      } else {
-	substring_start = Substring_querystart(substring);
-	substring_length = Substring_match_length(substring);
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      Substring_match_length(substring),
-		      /*querypos*/Substring_queryend(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'S',Substring_querystart(substring),
-		    /*querypos*/Substring_querystart(substring),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
-      }
-      finalp = p;
-      nextp = List_next(p);
-
-    } else {
-      /* First substring, minus */
-      debug(printf("First substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		   Substring_queryend((Substring_T) List_head(p))));
-    
-      post_junction = (Junction_T) List_head(q);
-
-      if (hide_soft_clips_p == true) {
-	substring_start = Substring_querystart_orig(substring);
-	substring_length = Substring_match_length_orig(substring);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      (querylength - Substring_queryend(substring)) +
-		      Substring_match_length(substring),
-		      /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
-		      /*plusp*/false,/*lastp*/false,/*trimlength*/0);
-      } else {
-	substring_start = Substring_querystart(substring);
-	substring_length = Substring_match_length(substring);
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(substring),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar_M(fp,substring,substring_length,substring_start,
-		      Substring_match_length(substring),
-		      /*querypos*/Substring_queryend(substring),querylength,
-		      hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-      }
-      p = List_next(p);
-
-      while (p != endp && Substring_querystart((Substring_T) List_head(p)) >= hardclip_high) {
-	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
-	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == INS_JUNCTION) {
-	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == SPLICE_JUNCTION) {
-	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
-	}
-	q = List_next(q);
-	if (q == NULL) {
-	} else {
-	  post_junction = (Junction_T) List_head(q);
-	}
-
-	substring = (Substring_T) List_head(p);
-	if (List_next(p) == endp) {
-	  /* Last substring, minus, not hard-clipped */
-	  debug(printf("Last substring, minus, not hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		       Substring_queryend((Substring_T) List_head(p))));
-
-	  if (hide_soft_clips_p == true) {
-	    substring_start = Substring_querystart_orig(substring);
-	    substring_length = Substring_match_length_orig(substring);
-	    print_cigar_M(fp,substring,substring_length,substring_start,
-			  Substring_match_length(substring) +
-			  Substring_querystart(substring),
-			  /*querypos*/Substring_queryend(substring),querylength,
-			  hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
-	  } else {
-	    substring_start = Substring_querystart(substring);
-	    substring_length = Substring_match_length(substring);
-	    print_cigar_M(fp,substring,substring_length,substring_start,
-			  Substring_match_length(substring),
-			  /*querypos*/Substring_queryend(substring),querylength,
-			  hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	    print_cigar(fp,/*type*/'S',Substring_querystart(substring),
-			/*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
-			/*plusp*/false,/*lastp*/true,/*trimlength*/0);
-	  }
-	  finalp = p;
-	  nextp = List_next(p);
+#if 1
+  if ((sensedir = Stage3end_sensedir(stage3end)) == SENSE_NULL && mate != NULL) {
+    sensedir = Stage3end_sensedir(mate);
+  }
+#else
+  /* If we use this, we need to change code in pair.c also */
+  sensedir = Stage3end_sensedir(stage3end);
+#endif
+  /* sensep = (sensedir == SENSE_ANTI) ? false : true; */
 
-	} else {
-	  /* Middle substring, minus */
-	  debug(printf("Middle substring, minus %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		       Substring_queryend((Substring_T) List_head(p))));
-	  substring_start = Substring_querystart(substring);
-	  substring_length = Substring_match_length(substring);
+  /* 1. QNAME */
+  if (acc2 == NULL) {
+    FPRINTF(fp,"%s",acc1);
+  } else {
+    FPRINTF(fp,"%s,%s",acc1,acc2);
+  }
 
-	  print_cigar_M(fp,substring,substring_length,substring_start,
-			Substring_match_length(substring),
-			/*querypos*/Substring_queryend(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	}
-	p = List_next(p);
-      }
+  /* 2. FLAG */
+  flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
+			  pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
+			  absmq_score,first_absmq,invertp,invert_mate_p,supplementaryp);
+  FPRINTF(fp,"\t%u",flag);
 
-      if (p != endp) {
-	if ((type = Junction_type(post_junction)) == DEL_JUNCTION) {
-	  FPRINTF(fp,"%dD",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == INS_JUNCTION) {
-	  FPRINTF(fp,"%dI",Junction_nindels(post_junction));
-	  nindels += Junction_nindels(post_junction);
-	} else if (type == SPLICE_JUNCTION) {
-	  FPRINTF(fp,"%uN",Junction_splice_distance(post_junction));
-	}
+  /* 3. RNAME: chr */
+  /* 4. POS: chrpos */
+  print_chromosomal_pos(fp,Stage3end_chrnum(stage3end),chrpos,Stage3end_chrlength(stage3end),chromosome_iit);
 
-	/* Last substring, minus, hard-clipped */
-	substring = (Substring_T) List_head(p);
-	debug(printf("Last substring, minus, hard-clipped %d..%d\n",Substring_querystart((Substring_T) List_head(p)),
-		     Substring_queryend((Substring_T) List_head(p))));
 
-	if (hide_soft_clips_p == true) {
-	  substring_start = Substring_querystart_orig(substring);
-	  substring_length = Substring_match_length_orig(substring);
-	  print_cigar_M(fp,substring,substring_length,substring_start,
-			Substring_match_length(substring) +
-			Substring_querystart(substring),
-			/*querypos*/Substring_queryend(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,/*trimlength*/0);
-	} else {
-	  substring_start = Substring_querystart(substring);
-	  substring_length = Substring_match_length(substring);
-	  print_cigar_M(fp,substring,substring_length,substring_start,
-			Substring_match_length(substring),
-			/*querypos*/Substring_queryend(substring),querylength,
-			hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	  print_cigar(fp,/*type*/'S',Substring_querystart(substring),
-		      /*querypos*/Substring_querystart(substring),querylength,hardclip_low,hardclip_high,
-		      /*plusp*/false,/*lastp*/true,/*trimlength*/0);
-	}
-	finalp = p;
-	nextp = List_next(p);
+  /* 5. MAPQ: Mapping quality */
+  FPRINTF(fp,"\t%d\t",mapq_score);
 
-      }
-    }
-  }
+  /* 6. CIGAR */
+  Cigar_print_substrings(&nindels,&startp,&startq,&prevp,&nextp,&finalp,&endp,
+			 fp,stage3end,querylength,hardclip_low,hardclip_high);
 
 
   /* 7. MRNM: Mate chr */
   /* 8. MPOS: Mate chrpos */
-  print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-			     mate_chrpos,Stage3end_chrlength(mate),
+  print_mate_chromosomal_pos(fp,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
 			     Stage3end_chrnum(stage3end),chrpos,chromosome_iit);
 
 
@@ -1868,11 +774,11 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
     } else {
       FPRINTF(fp,"\t%d",pairedlength);
     }
-  } else if (mate_chrpos == 0) {
+  } else if (mate_chrpos_low == 0) {
     FPRINTF(fp,"\t%d",pairedlength);
-  } else if (chrpos < mate_chrpos) {
+  } else if (chrpos < mate_chrpos_low) {
     FPRINTF(fp,"\t%d",pairedlength);
-  } else if (chrpos > mate_chrpos) {
+  } else if (chrpos > mate_chrpos_low) {
     FPRINTF(fp,"\t%d",-pairedlength);
   } else if (first_read_p == true) {
     FPRINTF(fp,"\t%d",pairedlength);
@@ -1893,6 +799,15 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 				   quality_shift,/*show_chopped_p*/false);
   } 
 
+  /* 12. TAGS: XM */
+  if (queryseq_mate == NULL) {
+    /* Unpaired alignment.  Don't print XM. */
+  } else {
+    FPRINTF(fp,"\tXM:Z:");
+    Cigar_print_mate(fp,mate,Shortread_fulllength(queryseq_mate),mate_hardclip_low,mate_hardclip_high);
+  }
+
+
   /* 12. TAGS: RG */
   if (sam_read_group_id != NULL) {
     FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
@@ -2443,6 +1358,7 @@ print_substrings (Filestring_T fp, char *abbrev, Stage3end_T stage3end, Stage3en
 }
 
 
+
 static void
 halfdonor_dinucleotide (char *donor1, char *donor2, Substring_T donor, int sensedir) {
   char *genomic;
@@ -2487,20 +1403,22 @@ static void
 print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
 		 char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
 		 int absmq_score, int first_absmq, int second_absmq, int mapq_score,
-		 Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
-		 Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
-		 int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p,
+		 Univ_IIT_T chromosome_iit, Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength,
+		 Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos,
+		 Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		 int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high,
+		 Resulttype_T resulttype, bool first_read_p,
 		 bool artificial_mate_p, int npaths_mate,
 		 int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
 		 bool use_hardclip_p, bool print_xt_p, int donor_sensedir, char donor_strand, char acceptor_strand,
 		 char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
-		 double donor_prob, double acceptor_prob, bool circularp) {
+		 double donor_prob, double acceptor_prob, bool circularp, bool supplementaryp) {
   unsigned int flag = 0U;
   int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
   bool sensep;
   char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
   int substring_start, substring_length;
-  int transloc_hardclip_low, transloc_hardclip_high;
+  /* int transloc_hardclip_low, transloc_hardclip_high; */
   bool plusp, printp;
   bool start_ambig, end_ambig;
   int n, i;
@@ -2508,6 +1426,12 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
 #ifdef PRINT_AMBIG_COORDS
   Univcoord_T chroffset;
 #endif
+  Chrpos_T donor_coord, acceptor_coord, chrpos;
+  char strand, *divstring, *donor_genome_coord = NULL, *acceptor_genome_coord = NULL,
+    *donor_gene = NULL, *acceptor_gene = NULL, *label, *p;
+  int match, *matches, nmatches;
+  char *annotation, *restofheader;
+  bool alloc_header_p, allocp;
 
 
   querylength = Shortread_fulllength(queryseq);
@@ -2523,7 +1447,7 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
   /* 2. FLAG */
   flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
 			  pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
-			  absmq_score,first_absmq,invertp,invert_mate_p);
+			  absmq_score,first_absmq,invertp,invert_mate_p,supplementaryp);
   FPRINTF(fp,"\t%u",flag);
 
   /* 3. RNAME: chr */
@@ -2535,163 +1459,12 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
   FPRINTF(fp,"\t%d\t",mapq_score);
 
   /* 6. CIGAR */
-  if (Stage3end_sensedir(this) == SENSE_ANTI) {
-    sensep = false;
-  } else {
-    sensep = true;
-  }
-
-  if (use_hardclip_p == true) {
-    if (sensep == true) {
-      if (plusp == true) {
-	transloc_hardclip_low = 0;
-	transloc_hardclip_high = querylength - Substring_queryend(donor);
-      } else {
-	transloc_hardclip_high = 0;
-	transloc_hardclip_low = querylength - Substring_queryend(donor);
-      }
-
-    } else {
-      if (plusp == true) {
-	transloc_hardclip_high = 0;
-	transloc_hardclip_low = Substring_querystart(donor);
-      } else {
-	transloc_hardclip_low = 0;
-	transloc_hardclip_high = Substring_querystart(donor);
-      }
-    }
-
-    if (transloc_hardclip_low > hardclip_low) {
-      hardclip_low = transloc_hardclip_low;
-    }
-    if (transloc_hardclip_high > hardclip_high) {
-      hardclip_high = transloc_hardclip_high;
-    }
-  }
-
-
-  if (sensep == true) {
-    /* Doesn't hold for DNA-Seq chimeras */
-    /* assert(Substring_siteD_pos(donor) == Substring_queryend(donor)); */
-    if (plusp == true) {
-      /* sensep true, plusp true */
-      /* FPRINTF(fp,"donor sensep true, plusp true\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',
-		    Substring_querystart(donor) + 
-		    Substring_match_length(donor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
-		    /*querypos*/Substring_queryend(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/Substring_trim_right(donor));
-
-      } else {
-	print_cigar(fp,/*type*/'S',Substring_querystart(donor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
-		    /*querypos*/Substring_querystart(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
-		    /*querypos*/Substring_queryend(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/Substring_trim_right(donor));
-      }
-
-    } else {
-      /* sensep true, plusp false */
-      /* FPRINTF(fp,"donor sensep false, plusp false\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/Substring_trim_right(donor));
-	print_cigar(fp,/*type*/'M',
-		    Substring_match_length(donor) +
-		    Substring_querystart(donor),
-		    /*querypos*/Substring_queryend(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
-		    /*trimlength*/0);
-      } else {
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(donor),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/Substring_trim_right(donor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
-		    /*querypos*/Substring_queryend(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'S',Substring_querystart(donor),
-		    /*querypos*/Substring_querystart(donor),querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/true,/*trimlength*/0);
-      }
-    }
-
-  } else {
-    /* Doesn't hold for DNA-Seq chimeras */
-    /* assert(Substring_siteD_pos(donor) == Substring_querystart(donor)); */
-    if (plusp == true) {
-      /* sensep false, plusp true */
-      /* FPRINTF(fp,"donor sensep false, plusp true\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(donor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(donor) + (querylength - Substring_queryend(donor)),
-		    /*querypos*/Substring_querystart(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/0);
-      } else {
-	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(donor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
-		    /*querypos*/Substring_querystart(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
-		    /*querypos*/Substring_queryend(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/0);
-      }
-
-    } else {
-      /* sensep false, plusp false */
-      /* FPRINTF(fp,"donor sensep true, plusp false\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',(querylength - Substring_queryend(donor)) + Substring_match_length(donor),
-		    /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
-		    /*querypos*/Substring_querystart(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
-		    /*trimlength*/Substring_trim_left(donor));
-
-      } else {
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(donor),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(donor),
-		    /*querypos*/Substring_queryend(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'E',Substring_querystart(donor),
-		    /*querypos*/Substring_querystart(donor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
-		    /*trimlength*/Substring_trim_left(donor));
-      }
-    }
-  }
+  Cigar_print_halfdonor(fp,donor,this,querylength,&hardclip_low,&hardclip_high,use_hardclip_p);
 
   /* 7. MRNM: Mate chr */
   /* 8. MPOS: Mate chrpos */
   /* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
-  print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-			     mate_chrpos,Stage3end_chrlength(mate),
+  print_mate_chromosomal_pos(fp,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
 			     /*anchor_chrnum*/Substring_chrnum(donor),donor_chrpos,chromosome_iit);
 
 
@@ -2702,12 +1475,12 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
     } else {
       FPRINTF(fp,"\t%d",pairedlength);
     }
-  } else if (mate_chrpos == 0) {
+  } else if (mate_chrpos_low == 0) {
     FPRINTF(fp,"\t%d",pairedlength);
 #if 0
-  } else if (concordant_chrpos < mate_chrpos) {
+  } else if (concordant_chrpos < mate_chrpos_low) {
     FPRINTF(fp,"\t%d",pairedlength);
-  } else if (concordant_chrpos > mate_chrpos) {
+  } else if (concordant_chrpos > mate_chrpos_low) {
     FPRINTF(fp,"\t%d",-pairedlength);
 #endif
   } else if (first_read_p == true) {
@@ -2730,6 +1503,14 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
   }
 
 
+  /* 12. TAGS: XM */
+  if (queryseq_mate == NULL) {
+    /* Unpaired alignment.  Don't print XM. */
+  } else {
+    FPRINTF(fp,"\tXM:Z:");
+    Cigar_print_mate(fp,mate,Shortread_fulllength(queryseq_mate),mate_hardclip_low,mate_hardclip_high);
+  }
+
   /* 12. TAGS: RG */
   if (sam_read_group_id != NULL) {
     FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
@@ -2764,6 +1545,12 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
   FPRINTF(fp,"\tMD:Z:");
   printp = false;
 
+  if (Stage3end_sensedir(this) == SENSE_ANTI) {
+    sensep = false;
+  } else {
+    sensep = true;
+  }
+
   if (hide_soft_clips_p == true) {
     substring_start = Substring_querystart_orig(donor);
     substring_length = Substring_match_length_orig(donor);
@@ -2982,9 +1769,111 @@ print_halfdonor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T a
 
   /* 12. TAGS: XT */
   if (print_xt_p == true) {
+    donor_coord = Substring_chr_splicecoord_D(donor,donor_strand);
+    acceptor_coord = Substring_chr_splicecoord_A(acceptor,acceptor_strand);
     FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
-    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor),
-	    acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor));
+    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_coord,
+	    acceptor_strand,acceptor_chr,acceptor_coord);
+
+    if (donor_prob > 1.0 && transcript_splicing_p == true) {
+      /* donor_coord is really a known transcript splicesite */
+      /* Assumes that IIT label starts with <gene>. and that coordinates follow " 0 " in header */
+#if 0      
+      /* This value for match does not work.  knowni is for splicetrie, not splicing_iit */
+      match = Substring_splicesitesD_knowni(donor);
+      IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+#else
+      matches = IIT_get_typed(&nmatches,splicing_iit,donor_chr,donor_coord,donor_coord,donor_typeint,/*sortp*/false);
+      if (nmatches > 0) {
+	match = matches[0];
+	IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+	p = annotation = &(restofheader[3]); /* Skip " 0 " */
+	while (*p != '\0' && *p != '\n' && !isspace(*p)) p++;
+	donor_genome_coord = (char *) MALLOC((p - annotation + 1)*sizeof(char));
+	strncpy(donor_genome_coord,annotation,p - annotation);
+	donor_genome_coord[p - annotation] = '\0';
+	if (alloc_header_p) {
+	  FREE(restofheader);
+	}
+	
+	p = label = IIT_label(splicing_iit,match,&allocp);
+	while (*p != '\0' && *p != '.') p++;
+	donor_gene = (char *) MALLOC((p - label + 1)*sizeof(char));
+	strncpy(donor_gene,label,p - label);
+	donor_gene[p - label] = '\0';
+	if (allocp) {
+	  FREE(label);
+	}
+	FREE(matches);
+      }
+#endif
+      
+    } else if (genestruct_iit != NULL) {
+      /* Also a transcript splice, but not at a known site */
+      if ((chrpos = IIT_genestruct_chrpos(&strand,&divstring,&donor_gene,
+					  genestruct_iit,donor_chr,donor_coord)) > 0) {
+	/* chrpos is an unsigned int, so must be 10 chars at most */
+	donor_genome_coord = (char *) MALLOC((1+strlen(divstring)+11)*sizeof(char));
+	sprintf(donor_genome_coord,"%c%s@%u",strand,divstring,chrpos);
+      }
+    }
+  
+
+    if (acceptor_prob > 1.0 && transcript_splicing_p == true) {
+      /* acceptor_coord is really a known transcript splicesite */
+      /* Assumes that IIT label starts with <gene>. and that coordinates follow " 0 " in header */
+#if 0
+      /* This value for match does not work.  knowni is for splicetrie, not splicing_iit */
+      match = Substring_splicesitesA_knowni(acceptor);
+      IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+#else
+      matches = IIT_get_typed(&nmatches,splicing_iit,acceptor_chr,acceptor_coord,acceptor_coord,acceptor_typeint,/*sortp*/false);
+      if (nmatches > 0) {
+	match = matches[0];
+	/* annotation = */ IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+	p = annotation = &(restofheader[3]); /* Skip " 0 " */
+	while (*p != '\0' && *p != '\n' && !isspace(*p)) p++;
+	acceptor_genome_coord = (char *) MALLOC((p - annotation + 1)*sizeof(char));
+	strncpy(acceptor_genome_coord,annotation,p - annotation);
+	acceptor_genome_coord[p - annotation] = '\0';
+	if (alloc_header_p) {
+	  FREE(restofheader);
+	}
+	
+	p = label = IIT_label(splicing_iit,match,&allocp);
+	while (*p != '\0' && *p != '.') p++;
+	acceptor_gene = (char *) MALLOC((p - label + 1)*sizeof(char));
+	strncpy(acceptor_gene,label,p - label);
+	acceptor_gene[p - label] = '\0';
+	if (allocp) {
+	  FREE(label);
+	}
+	FREE(matches);
+      }
+#endif
+
+    } else if (genestruct_iit != NULL) {
+      /* Also a transcript splice, but not at a known site */
+      if ((chrpos = IIT_genestruct_chrpos(&strand,&divstring,&acceptor_gene,
+					  genestruct_iit,acceptor_chr,acceptor_coord)) > 0) {
+	/* chrpos is an unsigned int, so must be 10 chars at most */
+	acceptor_genome_coord = (char *) MALLOC((1+strlen(divstring)+11)*sizeof(char));
+	sprintf(acceptor_genome_coord,"%c%s@%u",strand,divstring,chrpos);
+      }
+    }
+
+
+    if (donor_genome_coord != NULL && acceptor_genome_coord != NULL && donor_gene != NULL && acceptor_gene != NULL) {
+      FPRINTF(fp,",%s..%s",donor_genome_coord,acceptor_genome_coord);
+      FPRINTF(fp,",%s..%s",donor_gene,acceptor_gene);
+    }
+
+    if (transcript_splicing_p == true || genestruct_iit != NULL) {
+      FREE(donor_genome_coord);
+      FREE(acceptor_genome_coord);
+      FREE(donor_gene);
+      FREE(acceptor_gene);
+    }
   }
 
   /* 12. TAGS: XC */
@@ -3007,19 +1896,21 @@ static void
 print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_T acceptor, Stage3end_T this, Stage3end_T mate,
 		    char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
 		    int absmq_score, int first_absmq, int second_absmq, int mapq_score,
-		    Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
-		    Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos, Chrpos_T mate_chrpos,
-		    int hardclip_low, int hardclip_high, Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
+		    Univ_IIT_T chromosome_iit, Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength,
+		    Chrpos_T donor_chrpos, Chrpos_T acceptor_chrpos,
+		    Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		    int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high,
+		    Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
 		    int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
 		    bool use_hardclip_p, bool print_xt_p, int acceptor_sensedir, char donor_strand, char acceptor_strand,
 		    char *donor_chr, char *acceptor_chr, char donor1, char donor2, char acceptor2, char acceptor1,
-		    double donor_prob, double acceptor_prob, bool circularp) {
+		    double donor_prob, double acceptor_prob, bool circularp, bool supplementaryp) {
   unsigned int flag = 0U;
   int nmismatches_refdiff = 0, nmismatches_bothdiff = 0, querylength;
   bool sensep;
   char *genomicfwd_refdiff, *genomicfwd_bothdiff, *genomicdir_refdiff, *genomicdir_bothdiff;
   int substring_start, substring_length;
-  int transloc_hardclip_low, transloc_hardclip_high;
+  /* int transloc_hardclip_low, transloc_hardclip_high; */
   bool plusp, printp;
   bool start_ambig, end_ambig;
   int n, i;
@@ -3027,6 +1918,12 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
 #ifdef PRINT_AMBIG_COORDS
   Univcoord_T chroffset;
 #endif
+  Chrpos_T donor_coord, acceptor_coord, chrpos;
+  char strand, *divstring, *donor_genome_coord = NULL, *acceptor_genome_coord = NULL,
+    *donor_gene = NULL, *acceptor_gene = NULL, *label, *p;
+  int match, *matches, nmatches;
+  char *annotation, *restofheader;
+  bool alloc_header_p, allocp;
 
 
   querylength = Shortread_fulllength(queryseq);
@@ -3042,7 +1939,7 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
   /* 2. FLAG */
   flag = SAM_compute_flag(plusp,mate,resulttype,first_read_p,
 			  pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
-			  absmq_score,first_absmq,invertp,invert_mate_p);
+			  absmq_score,first_absmq,invertp,invert_mate_p,supplementaryp);
   FPRINTF(fp,"\t%u",flag);
 
   /* 3. RNAME: chr */
@@ -3054,154 +1951,12 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
   FPRINTF(fp,"\t%d\t",mapq_score);
 
   /* 6. CIGAR */
-  if (Stage3end_sensedir(this) == SENSE_ANTI) {
-    sensep = false;
-  } else {
-    sensep = true;
-  }
-
-  if (use_hardclip_p == true) {
-    if (sensep == true) {
-      if (plusp == true) {
-	transloc_hardclip_high = 0;
-	transloc_hardclip_low = Substring_querystart(acceptor);
-      } else {
-	transloc_hardclip_low = 0;
-	transloc_hardclip_high = Substring_querystart(acceptor);
-      }
-
-    } else {
-      if (plusp == true) {
-	transloc_hardclip_low = 0;
-	transloc_hardclip_high = querylength - Substring_queryend(acceptor);
-      } else {
-	transloc_hardclip_high = 0;
-	transloc_hardclip_low = querylength - Substring_queryend(acceptor);
-      }
-    }
-
-    if (transloc_hardclip_low > hardclip_low) {
-      hardclip_low = transloc_hardclip_low;
-    }
-    if (transloc_hardclip_high > hardclip_high) {
-      hardclip_high = transloc_hardclip_high;
-    }
-  }
-
-
-  if (sensep == true) {
-    /* Doesn't hold for DNA-Seq chimeras */
-    /* assert(Substring_siteA_pos(acceptor) == Substring_querystart(acceptor)); */
-    if (plusp == true) {
-      /* sensep true, plusp true */
-      /* FPRINTF(fp,"acceptor sensep true, plusp true\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',Substring_querystart(acceptor) + Substring_match_length(acceptor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
-		    /*querypos*/Substring_queryend(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/Substring_trim_right(acceptor));
-      } else {
-	print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
-		    /*querypos*/Substring_querystart(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
-		    /*querypos*/Substring_queryend(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/Substring_trim_right(acceptor));
-      }
-
-    } else {
-      /* sensep true, plusp false */
-      /* FPRINTF(fp,"acceptor sensep true, plusp false\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/Substring_trim_right(acceptor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor) + Substring_querystart(acceptor),
-		    /*querypos*/Substring_queryend(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/true,
-		    /*trimlength*/0);
-      } else {
-	print_cigar(fp,/*type*/'E',querylength - Substring_queryend(acceptor),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/Substring_trim_right(acceptor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
-		    /*querypos*/Substring_queryend(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'S',Substring_querystart(acceptor),
-		    /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/true,/*trimlength*/0);
-      }
-    }
-
-  } else {
-    /* sensep false, plusp true */
-    /* Doesn't hold for DNA-Seq chimeras */
-    /* assert(Substring_siteA_pos(acceptor) == Substring_queryend(acceptor)); */
-    if (plusp == true) {
-      /* FPRINTF(fp,"acceptor sensep false, plusp true\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(acceptor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor) + (querylength - Substring_queryend(acceptor)),
-		    /*querypos*/Substring_querystart(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/0);
-      } else {
-	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
-		    /*querypos*/0,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/true,/*lastp*/false,/*trimlength*/Substring_trim_left(acceptor));
-	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
-		    /*querypos*/Substring_querystart(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
-		    /*querypos*/Substring_queryend(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/true,/*lastp*/true,
-		    /*trimlength*/Substring_trim_right(acceptor));
-      }
-
-    } else {
-      /* sensep false, plusp false */
-      /* FPRINTF(fp,"acceptor sensep false, plusp false\n"); */
-      if (hide_soft_clips_p == true) {
-	print_cigar(fp,/*type*/'M',(querylength - Substring_queryend(acceptor)) + Substring_match_length(acceptor),
-		    /*querypos*/querylength,querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/false,/*trimlength*/0);
-	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
-		    /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/true,/*trimlength*/Substring_trim_left(acceptor));
-      } else {
-	print_cigar(fp,/*type*/'S',querylength - Substring_queryend(acceptor),
-		    /*querypos*/querylength,querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'M',Substring_match_length(acceptor),
-		    /*querypos*/Substring_queryend(acceptor),querylength,
-		    hardclip_low,hardclip_high,/*plusp*/false,/*lastp*/false,
-		    /*trimlength*/0);
-	print_cigar(fp,/*type*/'E',Substring_querystart(acceptor),
-		    /*querypos*/Substring_querystart(acceptor),querylength,hardclip_low,hardclip_high,
-		    /*plusp*/false,/*lastp*/true,/*trimlength*/Substring_trim_left(acceptor));
-      }
-    }
-  }
-
+  Cigar_print_halfacceptor(fp,acceptor,this,querylength,&hardclip_low,&hardclip_high,use_hardclip_p);
 
   /* 7. MRNM: Mate chr */
   /* 8. MPOS: Mate chrpos */
   /* For anchor_chrnum, previously used Stage3end_chrnum(this), but this is 0 */
-  print_mate_chromosomal_pos(fp,Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-			     mate_chrpos,Stage3end_chrlength(mate),
+  print_mate_chromosomal_pos(fp,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
 			     /*anchor_chrnum*/Substring_chrnum(acceptor),acceptor_chrpos,chromosome_iit);
 
 
@@ -3212,12 +1967,12 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
     } else {
       FPRINTF(fp,"\t%d",pairedlength);
     }
-  } else if (mate_chrpos == 0) {
+  } else if (mate_chrpos_low == 0) {
     FPRINTF(fp,"\t%d",pairedlength);
 #if 0
-  } else if (concordant_chrpos < mate_chrpos) {
+  } else if (concordant_chrpos < mate_chrpos_low) {
     FPRINTF(fp,"\t%d",pairedlength);
-  } else if (concordant_chrpos > mate_chrpos) {
+  } else if (concordant_chrpos > mate_chrpos_low) {
     FPRINTF(fp,"\t%d",-pairedlength);
 #endif
   } else if (first_read_p == true) {
@@ -3240,6 +1995,14 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
   }
 
 
+  /* 12. TAGS: XM */
+  if (queryseq_mate == NULL) {
+    /* Unpaired alignment.  Don't print XM. */
+  } else {
+    FPRINTF(fp,"\tXM:Z:");
+    Cigar_print_mate(fp,mate,Shortread_fulllength(queryseq_mate),mate_hardclip_low,mate_hardclip_high);
+  }
+
   /* 12. TAGS: RG */
   if (sam_read_group_id != NULL) {
     FPRINTF(fp,"\tRG:Z:%s",sam_read_group_id);
@@ -3274,6 +2037,12 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
   FPRINTF(fp,"\tMD:Z:");
   printp = false;
 
+  if (Stage3end_sensedir(this) == SENSE_ANTI) {
+    sensep = false;
+  } else {
+    sensep = true;
+  }
+
   if (hide_soft_clips_p == true) {
     substring_start = Substring_querystart_orig(acceptor);
     substring_length = Substring_match_length_orig(acceptor);
@@ -3492,9 +2261,111 @@ print_halfacceptor (Filestring_T fp, char *abbrev, Substring_T donor, Substring_
 
   /* 12. TAGS: XT */
   if (print_xt_p == true) {
+    donor_coord = Substring_chr_splicecoord_D(donor,donor_strand);
+    acceptor_coord = Substring_chr_splicecoord_A(acceptor,acceptor_strand);
     FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob);
-    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor),
-	    acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor));
+    FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,donor_coord,
+	    acceptor_strand,acceptor_chr,acceptor_coord);
+
+    if (donor_prob > 1.0 && transcript_splicing_p == true) {
+      /* donor_coord is really a known transcript splicesite */
+      /* Assumes that IIT label starts with <gene>. and that coordinates follow " 0 " in header */
+#if 0
+      /* This value for match does not work.  knowni is for splicetrie, not splicing_iit */
+      match = Substring_splicesitesD_knowni(donor);
+      IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+#else
+      matches = IIT_get_typed(&nmatches,splicing_iit,donor_chr,donor_coord,donor_coord,donor_typeint,/*sortp*/false);
+      if (nmatches > 0) {
+	match = matches[0];
+	IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+	p = annotation = &(restofheader[3]); /* Skip " 0 " */
+	while (*p != '\0' && *p != '\n' && !isspace(*p)) p++;
+	donor_genome_coord = (char *) MALLOC((p - annotation + 1)*sizeof(char));
+	strncpy(donor_genome_coord,annotation,p - annotation);
+	donor_genome_coord[p - annotation] = '\0';
+	if (alloc_header_p) {
+	  FREE(restofheader);
+	}
+	
+	p = label = IIT_label(splicing_iit,match,&allocp);
+	while (*p != '\0' && *p != '.') p++;
+	donor_gene = (char *) MALLOC((p - label + 1)*sizeof(char));
+	strncpy(donor_gene,label,p - label);
+	donor_gene[p - label] = '\0';
+	if (allocp) {
+	  FREE(label);
+	}
+	FREE(matches);
+      }
+#endif
+
+    } else if (genestruct_iit != NULL) {
+      /* Also a transcript splice, but not at a known site */
+      if ((chrpos = IIT_genestruct_chrpos(&strand,&divstring,&donor_gene,
+					  genestruct_iit,donor_chr,donor_coord)) > 0) {
+	/* chrpos is an unsigned int, so must be 10 chars at most */
+	donor_genome_coord = (char *) MALLOC((1+strlen(divstring)+11)*sizeof(char));
+	sprintf(donor_genome_coord,"%c%s@%u",strand,divstring,chrpos);
+      }
+    }
+  
+
+    if (acceptor_prob > 1.0 && transcript_splicing_p == true) {
+      /* acceptor_coord is really a known transcript splicesite */
+      /* Assumes that IIT label starts with <gene>. and that coordinates follow " 0 " in header */
+#if 0
+      /* This value for match does not work.  knowni is for splicetrie, not splicing_iit */
+      match = Substring_splicesitesA_knowni(acceptor);
+      IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+#else
+      matches = IIT_get_typed(&nmatches,splicing_iit,acceptor_chr,acceptor_coord,acceptor_coord,acceptor_typeint,/*sortp*/false);
+      if (nmatches > 0) {
+	match = matches[0];
+	/* annotation = */ IIT_annotation(&restofheader,splicing_iit,match,&alloc_header_p);
+	p = annotation = &(restofheader[3]); /* Skip " 0 " */
+	while (*p != '\0' && *p != '\n' && !isspace(*p)) p++;
+	acceptor_genome_coord = (char *) MALLOC((p - annotation + 1)*sizeof(char));
+	strncpy(acceptor_genome_coord,annotation,p - annotation);
+	acceptor_genome_coord[p - annotation] = '\0';
+	if (alloc_header_p) {
+	  FREE(restofheader);
+	}
+	
+	p = label = IIT_label(splicing_iit,match,&allocp);
+	while (*p != '\0' && *p != '.') p++;
+	acceptor_gene = (char *) MALLOC((p - label + 1)*sizeof(char));
+	strncpy(acceptor_gene,label,p - label);
+	acceptor_gene[p - label] = '\0';
+	if (allocp) {
+	  FREE(label);
+	}
+	FREE(matches);
+      }
+#endif
+
+    } else if (genestruct_iit != NULL) {
+      /* Also a transcript splice, but not at a known site */
+      if ((chrpos = IIT_genestruct_chrpos(&strand,&divstring,&acceptor_gene,
+					  genestruct_iit,acceptor_chr,acceptor_coord)) > 0) {
+	/* chrpos is an unsigned int, so must be 10 chars at most */
+	acceptor_genome_coord = (char *) MALLOC((1+strlen(divstring)+11)*sizeof(char));
+	sprintf(acceptor_genome_coord,"%c%s@%u",strand,divstring,chrpos);
+      }
+    }
+
+
+    if (donor_genome_coord != NULL && acceptor_genome_coord != NULL && donor_gene != NULL && acceptor_gene != NULL) {
+      FPRINTF(fp,",%s..%s",donor_genome_coord,acceptor_genome_coord);
+      FPRINTF(fp,",%s..%s",donor_gene,acceptor_gene);
+    }
+
+    if (transcript_splicing_p == true || genestruct_iit != NULL) {
+      FREE(donor_genome_coord);
+      FREE(acceptor_genome_coord);
+      FREE(donor_gene);
+      FREE(acceptor_gene);
+    }
   }
 
 
@@ -3518,10 +2389,12 @@ static void
 print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T mate,
 		 char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
 		 int absmq_score, int first_absmq, int second_absmq, int mapq_score,
-		 Univ_IIT_T chromosome_iit, Shortread_T queryseq, int pairedlength,
-		 Chrpos_T mate_chrpos, int hardclip_low, int hardclip_high,
-		 Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
-		 int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
+		 Univ_IIT_T chromosome_iit, Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength,
+		 Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+		 int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high,
+		 Resulttype_T resulttype, bool first_read_p,
+		 bool artificial_mate_p, int npaths_mate, int quality_shift,
+		 char *sam_read_group_id, bool invertp, bool invert_mate_p) {
   Chrpos_T donor_chrpos, acceptor_chrpos;
   Substring_T donor, acceptor;
   char *donor_chr, *acceptor_chr;
@@ -3544,8 +2417,8 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
   /* Shouldn't have any overlap on a distant splice */
   hardclip_low = hardclip_high = 0;
 
-  donor_chrpos = Substring_compute_chrpos(donor,hardclip_low,hide_soft_clips_p);
-  acceptor_chrpos = Substring_compute_chrpos(acceptor,hardclip_low,hide_soft_clips_p);
+  donor_chrpos = Substring_compute_chrpos(donor,hardclip_low,hardclip_high,hide_soft_clips_p);
+  acceptor_chrpos = Substring_compute_chrpos(acceptor,hardclip_low,hardclip_high,hide_soft_clips_p);
 
   halfdonor_dinucleotide(&donor1,&donor2,donor,sensedir);
   halfacceptor_dinucleotide(&acceptor2,&acceptor1,acceptor,sensedir);
@@ -3605,138 +2478,149 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      donor_chrpos,acceptor_chrpos,mate_chrpos,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 		      /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+		      mate_hardclip_low,mate_hardclip_high,
 		      resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/true);
+		      /*circularp*/true,/*supplementaryp*/false);
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 		      /*hardclip_low*/circularpos,/*hardclip_high*/0,
+		      mate_hardclip_low,mate_hardclip_high,
 		      resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/true);
+		      /*circularp*/true,/*supplementaryp*/true);
     } else {
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      donor_chrpos,acceptor_chrpos,mate_chrpos,
-		      hardclip_low,hardclip_high,resulttype,first_read_p,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
+		      hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		      resulttype,first_read_p,
 		      artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/false);
+		      /*circularp*/false,/*supplementaryp*/false);
     }
 
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,acceptor_chrpos,mate_chrpos,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 			 /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+			 mate_hardclip_low,mate_hardclip_high,
 			 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/true);
+			 /*circularp*/true,/*supplementaryp*/false);
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrnum,mate_chrpos_low,
 			 /*hardclip_low*/circularpos,/*hardclip_high*/0,
+			 mate_hardclip_low,mate_hardclip_high,
 			 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/true);
+			 /*circularp*/true,/*supplementaryp*/true);
     } else {
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,acceptor_chrpos,mate_chrpos,
-			 hardclip_low,hardclip_high,resulttype,first_read_p,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
+			 hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+			 resulttype,first_read_p,
 			 artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/false);
+			 /*circularp*/false,/*supplementaryp*/false);
     }
 
   } else if (Stage3end_sensedir(this) == SENSE_ANTI) {
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,acceptor_chrpos,mate_chrpos,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 			 /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+			 mate_hardclip_low,mate_hardclip_high,
 			 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/true);
+			 /*circularp*/true,/*supplementaryp*/false);
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrnum,mate_chrpos_low,
 			 /*hardclip_low*/circularpos,/*hardclip_high*/0,
+			 mate_hardclip_low,mate_hardclip_high,
 			 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/true);
+			 /*circularp*/true,/*supplementaryp*/true);
     } else {
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,acceptor_chrpos,mate_chrpos,
-			 hardclip_low,hardclip_high,resulttype,first_read_p,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
+			 hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,resulttype,first_read_p,
 			 artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/false);
+			 /*circularp*/false,/*supplementaryp*/false);
     }
 
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      donor_chrpos,acceptor_chrpos,mate_chrpos,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 		      /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+		      mate_hardclip_low,mate_hardclip_high,
 		      resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/true);
+		      /*circularp*/true,/*supplementaryp*/false);
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 		      /*hardclip_low*/circularpos,/*hardclip_high*/0,
+		      mate_hardclip_low,mate_hardclip_high,
 		      resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/true);
+		      /*circularp*/true,/*supplementaryp*/true);
     } else {
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      donor_chrpos,acceptor_chrpos,mate_chrpos,
-		      hardclip_low,hardclip_high,resulttype,first_read_p,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
+		      hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		      resulttype,first_read_p,
 		      artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/false);
+		      /*circularp*/false,/*supplementaryp*/false);
     }
 
   } else {
@@ -3744,69 +2628,75 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,acceptor_chrpos,mate_chrpos,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 			 /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+			 mate_hardclip_low,mate_hardclip_high,
 			 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/true);
+			 /*circularp*/true,/*supplementaryp*/false);
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrpos,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,/*acceptor_chrpos*/1,mate_chrnum,mate_chrpos_low,
 			 /*hardclip_low*/circularpos,/*hardclip_high*/0,
+			 mate_hardclip_low,mate_hardclip_high,
 			 resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/true);
+			 /*circularp*/true,/*supplementaryp*/true);
     } else {
       print_halfacceptor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 			 absmq_score,first_absmq,second_absmq,mapq_score,
-			 chromosome_iit,queryseq,pairedlength,
-			 donor_chrpos,acceptor_chrpos,mate_chrpos,
-			 hardclip_low,hardclip_high,resulttype,first_read_p,
+			 chromosome_iit,queryseq,queryseq_mate,pairedlength,
+			 donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
+			 hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+			 resulttype,first_read_p,
 			 artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 			 invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 			 acceptor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 			 donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-			 /*circularp*/false);
+			 /*circularp*/false,/*supplementaryp*/false);
     }
 
     if (0 && (circularpos = Stage3end_circularpos(this)) > 0) {
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      donor_chrpos,acceptor_chrpos,mate_chrpos,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 		      /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+		      mate_hardclip_low,mate_hardclip_high,
 		      resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/true);
+		      /*circularp*/true,/*supplementaryp*/false);
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrpos,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      /*donor_chrpos*/1,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
 		      /*hardclip_low*/circularpos,/*hardclip_high*/0,
+		      mate_hardclip_low,mate_hardclip_high,
 		      resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/true);
+		      /*circularp*/true,/*supplementaryp*/true);
     } else {
       print_halfdonor(fp,abbrev,donor,acceptor,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
 		      absmq_score,first_absmq,second_absmq,mapq_score,
-		      chromosome_iit,queryseq,pairedlength,
-		      donor_chrpos,acceptor_chrpos,mate_chrpos,
-		      hardclip_low,hardclip_high,resulttype,first_read_p,
+		      chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      donor_chrpos,acceptor_chrpos,mate_chrnum,mate_chrpos_low,
+		      hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		      resulttype,first_read_p,
 		      artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p,/*use_hardclip_p*/true,/*print_xt_p*/true,
 		      donor_sensedir,donor_strand,acceptor_strand,donor_chr,acceptor_chr,
 		      donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob,
-		      /*circularp*/false);
+		      /*circularp*/false,/*supplementaryp*/false);
     }
   }
 
@@ -3823,12 +2713,12 @@ print_exon_exon (Filestring_T fp, char *abbrev, Stage3end_T this, Stage3end_T ma
 void
 SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
 	   Stage3end_T this, Stage3end_T mate, char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
-	   int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
-	   Shortread_T queryseq_mate, int pairedlength, Chrpos_T chrpos, Chrpos_T mate_chrpos,
-	   int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high,
+	   int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit,
+	   Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength,
+	   Chrnum_T chrnum, Chrpos_T chrpos, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+	   int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high,
 	   Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
-	   int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p,
-	   bool merge_samechr_p) {
+	   int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p) {
   Hittype_T hittype;
   unsigned int flag;
   int circularpos, querylength;
@@ -3839,9 +2729,10 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
   /* Test for nomapping was chrpos == 0, but we can actually align to chrpos 0 */
   /* Also, can use this test here because --quiet-if-excessive cases go directly to SAM_print_nomapping */
   if (npaths_primary + npaths_altloc == 0) {
-    SAM_print_nomapping(fp,abbrev,queryseq,mate,acc1,acc2,chromosome_iit,resulttype,first_read_p,
+    SAM_print_nomapping(fp,abbrev,queryseq,queryseq_mate,mate,acc1,acc2,chromosome_iit,resulttype,first_read_p,
 			/*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,artificial_mate_p,npaths_mate,
-			mate_chrpos,quality_shift,sam_read_group_id,invertp,invert_mate_p);
+			mate_chrnum,mate_chrpos_low,mate_hardclip_low,mate_hardclip_high,quality_shift,sam_read_group_id,
+			invertp,invert_mate_p);
 
     if (fp_failedinput != NULL) {
       if (first_read_p == true) {
@@ -3854,27 +2745,26 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
   } else if (hittype == GMAP) {
     /* Note: sam_paired_p must be true because we are calling GMAP only on halfmapping uniq */
 
+#if 0
+    /* Values provided as parameters */
     if (mate == NULL) {
-      chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
-				  this,Shortread_fulllength(queryseq),/*first_read_p*/true);
-      mate_chrpos = 0U;
+      chrpos_low = SAM_compute_chrpos(&chrnum,/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
+				      this,Shortread_fulllength(queryseq),/*first_read_p*/true);
+      mate_chrpos_low = 0U;
       hardclip3_low = hardclip3_high = 0;
 
     } else if (first_read_p == true) {
-      chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
-				  this,Shortread_fulllength(queryseq),/*first_read_p*/true);
-      mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
-				       mate,Shortread_fulllength(queryseq_mate),/*first_read_p*/false);
+      chrpos_low = SAM_compute_chrpos(&chrnum,/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+				      this,Shortread_fulllength(queryseq),/*first_read_p*/true);
+      mate_chrpos_low = SAM_compute_chrpos(&mate_chrnum,/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+					   mate,Shortread_fulllength(queryseq_mate),/*first_read_p*/false);
     } else {
-      chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
-				  this,Shortread_fulllength(queryseq),/*first_read_p*/false);
-      mate_chrpos = SAM_compute_chrpos(/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
-				       mate,Shortread_fulllength(queryseq_mate),/*first_read_p*/true);
+      chrpos_low = SAM_compute_chrpos(&chrnum,/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+				      this,Shortread_fulllength(queryseq),/*first_read_p*/false);
+      mate_chrpos_low = SAM_compute_chrpos(&mate_chrnum,/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+					   mate,Shortread_fulllength(queryseq_mate),/*first_read_p*/true);
     }
-
-    flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
-			    pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
-			    absmq_score,first_absmq,invertp,invert_mate_p);
+#endif
 
     querylength = Shortread_fulllength(queryseq);
     if ((circularpos = Stage3end_circularpos(this)) > 0
@@ -3887,73 +2777,92 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
 			 /*watsonp*/Stage3end_plusp(this),first_read_p,/*circularp*/true) == true
 #endif
 	) {
+      flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
+			      pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
+			      absmq_score,first_absmq,invertp,invert_mate_p,/*supplementaryp*/false);
       Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
 		     Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
-		     acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
+		     acc1,acc2,chrnum,chromosome_iit,/*usersegment*/(Sequence_T) NULL,
 		     Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
-		     /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,Shortread_fulllength(queryseq),
+		     /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+		     mate_hardclip_low,mate_hardclip_high,
+		     Shortread_fulllength(queryseq),
 		     /*watsonp*/Stage3end_plusp(this),Stage3end_sensedir(this),
 		     /*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
 		     pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,chrpos,Stage3end_chrlength(this),
-		     queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
-		     Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-		     mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
-		     pairedlength,sam_read_group_id,invertp,/*merged_overlap_p*/false,
-		     Stage3end_sarrayp(this));
+		     queryseq,queryseq_mate,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
+		     mate,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
+		     /*mate_sensedir*/Stage3end_sensedir(mate),pairedlength,sam_read_group_id,
+		     invertp,/*merged_overlap_p*/false,Stage3end_sarrayp(this));
+
+      flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
+			      pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
+			      absmq_score,first_absmq,invertp,invert_mate_p,/*supplementaryp*/true);
       Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
 		     Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
-		     acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
+		     acc1,acc2,chrnum,chromosome_iit,/*usersegment*/(Sequence_T) NULL,
 		     Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
-		     /*hardclip_low*/circularpos,/*hardclip_high*/0,Shortread_fulllength(queryseq),
+		     /*hardclip_low*/circularpos,/*hardclip_high*/0,
+		     mate_hardclip_low,mate_hardclip_high,
+		     Shortread_fulllength(queryseq),
 		     /*watsonp*/Stage3end_plusp(this),Stage3end_sensedir(this),
 		     /*chimera_part*/0,/*chimera*/NULL,quality_shift,first_read_p,
 		     pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,/*chrpos*/1,Stage3end_chrlength(this),
-		     queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
-		     Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-		     mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
-		     pairedlength,sam_read_group_id,invertp,/*merged_overlap_p*/false,
-		     Stage3end_sarrayp(this));
+		     queryseq,queryseq_mate,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
+		     mate,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
+		     /*mate_sensedir*/Stage3end_sensedir(mate),pairedlength,sam_read_group_id,
+		     invertp,/*merged_overlap_p*/false,Stage3end_sarrayp(this));
+
     } else if (first_read_p == true) {
+      flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
+			      pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
+			      absmq_score,first_absmq,invertp,invert_mate_p,/*supplementaryp*/false);
       Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
 		     Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
-		     acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
+		     acc1,acc2,chrnum,chromosome_iit,/*usersegment*/(Sequence_T) NULL,
 		     Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
-		     hardclip5_low,hardclip5_high,Shortread_fulllength(queryseq),
-		     Stage3end_plusp(this),Stage3end_sensedir(this),
+		     hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		     Shortread_fulllength(queryseq),Stage3end_plusp(this),Stage3end_sensedir(this),
 		     /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
 		     pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,chrpos,Stage3end_chrlength(this),
-		     queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
-		     Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-		     mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
-		     pairedlength,sam_read_group_id,invertp,/*merged_overlap_p*/false,
-		     Stage3end_sarrayp(this));
+		     queryseq,queryseq_mate,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
+		     mate,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
+		     /*mate_sensedir*/Stage3end_sensedir(mate),pairedlength,sam_read_group_id,
+		     invertp,/*merged_overlap_p*/false,Stage3end_sarrayp(this));
+
     } else {
+      flag = SAM_compute_flag(Stage3end_plusp(this),mate,resulttype,first_read_p,
+			      pathnum,npaths_primary + npaths_altloc,artificial_mate_p,npaths_mate,
+			      absmq_score,first_absmq,invertp,invert_mate_p,/*supplementaryp*/false);
       Pair_print_sam(fp,abbrev,Stage3end_pairarray(this),Stage3end_npairs(this),
 		     Stage3end_cigar_tokens(this),Stage3end_gmap_intronp(this),
-		     acc1,acc2,Stage3end_chrnum(this),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
+		     acc1,acc2,chrnum,chromosome_iit,/*usersegment*/(Sequence_T) NULL,
 		     Shortread_fullpointer(queryseq),Shortread_quality_string(queryseq),
-		     hardclip3_low,hardclip3_high,Shortread_fulllength(queryseq),
-		     Stage3end_plusp(this),Stage3end_sensedir(this),
+		     hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		     Shortread_fulllength(queryseq),Stage3end_plusp(this),Stage3end_sensedir(this),
 		     /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/false,
 		     pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,chrpos,Stage3end_chrlength(this),
-		     queryseq,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
-		     Stage3end_chrnum(mate),Stage3end_effective_chrnum(mate),
-		     mate_chrpos,Stage3end_chrlength(mate),/*mate_sensedir*/Stage3end_sensedir(mate),
-		     pairedlength,sam_read_group_id,invertp,/*merged_overlap_p*/false,
-		     Stage3end_sarrayp(this));
+		     queryseq,queryseq_mate,resulttype,flag,/*pair_mapq_score*/mapq_score,/*end_mapq_score*/mapq_score,
+		     mate,mate_chrnum,mate_chrpos_low,Stage3end_chrlength(mate),
+		     /*mate_sensedir*/Stage3end_sensedir(mate),pairedlength,sam_read_group_id,
+		     invertp,/*merged_overlap_p*/false,Stage3end_sarrayp(this));
     }
 
   } else if (hittype == TRANSLOC_SPLICE || (hittype == SAMECHR_SPLICE && merge_samechr_p == false)) {
     if (first_read_p == true) {
       print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
-		      absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
-		      mate_chrpos,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+		      absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      mate_chrnum,mate_chrpos_low,
+		      hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		      resulttype,/*first_read_p*/true,
 		      artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p);
     } else {
       print_exon_exon(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
-		      absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,pairedlength,
-		      mate_chrpos,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
+		      absmq_score,first_absmq,second_absmq,mapq_score,chromosome_iit,queryseq,queryseq_mate,pairedlength,
+		      mate_chrnum,mate_chrpos_low,
+		      hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		      resulttype,/*first_read_p*/false,
 		      artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
 		      invertp,invert_mate_p);
     }
@@ -3967,27 +2876,35 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
 #endif
 	) {
       print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
-		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
-		       chrpos,mate_chrpos,/*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,queryseq_mate,pairedlength,
+		       chrpos,mate_chrnum,mate_chrpos_low,
+		       /*hardclip_low*/0,/*hardclip_high*/querylength-circularpos,
+		       mate_hardclip_low,mate_hardclip_high,
 		       resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
-		       invertp,invert_mate_p,/*circularp*/true);
+		       invertp,invert_mate_p,/*circularp*/true,/*supplementaryp*/false);
       print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
-		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
-		       /*chrpos*/1,mate_chrpos,/*hardclip_low*/circularpos,/*hardclip_high*/0,
+		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,queryseq_mate,pairedlength,
+		       /*chrpos*/1,mate_chrnum,mate_chrpos_low,
+		       /*hardclip_low*/circularpos,/*hardclip_high*/0,mate_hardclip_low,mate_hardclip_high,
+
 		       resulttype,first_read_p,artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
-		       invertp,invert_mate_p,/*circularp*/true);
+		       invertp,invert_mate_p,/*circularp*/true,/*supplementaryp*/true);
     } else if (first_read_p == true) {
       print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
-		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
-		       chrpos,mate_chrpos,hardclip5_low,hardclip5_high,resulttype,/*first_read_p*/true,
+		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,queryseq_mate,pairedlength,
+		       chrpos,mate_chrnum,mate_chrpos_low,
+		       hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		       resulttype,/*first_read_p*/true,
 		       artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
-		       invertp,invert_mate_p,/*circularp*/false);
+		       invertp,invert_mate_p,/*circularp*/false,/*supplementaryp*/false);
     } else {
       print_substrings(fp,abbrev,this,mate,acc1,acc2,pathnum,npaths_primary,npaths_altloc,
-		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,pairedlength,
-		       chrpos,mate_chrpos,hardclip3_low,hardclip3_high,resulttype,/*first_read_p*/false,
+		       absmq_score,first_absmq,second_absmq,mapq_score,queryseq,queryseq_mate,pairedlength,
+		       chrpos,mate_chrnum,mate_chrpos_low,
+		       hardclip_low,hardclip_high,mate_hardclip_low,mate_hardclip_high,
+		       resulttype,/*first_read_p*/false,
 		       artificial_mate_p,npaths_mate,quality_shift,sam_read_group_id,
-		       invertp,invert_mate_p,/*circularp*/false);
+		       invertp,invert_mate_p,/*circularp*/false,/*supplementaryp*/false);
     }
   }
 
@@ -3995,16 +2912,15 @@ SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
 }
 
 
-
 void
 SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2,
 		  Result_T result, Resulttype_T resulttype, Univ_IIT_T chromosome_iit,
 		  Shortread_T queryseq1, Shortread_T queryseq2, bool invert_first_p, bool invert_second_p,
-		  bool nofailsp, bool failsonlyp, bool merge_samechr_p,
-		  int quality_shift, char *sam_read_group_id) {
+		  bool nofailsp, bool failsonlyp, int quality_shift, char *sam_read_group_id) {
   Stage3pair_T *stage3pairarray, stage3pair;
   Stage3end_T *stage3array1, *stage3array2, stage3, mate, hit5, hit3;
-  Chrpos_T chrpos, chrpos5, chrpos3;
+  Chrnum_T chrnum, chrnum5, chrnum3;
+  Chrpos_T chrpos_low, chrpos_low_5, chrpos_low_3;
   int npaths_primary, npaths_altloc, npaths_max, npaths_primary_max, npaths_altloc_max,
     npaths1_primary, npaths1_altloc, npaths2_primary, npaths2_altloc, pathnum;
   int first_absmq, second_absmq, first_absmq1, second_absmq1, first_absmq2, second_absmq2;
@@ -4027,24 +2943,25 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
       /* No output */
       return;
       
-    } else 
+    } else {
       Filestring_set_split_output(fp,OUTPUT_NM);
-      SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,queryseq1,/*mate*/(Stage3end_T) NULL,
+      SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,queryseq1,/*queryseq_mate*/queryseq2,/*mate*/(Stage3end_T) NULL,
 			  acc1,acc2,chromosome_iit,resulttype,
 			  /*first_read_p*/true,/*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,
 			  /*artificial_mate_p*/false,/*npaths_mate*/0,
-			  /*mate_chrpos*/0U,quality_shift,
-			  sam_read_group_id,invert_first_p,invert_second_p);
-      SAM_print_nomapping(fp,ABBREV_NOMAPPING_2,queryseq2,/*mate*/(Stage3end_T) NULL,
+			  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			  quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+      SAM_print_nomapping(fp,ABBREV_NOMAPPING_2,queryseq2,/*queryseq_mate*/queryseq1,/*mate*/(Stage3end_T) NULL,
 			  acc1,acc2,chromosome_iit,resulttype,
 			  /*first_read_p*/false,/*pathnum*/0,/*npaths_primary*/0,/*npaths_altloc*/0,
 			  /*artificial_mate_p*/false,/*npaths_mate*/0,
-			  /*mate_chrpos*/0U,quality_shift,
-			  sam_read_group_id,invert_second_p,invert_first_p);
+			  /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			  quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
       if (fp_failedinput_1 != NULL) {
 	Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
       }
+    }
 
   } else {
     if (failsonlyp == true) {
@@ -4057,7 +2974,7 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
       stage3pair = stage3pairarray[0];
       hit5 = Stage3pair_hit5(stage3pair);
       hit3 = Stage3pair_hit3(stage3pair);
-
+      
       if (Stage3pair_circularp(stage3pair) == true) {
 	/* Don't resolve overlaps on a circular alignment */
 	clipdir = 0;
@@ -4065,90 +2982,102 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	Filestring_set_split_output(fp,OUTPUT_CC);
 	abbrev = ABBREV_CONCORDANT_CIRCULAR;
 
-      } else if (clip_overlap_p == false && merge_overlap_p == false) {
-	clipdir = 0;
-	hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
-	Filestring_set_split_output(fp,OUTPUT_CU);
-	abbrev = ABBREV_CONCORDANT_UNIQ;
+      } else if (omit_concordant_uniq_p == true) {
+	Filestring_set_split_output(fp,OUTPUT_NONE);
 
       } else {
-	clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
-	debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
-		      clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
-	Filestring_set_split_output(fp,OUTPUT_CU);
-	abbrev = ABBREV_CONCORDANT_UNIQ;
-      }
+	if (clip_overlap_p == false && merge_overlap_p == false) {
+	  clipdir = 0;
+	  hardclip5_low = hardclip5_high = hardclip3_low = hardclip3_high = 0;
+	  Filestring_set_split_output(fp,OUTPUT_CU);
+	  abbrev = ABBREV_CONCORDANT_UNIQ;
+	  
+	} else {
+	  clipdir = Stage3pair_overlap(&hardclip5_low,&hardclip5_high,&hardclip3_low,&hardclip3_high,stage3pair);
+	  debug3(printf("clipdir %d with hardclip5 = %d..%d, hardclip3 = %d..%d\n",
+			clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
+	  Filestring_set_split_output(fp,OUTPUT_CU);
+	  abbrev = ABBREV_CONCORDANT_UNIQ;
+	}
 
-      chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
-      chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
-
-      if (merge_overlap_p == false || clipdir == 0) {
-	/* print first end */
-	SAM_print(fp,fp_failedinput_1,abbrev,hit5,/*mate*/hit3,
-		  acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
-		  Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
-		  Stage3pair_mapq_score(stage3pair),chromosome_iit,
-		  /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		  Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		  hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
-		  resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		  quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
-		  merge_samechr_p);
-
-	/* print second end */
-	SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,
-		  acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
-		  Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
-		  Stage3pair_mapq_score(stage3pair),chromosome_iit,
-		  /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		  Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		  hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
-		  resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		  quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
-		  merge_samechr_p);
+	chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
+					  hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
+					  hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+
+	if (merge_overlap_p == false || clipdir == 0) {
+	  /* print first end */
+	  SAM_print(fp,fp_failedinput_1,abbrev,hit5,/*mate*/hit3,
+		    acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
+		    Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
+		    Stage3pair_mapq_score(stage3pair),chromosome_iit,
+		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
+		    Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+		    /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
+		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+
+	  /* print second end */
+	  SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,
+		    acc1,acc2,/*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
+		    Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
+		    Stage3pair_mapq_score(stage3pair),chromosome_iit,
+		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
+		    Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+		    /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
+		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
-      } else {
-	/* merge_overlap_p == true and overlap was found */
-	pairarray = Stage3pair_merge(&npairs,&querylength_merged,&queryseq_merged,&quality_merged,
-				     stage3pair,queryseq1,queryseq2,
-				     /*querylength5*/Stage3end_querylength(hit5),
-				     /*querylength3*/Stage3end_querylength(hit3),
-				     clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
-	/* printf("queryseq_merged: %s\n",queryseq_merged); */
-	if (clipdir >= 0) {
-	  chrpos = chrpos5;
 	} else {
-	  chrpos = chrpos3;
-	}
-	/* merging changes resulttype from UNPAIRED_UNIQ to SINGLEEND_UNIQ */
-	flag = SAM_compute_flag(Stage3end_plusp(hit5),/*mate*/NULL,/*resulttype*/SINGLEEND_UNIQ,/*first_read_p*/true,
-				/*pathnum*/1,/*npaths*/1,/*artificial_mate_p*/false,/*npaths_mate*/0,
-				Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
-				/*invert_mate_p*/false);
-	Filestring_set_split_output(fp,OUTPUT_UU);
-	Pair_print_sam(fp,/*abbrev*/ABBREV_UNPAIRED_UNIQ,pairarray,npairs,/*cigar_tokens*/NULL,/*gmap_intronp*/false,
-		       acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,/*usersegment*/(Sequence_T) NULL,
-		       /*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
-		       /*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
-		       Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
-		       /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
-		       /*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
+	  /* merge_overlap_p == true and overlap was found */
+	  pairarray = Stage3pair_merge(&npairs,&querylength_merged,&queryseq_merged,&quality_merged,
+				       stage3pair,queryseq1,queryseq2,
+				       /*querylength5*/Stage3end_querylength(hit5),
+				       /*querylength3*/Stage3end_querylength(hit3),
+				       clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
+	  /* printf("queryseq_merged: %s\n",queryseq_merged); */
+	  if (clipdir >= 0) {
+	    chrnum = chrnum5;
+	    chrpos_low = chrpos_low_5;
+	  } else {
+	    chrnum = chrnum3;
+	    chrpos_low = chrpos_low_3;
+	  }
+	  /* merging changes resulttype from UNPAIRED_UNIQ to SINGLEEND_UNIQ */
+	  flag = SAM_compute_flag(Stage3end_plusp(hit5),/*mate*/NULL,/*resulttype*/SINGLEEND_UNIQ,/*first_read_p*/true,
+				  /*pathnum*/1,/*npaths*/1,/*artificial_mate_p*/false,/*npaths_mate*/0,
+				  Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
+				  /*invert_mate_p*/false,/*supplementaryp*/false);
+	  Filestring_set_split_output(fp,OUTPUT_UU);
+	  Pair_print_sam(fp,/*abbrev*/ABBREV_UNPAIRED_UNIQ,pairarray,npairs,/*cigar_tokens*/NULL,/*gmap_intronp*/false,
+			 acc1,/*acc2*/NULL,chrnum,chromosome_iit,/*usersegment*/(Sequence_T) NULL,
+			 /*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
+			 /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			 /*querylength*/querylength_merged,
+			 Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
+			 /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,
+			 /*pathnum*/1,/*npaths_primary*/1,/*npaths_altloc*/0,
 #if 0
-		       Stage3pair_absmq_score(stage3pair),/*second_absmq*/0,
+			 Stage3pair_absmq_score(stage3pair),/*second_absmq*/0,
 #else
-		       /*absmq_score*/MAX_QUALITY_SCORE,/*second_absmq*/0,
+			 /*absmq_score*/MAX_QUALITY_SCORE,/*second_absmq*/0,
 #endif
-		       chrpos,Stage3end_chrlength(hit5),/*queryseq*/NULL,resulttype,flag,
-		       /*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
-		       /*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,/*mate_chrlength*/0,
-		       /*mate_sensedir*/SENSE_NULL,/*pairedlength*/0,
-		       sam_read_group_id,/*invertp*/false,/*merged_overlap_p*/true,
-		       Stage3end_sarrayp(hit5));
-	if (quality_merged != NULL) {
-	  FREE_OUT(quality_merged);
+			 chrpos_low,Stage3end_chrlength(hit5),/*queryseq*/NULL,/*queryseq_mate*/NULL,resulttype,flag,
+			 /*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
+			 /*mate*/NULL,/*mate_chrnum*/0,/*mate_chrpos_low*/0,
+			 /*mate_chrlength*/0,/*mate_sensedir*/SENSE_NULL,/*pairedlength*/0,
+			 sam_read_group_id,/*invertp*/false,/*merged_overlap_p*/true,
+			 Stage3end_sarrayp(hit5));
+	  if (quality_merged != NULL) {
+	    FREE_OUT(quality_merged);
+	  }
+	  FREE_OUT(queryseq_merged);
+	  FREE_OUT(pairarray);
 	}
-	FREE_OUT(queryseq_merged);
-	FREE_OUT(pairarray);
       }
 
     } else if (resulttype == CONCORDANT_TRANSLOC) {
@@ -4158,16 +3087,18 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
       if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
 	/* Print as nomapping, but send to fp_concordant_transloc */
 	SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,
-			    queryseq1,/*mate*/(Stage3end_T) NULL,
+			    queryseq1,/*queryseq_mate*/queryseq2,/*mate*/(Stage3end_T) NULL,
 			    acc1,acc2,chromosome_iit,resulttype,
 			    /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
-			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,/*mate_chrpos*/0U,
+			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
+			    /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 			    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	SAM_print_nomapping(fp,ABBREV_CONCORDANT_TRANSLOC,
-			    queryseq2,/*mate*/(Stage3end_T) NULL,
+			    queryseq2,/*queryseq_mate*/queryseq1,/*mate*/(Stage3end_T) NULL,
 			    acc1,acc2,chromosome_iit,resulttype,
 			    /*first_read_p*/false,/*pathnum*/1,npaths_primary,npaths_altloc,
-			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,/*mate_chrpos*/0U,
+			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
+			    /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 			    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
       } else {
@@ -4194,8 +3125,10 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 			  clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
 	  }
 
-	  chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
-	  chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
+					    hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
+					    hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	  /* print first end */
 	  SAM_print(fp,fp_failedinput_1,ABBREV_CONCORDANT_TRANSLOC,
@@ -4203,11 +3136,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		    Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		    Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		    Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		    hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		    Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+		    /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
-		    merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	  /* print second end */
 	  SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_TRANSLOC,
@@ -4215,34 +3149,39 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		    Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		    Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		    Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		    hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		    Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+		    /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
-		    merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	}
       }
     
     } else if (resulttype == CONCORDANT_MULT) {
       stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
 
-      if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
+      if (omit_concordant_mult_p == true) {
+	/* Skip printing */
+	Filestring_set_split_output(fp,OUTPUT_NONE);
+	
+      } else if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths_report) {
 	/* Print as nomapping, but send to fp_concordant_mult_xs */
 	Filestring_set_split_output(fp,OUTPUT_CX);
 	SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,
-			    queryseq1,/*mate*/(Stage3end_T) NULL,
+			    queryseq1,/*queryseq_mate*/queryseq2,/*mate*/(Stage3end_T) NULL,
 			    acc1,acc2,chromosome_iit,resulttype,
 			    /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
 			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-			    /*mate_chrpos*/0U,quality_shift,
-			    sam_read_group_id,invert_first_p,invert_second_p);
+			    /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	SAM_print_nomapping(fp,ABBREV_CONCORDANT_MULT_XS,
-			    queryseq2,/*mate*/(Stage3end_T) NULL,
+			    queryseq2,/*queryseq_mate*/queryseq1,/*mate*/(Stage3end_T) NULL,
 			    acc1,acc2,chromosome_iit,resulttype,
 			    /*first_read_p*/false,/*pathnum*/1,npaths_primary,npaths_altloc,
 			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-			    /*mate_chrpos*/0U,quality_shift,
-			    sam_read_group_id,invert_second_p,invert_first_p);
+			    /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
 	if (fp_failedinput_1 != NULL) {
 	  Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
@@ -4273,8 +3212,10 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 			  clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high));
 	  }
 
-	  chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
-	  chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
+					    hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
+					    hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	  if (merge_overlap_p == false || clipdir == 0) {
 	    /* print first end */
@@ -4283,11 +3224,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		      Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		      Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		      /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		      Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		      hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		      Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		      /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+		      /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 		      resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
-		      merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	    /* print second end */
 	    SAM_print(fp,fp_failedinput_2,ABBREV_CONCORDANT_MULT,
@@ -4295,11 +3237,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		      Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		      Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		      /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		      Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		      hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		      Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		      /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+		      /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 		      resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
-		      merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	    
 	  } else {
 	    /* merge_overlap_p == true and overlap was found */
@@ -4310,20 +3253,23 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 					 clipdir,hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high);
 	    /* printf("queryseq_merged: %s\n",queryseq_merged); */
 	    if (clipdir >= 0) {
-	      chrpos = chrpos5;
+	      chrnum = chrnum5;
+	      chrpos_low = chrpos_low_5;
 	    } else {
-	      chrpos = chrpos3;
+	      chrnum = chrnum3;
+	      chrpos_low = chrpos_low_3;
 	    }
 	    /* merging changes resulttype from UNPAIRED_UNIQ to SINGLEEND_UNIQ */
 	    flag = SAM_compute_flag(Stage3end_plusp(hit5),/*mate*/NULL,/*resulttype*/SINGLEEND_UNIQ,/*first_read_p*/true,
 				    /*pathnum*/1,/*npaths*/1,/*artificial_mate_p*/false,/*npaths_mate*/0,
 				    Stage3pair_absmq_score(stage3pair),first_absmq,/*invertp*/false,
-				    /*invert_mate_p*/false);
+				    /*invert_mate_p*/false,/*supplementaryp*/false);
 	    Pair_print_sam(fp,ABBREV_CONCORDANT_MULT,pairarray,npairs,/*cigar_tokens*/NULL,/*gmap_intronp*/false,
-			   acc1,/*acc2*/NULL,Stage3end_chrnum(hit5),chromosome_iit,
+			   acc1,/*acc2*/NULL,chrnum,chromosome_iit,
 			   /*usersegment*/(Sequence_T) NULL,
 			   /*queryseq_ptr*/queryseq_merged,/*quality_string*/quality_merged,
-			   /*hardclip_low*/0,/*hardclip_high*/0,/*querylength*/querylength_merged,
+			   /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			   /*querylength*/querylength_merged,
 			   Stage3end_plusp(hit5),Stage3end_sensedir(hit5),
 			   /*chimera_part*/0,/*chimera*/NULL,quality_shift,/*first_read_p*/true,pathnum,
 			   npaths_primary,npaths_altloc,
@@ -4332,10 +3278,10 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 #else
 			   /*absmq_score*/MAX_QUALITY_SCORE,/*second_absmq*/0,
 #endif
-			   chrpos,Stage3end_chrlength(hit5),/*queryseq*/NULL,resulttype,flag,
+			   chrpos_low,Stage3end_chrlength(hit5),/*queryseq*/NULL,/*queryseq_mate*/NULL,resulttype,flag,
 			   /*pair_mapq_score*/MAX_QUALITY_SCORE,/*end_mapq_score*/MAX_QUALITY_SCORE,
-			   /*mate_chrnum*/0,/*mate_effective_chrnum*/0,/*mate_chrpos*/0,/*mate_chrlength*/0,
-			   /*mate_sensedir*/SENSE_NULL,/*pairedlength*/0,
+			   /*mate*/NULL,/*mate_chrnum*/0,/*mate_chrpos_low*/0,
+			   /*mate_chrlength*/0,/*mate_sensedir*/SENSE_NULL,/*pairedlength*/0,
 			   sam_read_group_id,/*invertp*/false,/*merged_overlap_p*/true,
 			   Stage3end_sarrayp(hit5));
 	    if (quality_merged != NULL) {
@@ -4373,8 +3319,10 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
       hit5 = Stage3pair_hit5(stage3pair);
       hit3 = Stage3pair_hit3(stage3pair);
-      chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
-      chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+      chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
+					hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+      chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
+					hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
       /* print first end */
       SAM_print(fp,fp_failedinput_1,abbrev,hit5,/*mate*/hit3,
@@ -4382,11 +3330,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
 		Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		/*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		/*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+		/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 		resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
-		merge_samechr_p);
+		quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
       /* print second end */
       SAM_print(fp,fp_failedinput_2,abbrev,hit3,/*mate*/hit5,
@@ -4394,11 +3343,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		Stage3pair_absmq_score(stage3pair),first_absmq,/*second_absmq*/0,
 		Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		/*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		/*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+		/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 		resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
-		merge_samechr_p);
+		quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
     } else if (resulttype == PAIRED_MULT) {
       stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
@@ -4407,19 +3357,19 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	/* Print as nomapping, but send to fp_paired_mult */
 	Filestring_set_split_output(fp,OUTPUT_PX);
 	SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,
-			    queryseq1,/*mate*/(Stage3end_T) NULL,
+			    queryseq1,/*queryseq_mate*/queryseq2,/*mate*/(Stage3end_T) NULL,
 			    acc1,acc2,chromosome_iit,resulttype,
 			    /*first_read_p*/true,/*pathnum*/1,npaths_primary,npaths_altloc,
 			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-			    /*mate_chrpos*/0U,quality_shift,
-			    sam_read_group_id,invert_first_p,invert_second_p);
+			    /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	SAM_print_nomapping(fp,ABBREV_PAIRED_MULT_XS,
-			    queryseq2,/*mate*/(Stage3end_T) NULL,
+			    queryseq2,/*queryseq_mate*/queryseq1,/*mate*/(Stage3end_T) NULL,
 			    acc1,acc2,chromosome_iit,resulttype,
 			    /*first_read_p*/false,/*pathnum*/1,npaths_primary,npaths_altloc,
 			    /*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-			    /*mate_chrpos*/0U,quality_shift,
-			    sam_read_group_id,invert_second_p,invert_first_p);
+			    /*mate_chrnum*/0,/*mate_chrpos_low*/0U,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
+			    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
 	if (fp_failedinput_1 != NULL) {
 	  Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2);
@@ -4435,8 +3385,10 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
 	  hit5 = Stage3pair_hit5(stage3pair);
 	  hit3 = Stage3pair_hit3(stage3pair);
-	  chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
-	  chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
+					    hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
+					    hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	  /* print first end */
 	  SAM_print(fp,fp_failedinput_1,ABBREV_PAIRED_MULT,
@@ -4444,11 +3396,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		    Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		    Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		    Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		    hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		    Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/hardclip5_low,/*hardclip_high*/hardclip5_high,
+		    /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p,
-		    merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	  /* print second end */
 	  SAM_print(fp,fp_failedinput_2,ABBREV_PAIRED_MULT,
@@ -4456,11 +3409,12 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		    Stage3pair_absmq_score(stage3pair),first_absmq,second_absmq,
 		    Stage3pair_mapq_score(stage3pair),chromosome_iit,
 		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		    Stage3pair_pairlength(stage3pair),/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		    hardclip5_low,hardclip5_high,hardclip3_low,hardclip3_high,
+		    Stage3pair_pairlength(stage3pair),/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/hardclip3_low,/*hardclip_high*/hardclip3_high,
+		    /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_primary + npaths_altloc,
-		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p,
-		    merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	}
       }
 
@@ -4473,8 +3427,10 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
       hit5 = stage3array1[0];
       hit3 = stage3array2[0];
-      chrpos5 = SAM_compute_chrpos(hardclip5_low,hardclip5_high,hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
-      chrpos3 = SAM_compute_chrpos(hardclip3_low,hardclip3_high,hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+      chrpos_low_5 = SAM_compute_chrpos(&chrnum5,hardclip5_low,hardclip5_high,
+					hit5,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+      chrpos_low_3 = SAM_compute_chrpos(&chrnum3,hardclip3_low,hardclip3_high,
+					hit3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
       if (Stage3end_circularpos(hit5) > 0 || Stage3end_circularpos(hit3) > 0) {
 	Filestring_set_split_output(fp,OUTPUT_UC);
@@ -4491,10 +3447,11 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		Stage3end_absmq_score(stage3array1[0]),first_absmq1,/*second_absmq*/0,
 		Stage3end_mapq_score(stage3array1[0]),chromosome_iit,
 		/*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		/*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		/*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		/*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		/*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/1,quality_shift,sam_read_group_id,
-		invert_first_p,invert_second_p,merge_samechr_p);
+		invert_first_p,invert_second_p);
 
       /* Note: Do not act on add_paired_nomappers_p, since the two reads are artificially paired up already */
 
@@ -4505,10 +3462,11 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		Stage3end_absmq_score(stage3array2[0]),first_absmq2,/*second_absmq*/0,
 		Stage3end_mapq_score(stage3array2[0]),chromosome_iit,
 		/*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		/*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		/*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		/*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		/*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		/*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/1,quality_shift,sam_read_group_id,
-		invert_second_p,invert_first_p,merge_samechr_p);
+		invert_second_p,invert_first_p);
 
     } else if (resulttype == UNPAIRED_MULT || resulttype == UNPAIRED_TRANSLOC) {
       if (resulttype == UNPAIRED_MULT) {
@@ -4562,32 +3520,36 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	for (pathnum = 1; pathnum <= npaths1_primary + npaths1_altloc &&
 	       pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
 	  /* hardclip5_low = hardclip5_high = 0; */
-	  chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,/*stage3*/stage3array1[pathnum-1],
-				       Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					    /*stage3*/stage3array1[pathnum-1],
+					    Shortread_fulllength(queryseq1),/*first_read_p*/true);
 
 	  /* hardclip3_low = hardclip3_high = 0; */
-	  chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,/*stage3*/stage3array2[pathnum-1],
-				       Shortread_fulllength(queryseq2),/*first_read_p*/false);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					    /*stage3*/stage3array2[pathnum-1],
+					    Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	  stage3 = stage3array1[pathnum-1];
 	  SAM_print(fp,fp_failedinput_1,abbrev,stage3,/*mate*/stage3array2[pathnum-1],acc1,acc2,pathnum,
 		    npaths_primary_max,npaths_altloc_max,Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		    /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
-		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	  stage3 = stage3array2[pathnum-1];
 	  SAM_print(fp,fp_failedinput_2,abbrev,stage3,/*mate*/stage3array1[pathnum-1],acc1,acc2,pathnum,
 		    npaths_primary_max,npaths_altloc_max,Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		    /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
-		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	}
 
 	/* Print remaining results with non-mappers */
@@ -4595,23 +3557,27 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	  for ( ; pathnum <= npaths1_primary + npaths1_altloc && pathnum <= maxpaths_report; pathnum++) {
 	    stage3 = stage3array1[pathnum-1];
 	    /* hardclip5_low = hardclip5_high = 0; */
-	    chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,
-					 Shortread_fulllength(queryseq1),/*first_read_p*/true);
-	    chrpos3 = 0;
+	    chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					      stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	    chrnum3 = 0;
+	    chrpos_low_3 = 0;
 
 	    SAM_print(fp,fp_failedinput_1,abbrev,stage3,/*mate*/NULL,acc1,acc2,pathnum,
 		      npaths_primary_max,npaths_altloc_max,Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		      /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/true,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
-		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	    /* matching nomapper for second end */
-	    SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+	    SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
 				resulttype,/*first_read_p*/false,pathnum,npaths_primary_max,npaths_altloc_max,
-				/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos5,
+				/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
+				/*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+				/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 				quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	  }
 
@@ -4619,24 +3585,28 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	  for ( ; pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
 	    stage3 = stage3array2[pathnum-1];
 	    /* hardclip3_low = hardclip3_high = 0; */
-	    chrpos5 = 0;
-	    chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,
-					 Shortread_fulllength(queryseq2),/*first_read_p*/false);
+	    chrnum5 = 0;
+	    chrpos_low_5 = 0;
+	    chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					      stage3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	    /* matching nomapper for first end */
-	    SAM_print_nomapping(fp,abbrev,queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
+	    SAM_print_nomapping(fp,abbrev,queryseq1,/*queryseq_mate*/queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
 				resulttype,/*first_read_p*/true,pathnum,npaths_primary_max,npaths_altloc_max,
-				/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos3,
+				/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
+				/*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+				/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 				quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	    SAM_print(fp,fp_failedinput_2,abbrev,stage3,/*mate*/NULL,acc1,acc2,pathnum,
 		      npaths_primary_max,npaths_altloc_max,Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		      /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/false,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
-		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	  }
 	}
 
@@ -4644,106 +3614,120 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	/* print first end results */
 	if (npaths2_primary + npaths2_altloc == 0) {
 	  mate = (Stage3end_T) NULL;
-	  chrpos3 = 0U;
+	  chrnum3 = 0;
+	  chrpos_low_3 = 0U;
 	} else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
 	  mate = (Stage3end_T) NULL;
-	  chrpos3 = 0U;
+	  chrnum3 = 0;
+	  chrpos_low_3 = 0U;
 	} else {
 	  mate = stage3array2[0];
 	  hardclip3_low = hardclip3_high = 0;
-	  chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2),/*first_read_p*/false);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					    mate,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 	}
 
 	if (npaths1_primary + npaths1_altloc == 1) {
 	  stage3 = stage3array1[0];
 	  hardclip5_low = hardclip5_high = 0;
-	  chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					    stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 
 	  SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1_primary,npaths1_altloc,
 		    Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		    /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
-		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	} else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
 	  /* Just printing one end as nomapping */
-	  SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,
+	  SAM_print_nomapping(fp,abbrev,queryseq1,/*queryseq_mate*/queryseq2,mate,acc1,acc2,chromosome_iit,
 			      resulttype,/*first_read_p*/true,/*pathnum*/1,npaths1_primary,npaths1_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,/*mate_chrpos*/chrpos3,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
+			      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+			      /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 			      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 
 	} else {
 	  for (pathnum = 1; pathnum <= npaths1_primary + npaths1_altloc && pathnum <= maxpaths_report; pathnum++) {
 	    stage3 = stage3array1[pathnum-1];
 	    hardclip5_low = hardclip5_high = 0;
-	    chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,stage3,Shortread_fulllength(queryseq1),
-					 /*first_read_p*/true);
+	    chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/hardclip5_high,
+					      stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 	    
 	    SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1_primary,npaths1_altloc,
 		      Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		      /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
-		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	  }
 	}
 			  
 	/* print second end results */
 	if (npaths1_primary + npaths1_altloc == 0) {
 	  mate = (Stage3end_T) NULL;
-	  chrpos5 = 0U;
+	  chrnum5 = 0;
+	  chrpos_low_5 = 0U;
 	} else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
 	  mate = (Stage3end_T) NULL;
-	  chrpos5 = 0U;
+	  chrnum5 = 0;
+	  chrpos_low_5 = 0U;
 	} else {
 	  mate = stage3array1[0];
 	  hardclip5_low = hardclip5_high = 0;
-	  chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1),
-				       /*first_read_p*/true);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					    mate,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 	}
 
 	if (npaths2_primary + npaths2_altloc == 1) {
 	  stage3 = stage3array2[0];
 	  hardclip3_low = hardclip3_high = 0;
-	  chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
-				       /*first_read_p*/false);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					    stage3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 	  
 	  SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2_primary,npaths2_altloc,
 		    Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		    /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
-		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	  
 	} else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
 	  /* Just printing one end as nomapping */
-	  SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,
+	  SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,mate,acc1,acc2,chromosome_iit,
 			      resulttype,/*first_read_p*/false,/*pathnum*/1,npaths2_primary,npaths2_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,/*mate_chrpos*/chrpos5,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
+			      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+			      /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 			      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	  
 	} else {
 	  for (pathnum = 1; pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
 	    stage3 = stage3array2[pathnum-1];
 	    hardclip3_low = hardclip3_high = 0;
-	    chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
-					 /*first_read_p*/false);
+	    chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					      stage3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	    SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2_primary,npaths2_altloc,
 		      Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		      /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
-		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	  }
 	}
       }
@@ -4807,15 +3791,17 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
       /* print first end results */
       if (npaths2_primary + npaths2_altloc == 0) {
 	mate = (Stage3end_T) NULL;
-	chrpos3 = 0U;
+	chrnum3 = 0;
+	chrpos_low_3 = 0U;
       } else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
 	mate = (Stage3end_T) NULL;
-	chrpos3 = 0U;
+	chrnum3 = 0;
+	chrpos_low_3 = 0U;
       } else {
 	mate = stage3array2[0];
 	hardclip3_low = hardclip3_high = 0;
-	chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq2),
-				     /*first_read_p*/false);
+	chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					  mate,Shortread_fulllength(queryseq2),/*first_read_p*/false);
       }
 
       if (npaths1_primary + npaths1_altloc == 0) {
@@ -4824,9 +3810,11 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	if (add_paired_nomappers_p == true) {
 	  /* Handle nomappers with each mapped mate */
 	} else {
-	  SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
+	  SAM_print_nomapping(fp,abbrev,queryseq1,/*queryseq_mate*/queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
 			      /*first_read_p*/true,/*pathnum*/0,npaths1_primary,npaths1_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,/*mate_chrpos*/chrpos3,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
+			      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+			      /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 			      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	}
 
@@ -4835,8 +3823,8 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
 	stage3 = stage3array1[0];
 	hardclip5_low = hardclip5_high = 0;
-	chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
-				     /*first_read_p*/true);
+	chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					  stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 
 	if (add_paired_nomappers_p == true) {
 	  /* matching nomapper for second end */
@@ -4845,23 +3833,27 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 		    Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		    /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
-		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
-	  SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
+	  SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
 			      resulttype,/*first_read_p*/false,/*pathnum*/1,npaths1_primary,npaths1_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos5,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
+			      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+			      /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 			      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	} else {
 	  SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths1_primary,npaths1_altloc,
 		    Stage3end_absmq_score(stage3),first_absmq1,/*second_absmq1*/0,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		    /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		    /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
-		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	}
 
       } else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
@@ -4870,9 +3862,11 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	if (add_paired_nomappers_p == true) {
 	  /* Handle nomappers with each mapped mate */
 	} else {
-	  SAM_print_nomapping(fp,abbrev,queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
+	  SAM_print_nomapping(fp,abbrev,queryseq1,/*queryseq_mate*/queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
 			      /*first_read_p*/true,/*pathnum*/1,npaths1_primary,npaths1_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,/*mate_chrpos*/chrpos3,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
+			      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+			      /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 			      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	}
 
@@ -4881,34 +3875,38 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	for (pathnum = 1; pathnum <= npaths1_primary + npaths1_altloc && pathnum <= maxpaths_report; pathnum++) {
 	  stage3 = stage3array1[pathnum-1];
 	  hardclip5_low = hardclip5_high = 0;
-	  chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1),
-				       /*first_read_p*/true);
+	  chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					    stage3,Shortread_fulllength(queryseq1),/*first_read_p*/true);
 
 	  if (add_paired_nomappers_p == true) {
 	    /* matching nomapper for second end */
 	    npaths_max = npaths1_primary + npaths1_altloc; /* since npaths2 == 0 */
-	    SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+	    SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
 				resulttype,/*first_read_p*/false,pathnum,
-				npaths1_primary,npaths1_altloc,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos5,
+				npaths1_primary,npaths1_altloc,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
+				/*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+				/*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 				quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	    SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1_primary,npaths1_altloc,
 		      Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		      /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/true,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,quality_shift,sam_read_group_id,
-		      invert_first_p,invert_second_p,merge_samechr_p);
+		      invert_first_p,invert_second_p);
 
 	  } else {
 	    SAM_print(fp,fp_failedinput_1,abbrev,stage3,mate,acc1,acc2,pathnum,npaths1_primary,npaths1_altloc,
 		      Stage3end_absmq_score(stage3),first_absmq1,second_absmq1,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq1,/*queryseq_mate*/queryseq2,
-		      /*pairedlength*/0U,/*chrpos*/chrpos5,/*mate_chrpos*/chrpos3,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum5,/*chrpos*/chrpos_low_5,
+		      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/true,/*artificial_mate_p*/false,/*npaths_mate*/npaths2_primary + npaths2_altloc,
-		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	  }
 	}
       }
@@ -4916,15 +3914,17 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
       /* print second end results */
       if (npaths1_primary + npaths1_altloc == 0) {
 	mate = (Stage3end_T) NULL;
-	chrpos5 = 0U;
+	chrnum5 = 0;
+	chrpos_low_5 = 0U;
       } else if (quiet_if_excessive_p && npaths1_primary + npaths1_altloc > maxpaths_report) {
 	mate = (Stage3end_T) NULL;
-	chrpos5 = 0U;
+	chrnum5 = 0;
+	chrpos_low_5 = 0U;
       } else {
 	mate = stage3array1[0];
 	hardclip5_low = hardclip5_high = 0;
-	chrpos5 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,mate,Shortread_fulllength(queryseq1),
-				     /*first_read_p*/true);
+	chrpos_low_5 = SAM_compute_chrpos(&chrnum5,/*hardclip_low*/0,/*hardclip_high*/0,
+					  mate,Shortread_fulllength(queryseq1),/*first_read_p*/true);
       }
 
       if (npaths2_primary + npaths2_altloc == 0) {
@@ -4933,9 +3933,11 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	if (add_paired_nomappers_p == true) {
 	  /* Handle nomappers with each mapped mate */
 	} else {
-	  SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
+	  SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
 			      /*first_read_p*/false,/*pathnum*/0,npaths2_primary,npaths2_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,/*mate_chrpos*/chrpos5,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
+			      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+			      /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 			      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	}
 
@@ -4944,34 +3946,38 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 
 	stage3 = stage3array2[0];
 	hardclip3_low = hardclip3_high = 0;
-	chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
-				     /*first_read_p*/false);
+	chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					  stage3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	if (add_paired_nomappers_p == true) {
 	  /* matching nomapper for first end */
 	  npaths_max = npaths2_primary + npaths2_altloc; /* since npaths1_primary + npaths1_altloc == 0 */
-	  SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+	  SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
 			      resulttype,/*first_read_p*/true,/*pathnum*/1,
-			      npaths2_primary,npaths2_altloc,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos3,
+			      npaths2_primary,npaths2_altloc,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
+			      /*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+			      /*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 			      quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	  SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2_primary,npaths2_altloc,
 		    Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		    /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
-		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
 	} else {
 	  SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,/*pathnum*/1,npaths2_primary,npaths2_altloc,
 		    Stage3end_absmq_score(stage3),first_absmq2,/*second_absmq2*/0,
 		    Stage3end_mapq_score(stage3),chromosome_iit,
 		    /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		    /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		    /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		    /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		    /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		    /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		    resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
-		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		    quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	}
 
       } else if (quiet_if_excessive_p && npaths2_primary + npaths2_altloc > maxpaths_report) {
@@ -4980,9 +3986,11 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	if (add_paired_nomappers_p == true) {
 	  /* Handle nomappers with each mapped mate */
 	} else {
-	  SAM_print_nomapping(fp,abbrev,queryseq2,mate,acc1,acc2,chromosome_iit,resulttype,
+	  SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,mate,acc1,acc2,chromosome_iit,resulttype,
 			      /*first_read_p*/false,/*pathnum*/1,npaths2_primary,npaths2_altloc,
-			      /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,/*mate_chrpos*/chrpos5,
+			      /*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
+			      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+			      /*mate_hardclip_low*/hardclip5_low,/*mate_hardclip_high*/hardclip5_high,
 			      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	}
 
@@ -4991,34 +3999,38 @@ SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T f
 	for (pathnum = 1; pathnum <= npaths2_primary + npaths2_altloc && pathnum <= maxpaths_report; pathnum++) {
 	  stage3 = stage3array2[pathnum-1];
 	  hardclip3_low = hardclip3_high = 0;
-	  chrpos3 = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq2),
-				       /*first_read_p*/false);
+	  chrpos_low_3 = SAM_compute_chrpos(&chrnum3,/*hardclip_low*/0,/*hardclip_high*/0,
+					    stage3,Shortread_fulllength(queryseq2),/*first_read_p*/false);
 
 	  if (add_paired_nomappers_p == true) {
 	    /* matching nomapper for first end */
 	    npaths_max = npaths2_primary + npaths2_altloc; /* since npaths1_primary + npaths1_altloc == 0 */
-	    SAM_print_nomapping(fp,abbrev,queryseq2,/*mate*/stage3,acc1,acc2,chromosome_iit,
+	    SAM_print_nomapping(fp,abbrev,queryseq2,/*queryseq_mate*/queryseq1,/*mate*/stage3,acc1,acc2,chromosome_iit,
 				resulttype,/*first_read_p*/true,pathnum,
-				npaths2_primary,npaths2_altloc,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,/*mate_chrpos*/chrpos3,
+				npaths2_primary,npaths2_altloc,/*artificial_mate_p*/false,/*npaths_mate*/npaths_max,
+				/*mate_chrnum*/chrnum3,/*mate_chrpos_low*/chrpos_low_3,
+				/*mate_hardclip_low*/hardclip3_low,/*mate_hardclip_high*/hardclip3_high,
 				quality_shift,sam_read_group_id,invert_first_p,invert_second_p);
 	    SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2_primary,npaths2_altloc,
 		      Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		      /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/false,/*artificial_mate_p*/true,/*npaths_mate*/npaths_max,
-		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 
 	  } else {
 	    SAM_print(fp,fp_failedinput_2,abbrev,stage3,mate,acc1,acc2,pathnum,npaths2_primary,npaths2_altloc,
 		      Stage3end_absmq_score(stage3),first_absmq2,second_absmq2,
 		      Stage3end_mapq_score(stage3),chromosome_iit,
 		      /*queryseq*/queryseq2,/*queryseq_mate*/queryseq1,
-		      /*pairedlength*/0U,/*chrpos*/chrpos3,/*mate_chrpos*/chrpos5,
-		      /*hardclip5_low*/0,/*hardclip5_high*/0,/*hardclip3_low*/0,/*hardclip3_high*/0,
+		      /*pairedlength*/0U,/*chrnum*/chrnum3,/*chrpos*/chrpos_low_3,
+		      /*mate_chrnum*/chrnum5,/*mate_chrpos_low*/chrpos_low_5,
+		      /*hardclip_low*/0,/*hardclip_high*/0,/*mate_hardclip_low*/0,/*mate_hardclip_high*/0,
 		      resulttype,/*first_read_p*/false,/*artificial_mate_p*/false,/*npaths_mate*/npaths1_primary + npaths1_altloc,
-		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p,merge_samechr_p);
+		      quality_shift,sam_read_group_id,invert_second_p,invert_first_p);
 	  }
 	}
       }
diff --git a/src/samprint.h b/src/samprint.h
index 8b6c484..bea98fe 100644
--- a/src/samprint.h
+++ b/src/samprint.h
@@ -1,4 +1,4 @@
-/* $Id: samprint.h 184433 2016-02-17 20:00:43Z twu $ */
+/* $Id: samprint.h 207323 2017-06-14 19:39:31Z twu $ */
 #ifndef SAMPRINT_INCLUDED
 #define SAMPRINT_INCLUDED
 
@@ -13,6 +13,7 @@
 #include "filestring.h"
 
 #ifdef GSNAP
+#include "chrnum.h"
 #include "shortread.h"
 #include "stage3hr.h"
 #include "resulthr.h"
@@ -23,42 +24,54 @@
 #ifdef GSNAP
 extern void
 SAM_setup (bool add_paired_nomappers_p_in, bool paired_flag_means_concordant_p_in,
+	   bool omit_concordant_uniq_p_in, bool omit_concordant_mult_p_in, 
 	   bool quiet_if_excessive_p_in, int maxpaths_report_in,
 	   char *failedinput_root_in, bool fastq_format_p_in, bool hide_soft_clips_p_in,
-	   bool clip_overlap_p_in, bool merge_overlap_p_in, bool sam_multiple_primaries_p_in,
+	   bool clip_overlap_p_in, bool merge_overlap_p_in, bool merge_samechr_p_in, bool sam_multiple_primaries_p_in,
 	   bool force_xs_direction_p_in, bool md_lowercase_variant_p_in, IIT_T snps_iit_in,
-	   Univ_IIT_T chromosome_iit_in, Genome_T genome_in);
+	   bool find_dna_chimeras_p_in,IIT_T splicing_iit_in, int donor_typeint_in, int acceptor_typeint_in,
+	   bool transcript_splicing_p_in, IIT_T genestruct_iit_in, Univ_IIT_T chromosome_iit_in, Genome_T genome_in);
 
 extern Chrpos_T
-SAM_compute_chrpos (int hardclip_low, int hardclip_high, Stage3end_T this, int querylength,
-		    bool first_read_p);
+SAM_compute_chrpos (Chrnum_T *chrnum, int hardclip_low, int hardclip_high,
+		    Stage3end_T this, int querylength, bool first_read_p);
 
 extern unsigned int
 SAM_compute_flag (bool plusp, Stage3end_T mate, Resulttype_T resulttype,
 		  bool first_read_p, int pathnum, int npaths, bool artificial_mate_p, int npaths_mate,
-		  int absmq_score, int first_absmq, bool invertp, bool invert_mate_p);
+		  int absmq_score, int first_absmq, bool invertp, bool invert_mate_p, bool supplementaryp);
 
 extern void
-SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Stage3end_T mate, char *acc1, char *acc2,
+SAM_print_mate_cigar (Filestring_T fp, Stage3end_T mate, int mate_querylength, int mate_hardclip_low, int mate_hardclip_high);
+
+extern void
+SAM_print_pairs_nomapping (Filestring_T fp, char *abbrev, char *acc1, char *acc2, char *queryseq_ptr,
+			   char *quality_string, int querylength, int quality_shift,
+			   bool first_read_p, bool sam_paired_p, char *sam_read_group_id);
+
+extern void
+SAM_print_nomapping (Filestring_T fp, char *abbrev, Shortread_T queryseq, Shortread_T queryseq_mate,
+		     Stage3end_T mate, char *acc1, char *acc2,
 		     Univ_IIT_T chromosome_iit, Resulttype_T resulttype, bool first_read_p,
 		     int pathnum, int npaths_primary, int npaths_altloc, bool artificial_mate_p, int npaths_mate,
-		     Chrpos_T mate_chrpos, int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p);
+		     Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low, int mate_hardclip_low, int mate_hardclip_high,
+		     int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p);
 
 extern void
 SAM_print (Filestring_T fp, Filestring_T fp_failedinput, char *abbrev,
 	   Stage3end_T this, Stage3end_T mate, char *acc1, char *acc2, int pathnum, int npaths_primary, int npaths_altloc,
-	   int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit, Shortread_T queryseq,
-	   Shortread_T queryseq2, int pairedlength, Chrpos_T chrpos, Chrpos_T mate_chrpos,
-	   int hardclip5_low, int hardclip5_high, int hardclip3_low, int hardclip3_high,
-	   Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate, int quality_shift,
-	   char *sam_read_group_id, bool invertp, bool invert_mate_p, bool merge_samechr_p);
+	   int absmq_score, int first_absmq, int second_absmq, int mapq_score, Univ_IIT_T chromosome_iit,
+	   Shortread_T queryseq, Shortread_T queryseq_mate, int pairedlength,
+	   Chrnum_T chrnum, Chrpos_T chrpos, Chrnum_T mate_chrnum, Chrpos_T mate_chrpos_low,
+	   int hardclip_low, int hardclip_high, int mate_hardclip_low, int mate_hardclip_high,
+	   Resulttype_T resulttype, bool first_read_p, bool artificial_mate_p, int npaths_mate,
+	   int quality_shift, char *sam_read_group_id, bool invertp, bool invert_mate_p);
 
 extern void
 SAM_print_paired (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2,
 		  Result_T result, Resulttype_T resulttype, Univ_IIT_T chromosome_iit,
 		  Shortread_T queryseq1, Shortread_T queryseq2, bool invert_first_p, bool invert_second_p,
-		  bool nofailsp, bool failsonlyp, bool merge_samechr_p,
-		  int quality_shift, char *sam_read_group_id);
+		  bool nofailsp, bool failsonlyp, int quality_shift, char *sam_read_group_id);
 #endif
 
 #endif
diff --git a/src/sarray-read.c b/src/sarray-read.c
index c69089f..5f3e617 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 207324 2017-06-14 19:41:18Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -26,78 +26,30 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #include "assert.h"
 #include "access.h"
 #include "types.h"
-#include "listdef.h"
-#include "list.h"
+#include "genomicpos.h"
 #include "genome128_hr.h"
-#include "splice.h"
-#include "indel.h"
-#include "stage3hr.h"
 #include "bytecoding.h"
 #include "bitpack64-read.h"
 #include "bitpack64-readtwo.h"
 #include "bitpack64-access.h"
 
-#include "comp.h"
-#include "diagdef.h"
-#include "diag.h"
-#include "univdiagdef.h"
-#include "univdiag.h"
-#include "substring.h"
-#include "junction.h"
-#include "stage3hr.h"
-#include "sedgesort.h"
+#include "sarray-read.h"
 
 
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
-#else
+#ifdef USE_CSA
+
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
 #include <emmintrin.h>
 #endif
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSSE3)
-#else
-#include <tmmintrin.h>
-#endif
-#if defined(WORDS_BIGENDIAN) || !defined(HAVE_AVX2)
-#else
-#include <immintrin.h>
-#endif
 
-#if !defined(HAVE_SSE4_2)
-/* Skip popcnt */
-#elif defined(HAVE_POPCNT)
-#include <immintrin.h>
-#elif defined(HAVE_MM_POPCNT)
-#include <nmmintrin.h>
 #endif
 
 
-#define MIN_ENDLENGTH 12
-#define MIN_INTRONLEN 9
-
-#define MAX_HITS_FOR_BEST_ELT 1000
-
-/* A value of 10000 misses various splices, although they are caught by GSNAP algorithm */
-#define EXCESS_SARRAY_HITS 100000
-#define LOCALSPLICING_NMATCHES_SLOP 1
-#define LOCALSPLICING_PROB_SLOP 0.05
-
-#define USE_SHUFFLE_MASK 1	/* Alternative requires AVX, and that part of the code isn't called much */
-/* #define REQUIRE_ALIGNMENT 1 */
-
-#define GUESS_ALLOCATION 10
-
-/* #define USE_SEPARATE_BUCKETS 1 */
-
-/* Results of each suffix array search */
-#ifdef DEBUG
-#define debug(x) x
-#else
-#define debug(x)
-#endif
-
 #define MAX_DEBUG1_HITS 100
 
 /* Details of suffix array search */
 #ifdef DEBUG1
+#include "genome.h"
 #define debug1(x) x
 #else
 #define debug1(x)
@@ -138,70 +90,6 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #define debug3b(x)
 #endif
 
-/* known splicing */
-#ifdef DEBUG4S
-#define debug4s(x) x
-#else
-#define debug4s(x)
-#endif
-
-/* find_multimiss_iter */
-#ifdef DEBUG7
-#define debug7(x) x
-#else
-#define debug7(x)
-#endif
-
-/* find_multimiss_iter details */
-#ifdef DEBUG7A
-#define debug7a(x) x
-#else
-#define debug7a(x)
-#endif
-
-/* SIMD new filtering */
-#ifdef DEBUG7B
-#define debug7b(x) x
-#else
-#define debug7b(x)
-#endif
-
-
-/* Comparing SIMD with non-SIMD */
-#ifdef DEBUG8
-#define debug8(x) x
-#else
-#define debug8(x)
-#endif
-
-/* binary_search */
-#ifdef DEBUG10
-#define debug10(x) x
-#else
-#define debug10(x)
-#endif
-
-/* Sorting of diagonals */
-#ifdef DEBUG12
-#define debug12(x) x
-#else
-#define debug12(x)
-#endif
-
-/* GMAP */
-#ifdef DEBUG13
-#define debug13(x) x
-#else
-#define debug13(x)
-#endif
-
-/* Oligoindex fillin */
-#ifdef DEBUG14
-#define debug14(x) x
-#else
-#define debug14(x)
-#endif
-
 /* Compare separate buckets with a single one */
 #ifdef DEBUG15
 #define debug15(x) x
@@ -210,47 +98,6 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #endif
 
 
-#ifdef DEBUG7B
-static void
-print_vector_hex (__m128i x) {
-  UINT4 *s = (UINT4 *) &x;
-
-  /* printf("%08X %08X %08X %08X\n",s[0],s[1],s[2],s[3]); */
-  printf("%08X %08X %08X %08X\n",s[3],s[2],s[1],s[0]);
-  return;
-}
-
-static void
-print_vector_uint (__m128i x) {
-  UINT4 *s = (UINT4 *) &x;
-
-  /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
-  printf("%u %u %u %u\n",s[3],s[2],s[1],s[0]);
-  return;
-}
-
-#ifdef HAVE_AVX2
-static void
-print_vector_hex_256 (__m256i x) {
-  UINT4 *s = (UINT4 *) &x;
-
-  /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
-  printf("%08X %08X %08X %08X %08X %08X %08X %08X\n",s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
-  return;
-}
-
-static void
-print_vector_uint_256 (__m256i x) {
-  UINT4 *s = (UINT4 *) &x;
-
-  /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
-  printf("%u %u %u %u %u %u %u %u\n",s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
-  return;
-}
-#endif
-#endif
-
-
 
 #define T Sarray_T
 struct T {
@@ -290,7 +137,7 @@ struct T {
 #endif
 
   int indexsize;
-  UINT4 indexspace;		/* 4^indexsize.  Used by sarray_search to detect when we have a poly-T oligo shorter than indexsize */
+  UINT4 indexspace;		/* 4^indexsize.  Used by sarray_read to detect when we have a poly-T oligo shorter than indexsize */
 #ifdef DEBUG15
   UINT4 *indexi_ptrs, *indexi_comp, *indexj_ptrs, *indexj_comp; /* bucket array: oligomer lookup into suffix array */
   UINT4 *indexij_ptrs, *indexij_comp;
@@ -336,43 +183,6 @@ Sarray_size (Sarray_T this) {
 }
 
 
-static Sarray_T sarray_fwd;
-static Sarray_T sarray_rev;
-static Genome_T genome;
-static bool *circularp;
-
-static char conversion_fwd[128];
-static char conversion_rev[128];
-
-static Univ_IIT_T chromosome_iit;
-static int circular_typeint;
-static int splicing_penalty;
-
-static Chrpos_T overall_max_distance;
-static Chrpos_T shortsplicedist;
-static Chrpos_T max_deletionlen;
-static Chrpos_T max_insertionlen_default;
-static int max_end_deletions;
-static int max_middle_insertions_default;
-
-/* Splicing */
-static Univcoord_T *splicesites;
-static Splicetype_T *splicetypes;
-static Chrpos_T *splicedists;
-static int nsplicesites;
-
-
-#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
-static __m256i epi32_convert_256;	/* For converting unsigned ints to signed ints */
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-static __m128i epi32_convert;	/* For converting unsigned ints to signed ints */
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN) && defined(USE_SHUFFLE_MASK)
-static __m128i shuffle_mask16[16];
-#endif
 
 
 #if 0
@@ -444,108 +254,6 @@ sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_ch
 #endif
 
 
-void
-Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
-	      Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
-	      Chrpos_T shortsplicedist_in, int splicing_penalty_in,
-	      int max_deletionlength, int max_end_deletions_in,
-	      int max_middle_insertions_in, int max_end_insertions,
-	      Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
-	      Chrpos_T *splicedists_in, int nsplicesites_in) {
-  int i;
-
-  sarray_fwd = sarray_fwd_in;
-  sarray_rev = sarray_rev_in;
-  genome = genome_in;
-  circularp = circularp_in;
-
-  for (i = 0; i < 128; i++) {
-    conversion_fwd[i] = i;
-    conversion_rev[i] = i;
-  }
-  if (mode == STANDARD) {
-    /* Don't change conversion */
-  } else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
-    conversion_fwd['C'] = 'T';	/* CT */
-    conversion_rev['G'] = 'A';	/* GA */
-  } else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
-    conversion_fwd['A'] = 'G';	/* AG */
-    conversion_rev['T'] = 'C';	/* TC */
-  } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
-    conversion_fwd['T'] = 'C';	/* TC */
-    conversion_rev['A'] = 'G';	/* AG */
-  }
-
-  chromosome_iit = chromosome_iit_in;
-  circular_typeint = circular_typeint_in;
-  shortsplicedist = shortsplicedist_in;
-  splicing_penalty = splicing_penalty_in;
-
-  max_deletionlen = max_deletionlength;
-  max_end_deletions = max_end_deletions_in;
-  max_middle_insertions_default = max_middle_insertions_in;
-  if (max_middle_insertions_in > max_end_insertions) {
-    max_insertionlen_default = max_middle_insertions_in;
-  } else {
-    max_insertionlen_default = max_end_insertions;
-  }
-
-  if (shortsplicedist > max_deletionlen) {
-    overall_max_distance = shortsplicedist;
-  } else {
-    overall_max_distance = max_deletionlen;
-  }
-
-  splicesites = splicesites_in;
-  splicetypes = splicetypes_in;
-  splicedists = splicedists_in;
-  nsplicesites = nsplicesites_in;
-
-#if 0
-  sarray_search_char(&(sarray->initindexi[0]),&(sarray->initindexj[0]),/*desired_char*/'A',sarray->array,sarray->n);
-  sarray_search_char(&(sarray->initindexi[1]),&(sarray->initindexj[1]),/*desired_char*/'C',sarray->array,sarray->n);
-  sarray_search_char(&(sarray->initindexi[2]),&(sarray->initindexj[2]),/*desired_char*/'G',sarray->array,sarray->n);
-  sarray_search_char(&(sarray->initindexi[3]),&(sarray->initindexj[3]),/*desired_char*/'T',sarray->array,sarray->n);
-#endif
-
-#if 0
-  printf("A => %u %u\n",sarray->initindexi[0],sarray->initindexj[0]);
-  printf("C => %u %u\n",sarray->initindexi[1],sarray->initindexj[1]);
-  printf("G => %u %u\n",sarray->initindexi[2],sarray->initindexj[2]);
-  printf("T => %u %u\n",sarray->initindexi[3],sarray->initindexj[3]);
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-  epi32_convert = _mm_set1_epi32(2147483648); /* 2^31 */
-#endif
-
-#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
-  epi32_convert_256 = _mm256_set1_epi32(2147483648); /* 2^31 */
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN) && defined(USE_SHUFFLE_MASK)
-  /* Used by fill_positions_filtered_first */
-  shuffle_mask16[0] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1);
-  shuffle_mask16[1] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  3, 2, 1, 0);
-  shuffle_mask16[2] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  7, 6, 5, 4);
-  shuffle_mask16[3] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1,  7, 6, 5, 4,  3, 2, 1, 0);
-  shuffle_mask16[4] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8);
-  shuffle_mask16[5] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8,  3, 2, 1, 0);
-  shuffle_mask16[6] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8,  7, 6, 5, 4);
-  shuffle_mask16[7] =  _mm_set_epi8(-1,-1,-1,-1, 11,10, 9, 8,  7, 6, 5, 4,  3, 2, 1, 0);
-  shuffle_mask16[8] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12);
-  shuffle_mask16[9] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12,  3, 2, 1, 0);
-  shuffle_mask16[10] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12,  7, 6, 5, 4);
-  shuffle_mask16[11] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12,  7, 6, 5, 4,  3, 2, 1, 0);
-  shuffle_mask16[12] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8);
-  shuffle_mask16[13] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8,  3, 2, 1, 0);
-  shuffle_mask16[14] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8,  7, 6, 5, 4);
-  shuffle_mask16[15] = _mm_set_epi8(15,14,13,12, 11,10, 9, 8,  7, 6, 5, 4,  3, 2, 1, 0);
-#endif
-  
-  return;
-}
-
 
 static int
 log4 (int result) {
@@ -651,16 +359,22 @@ Sarray_shmem_remove (char *dir, char *fileroot, char *snps_root, Mode_T mode, bo
 #endif
 
 
+Univcoord_T *
+Sarray_array (T this) {
+  return this->array;
+}
+
+
 #ifdef USE_CSA
 
-static Univcoord_T
-csa_lookup (T sarray, Sarrayptr_T i) {
+Univcoord_T
+Sarray_position (T sarray, Sarrayptr_T i) {
   Univcoord_T nhops = 0, expected_sa_i;
   Sarrayptr_T expected_i;
   __m128i converted, cmp;
   int matchbits;
 
-  debug3(printf("Entered csa_lookup for %u:",i));
+  debug3(printf("Entered Sarray_position for %u:",i));
 #ifdef DEBUG3A
   expected_sa_i = sarray->array[i];
 #endif
@@ -727,11 +441,17 @@ csa_lookup (T sarray, Sarrayptr_T i) {
 
 #elif defined(WORDS_BIGENDIAN)
 
-#define csa_lookup(sarray,i) Bigendian_convert_uint(sarray->array[i])
+Univcoord_T
+Sarray_position (T sarray, Sarrayptr_T i) {
+  return Bigendian_convert_uint(sarray->array[i]);
+}
 
 #else
 
-#define csa_lookup(sarray,i) sarray->array[i]
+Univcoord_T
+Sarray_position (T sarray, Sarrayptr_T i) {
+  return sarray->array[i];
+}
 
 #endif
 
@@ -848,7 +568,7 @@ Sarray_new (char *dir, char *fileroot, Access_mode_T sarray_access, Access_mode_
     if (sarray_access == USE_MMAP_PRELOAD) {
       if (old_format_p == true) {
 	fprintf(stderr,"Pre-loading suffix array...");
-	new->array = (UINT4 *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sarrayfile,
+	new->array = (Univcoord_T *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sarrayfile,
 						       sizeof(UINT4));
 	new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
 	new->n = new->n_plus_one - 1;
@@ -861,7 +581,7 @@ Sarray_new (char *dir, char *fileroot, Access_mode_T sarray_access, Access_mode_
 
     } else if (sarray_access == USE_MMAP_ONLY) {
       if (old_format_p == true) {
-	new->array = (UINT4 *) Access_mmap(&new->array_fd,&new->array_len,sarrayfile,/*randomp*/true);
+	new->array = (Univcoord_T *) Access_mmap(&new->array_fd,&new->array_len,sarrayfile,/*randomp*/true);
 	new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
 	new->n = new->n_plus_one - 1;
       }
@@ -871,10 +591,10 @@ Sarray_new (char *dir, char *fileroot, Access_mode_T sarray_access, Access_mode_
       if (old_format_p == true) {
 	fprintf(stderr,"Allocating memory for suffix array...");
 	if (sharedp == true) {
-	  new->array = (UINT4 *) Access_allocate_shared(&new->array_access,&new->array_shmid,&new->array_key,
+	  new->array = (Univcoord_T *) Access_allocate_shared(&new->array_access,&new->array_shmid,&new->array_key,
 							&new->array_fd,&new->array_len,&seconds,sarrayfile,sizeof(UINT4));
 	} else {
-	  new->array = (UINT4 *) Access_allocate_private(&new->array_access,&new->array_len,&seconds,sarrayfile,sizeof(UINT4));
+	  new->array = (Univcoord_T *) Access_allocate_private(&new->array_access,&new->array_len,&seconds,sarrayfile,sizeof(UINT4));
 	}
 	new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
 	new->n = new->n_plus_one - 1;
@@ -1560,7 +1280,7 @@ find_longest_match (UINT4 nmatches, Sarrayptr_T *initptr, Sarrayptr_T *finalptr,
     if (i == j) {
       /* Singleton interval */
       debug1(printf("Singleton interval %u..%u\n",i,j));
-      SA_i = csa_lookup(sarray,i);
+      SA_i = Sarray_position(sarray,i);
       nmatches +=
 	Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
 					     /*pos5*/queryoffset+nmatches,/*pos3*/queryoffset+querylength,
@@ -1591,7 +1311,7 @@ find_longest_match (UINT4 nmatches, Sarrayptr_T *initptr, Sarrayptr_T *finalptr,
 	/* Check only up to minlength, so we validate the entire interval */
 	minlength = (lcp_whole < querylength) ? lcp_whole : querylength;
 	debug1(printf("Looking up genome for query from %d .. %d - 1\n",nmatches,minlength));
-	SA_i = csa_lookup(sarray,i);
+	SA_i = Sarray_position(sarray,i);
 	nmatches +=
 	  Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
 					       /*pos5*/queryoffset+nmatches,/*pos3*/queryoffset+minlength,
@@ -1635,11 +1355,11 @@ find_longest_match (UINT4 nmatches, Sarrayptr_T *initptr, Sarrayptr_T *finalptr,
    where m wis the querylength and |Sigma| is the size of the alphabet
    (4 for DNA) */
 /* query is a substring of the original, starting with queryoffset */
-static void
-sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
-	       UINT4 *nmatches, char *query, UINT4 querylength, int queryoffset,
-	       Compress_T query_compress, T sarray, bool plusp, int genestrand,
-	       char conversion[]) {
+void
+Sarray_read (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
+	     UINT4 *nmatches, char *query, UINT4 querylength, int queryoffset,
+	     Compress_T query_compress, T sarray, bool plusp, int genestrand,
+	     char conversion[]) {
   int effective_querylength;	/* length to first N */
   Oligospace_T oligo;
   UINT4 l, r;
@@ -1652,10 +1372,13 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
   bool failp;
 #endif
 
-  debug1(printf("sarray_search on %.*s, querylength %d, plusp %d\n",querylength,query,querylength,plusp));
+  debug1(printf("Sarray_read on %.*s, querylength %d, plusp %d\n",querylength,query,querylength,plusp));
 
   /* Find initial lcp-interval */
   effective_querylength = nt_querylength(query,querylength);
+  debug1(printf("sarray_search on %.*s, querylength %d, effective querylength %d, plusp %d\n",
+		querylength,query,querylength,effective_querylength,plusp));
+
 
   *nmatches = 0;
   if (effective_querylength == 0) {
@@ -1734,7 +1457,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
 
   /* Before */
   if (*nmatches > 0 && *initptr > 0U) {
-    SA_i = csa_lookup(sarray,(*initptr)-1);
+    SA_i = Sarray_position(sarray,(*initptr)-1);
     recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
 						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
 						   plusp,genestrand);
@@ -1773,7 +1496,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
   /* Hits */
   lcp_prev = lcp_i;
   for (k = 0; k < (int) (*finalptr - *initptr + 1) && k < MAX_DEBUG1_HITS; k++) {
-    SA_i = csa_lookup(sarray,(*initptr)+k);
+    SA_i = Sarray_position(sarray,(*initptr)+k);
     recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
 						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
 						   plusp,genestrand);
@@ -1813,7 +1536,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
     if (recount != *nmatches) {
       printf("querylength is %d\n",querylength);
       printf("false positive: recount %d at %u does not equal expected nmatches %d\n",
-	     recount,csa_lookup(sarray,(*initptr)),*nmatches);
+	     recount,Sarray_position(sarray,(*initptr)),*nmatches);
       failp = true;
     }
 
@@ -1824,7 +1547,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
     /* Overflow */
     printf("...\n");
     k = (int) (*finalptr - *initptr);
-    hit = csa_lookup(sarray,(*initptr)+k);
+    hit = Sarray_position(sarray,(*initptr)+k);
     recount = Genome_consecutive_matches_rightward(query_compress,/*left*/hit-queryoffset,
 						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
 						   plusp,genestrand);
@@ -1853,7 +1576,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
     if (recount != *nmatches) {
       printf("querylength is %d\n",querylength);
       printf("false positive: recount %d at %u does not equal expected nmatches %d\n",
-	     recount,csa_lookup(sarray,*initptr),*nmatches);
+	     recount,Sarray_position(sarray,*initptr),*nmatches);
       failp = true;
     }
     /* hits[k] = sarray->array[(*initptr)++]; */
@@ -1861,7 +1584,7 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
 
 
   /* After */
-  if (*nmatches > 0 && (SA_i = csa_lookup(sarray,(*finalptr)+1)) > 0U) {
+  if (*nmatches > 0 && (SA_i = Sarray_position(sarray,(*finalptr)+1)) > 0U) {
     printf("\n");
     recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
 						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
@@ -1905,7108 +1628,33 @@ sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
 }
 
 
-/* For fill_positions_all: ELT_VIRGIN -> ELT_FILLED */
-/* For fill_positions_filtered: ELT_VIRGIN -(1st call)-> ELT_UNSORTED -(2nd call)-> ELT_SORTED */
-typedef enum {ELT_VIRGIN, ELT_FILLED, ELT_UNSORTED, ELT_SORTED} Elt_status_T;
-
-
-/* Simplified version of Spanningelt_T */
-typedef struct Elt_T *Elt_T;
-struct Elt_T {
-  int querystart;
-  int queryend;
-
-  int querystart_leftward; /* Modified when we extend matches leftward */
-  int queryend_leftward; /* Modified when we extend matches leftward */
-
-  int nmatches;
-
-  Sarrayptr_T initptr;			/* in sarray */
-  Sarrayptr_T finalptr;
-  Sarrayptr_T nptr;
-
-  Univcoord_T *positions_allocated; /* all or filtered positions needed */
-  Univcoord_T *positions;
-  int npositions_allocated;
-  int npositions;		/* from goal to high */
-
-  bool temporaryp;
-  bool fillin_p;		/* Created by oligoindex algorithm */
-
-  /* filled/sorted by Elt_fill_positions_filtered to speed up on multiple calls */
-  Univcoord_T *all_positions;
-  int n_all_positions;
-
-  Elt_status_T status;
-};
-
-
-static void
-Elt_reset (Elt_T this) {
-  this->querystart_leftward = this->querystart;
-  this->queryend_leftward = this->queryend;
-  return;
-}
-
-
-static Elt_T
-Elt_new (int querypos, int nmatches, Sarrayptr_T initptr, Sarrayptr_T finalptr, bool temporaryp) {
-  Elt_T new = (Elt_T) MALLOC(sizeof(*new));
-
-  new->querystart = new->querystart_leftward = querypos;
-  new->queryend = new->queryend_leftward = querypos + nmatches - 1;
-  new->nmatches = nmatches;
-
-  new->initptr = initptr;
-  new->finalptr = finalptr;
-  new->nptr = new->finalptr - new->initptr + 1;
-
-  /* new->positions is a pointer that advances to goal */
-  new->positions_allocated = new->positions = (Univcoord_T *) NULL;
-  new->npositions_allocated = new->npositions = 0;
-
-  new->temporaryp = temporaryp;
-  new->fillin_p = false;
-
-  new->all_positions = (Univcoord_T *) NULL;
-  new->n_all_positions = 0;
-
-  new->status = ELT_VIRGIN;
-
-  return new;
-}
-
-#if 0
-static Elt_T
-Elt_new_fillin (int querystart, int queryend, int indexsize, Univcoord_T left) {
-  Elt_T new = (Elt_T) MALLOC(sizeof(*new));
-
-  new->querystart = new->querystart_leftward = querystart;
-  new->queryend = new->queryend_leftward = queryend + indexsize - 1;
-  new->nmatches = new->queryend - querystart + 1;
-
-  new->initptr = 0;
-  new->finalptr = 0;
-  new->nptr = 0;
-
-  new->npositions = 1;
-  new->positions_allocated = new->positions = (Univcoord_T *) MALLOC(sizeof(Univcoord_T));
-  new->positions[0] = left;
-
-  new->temporaryp = true;
-  new->fillin_p = true;
-
-  new->all_positions = (Univcoord_T *) NULL;
-  new->n_all_positions = 0;
-
-  new->status = ELT_VIRGIN;
-
-  return new;
-}
-#endif
-
-#if 0
-static void
-Elt_replace (Elt_T this, int querypos, int nmatches, Sarrayptr_T initptr, Sarrayptr_T finalptr) {
-  this->querystart = querypos;
-  this->queryend = querypos + nmatches - 1;
-  this->nmatches = nmatches;
-
-  this->initptr = initptr;
-  this->finalptr = finalptr;
-
-  if (this->positions_allocated != NULL) {
-    FREE(this->positions_allocated);
-  }
-  this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-  this->npositions_allocated = this->npositions = 0;
-
-
-  if (this->all_positions != NULL) {
-    FREE(this->all_positions);
-  }
-  this->all_positions = (Univcoord_T *) NULL;
-  this->n_all_positions = 0;
-
-  this->status = ELT_VIRGIN;
-
-  return;
-}
-#endif
-
-
-static void
-Elt_free (Elt_T *old) {
-
-  if ((*old)->positions_allocated != NULL) {
-    FREE((*old)->positions_allocated);
-  }
-  if ((*old)->all_positions != NULL) {
-    FREE((*old)->all_positions);
-  }
-  FREE(*old);
-  return;
-}
-
-
-#if 0
-static int
-Elt_nmatches_cmp (const void *a, const void *b) {
-  Elt_T x = * (Elt_T *) a;
-  Elt_T y = * (Elt_T *) b;
-
-  if (x->nmatches > y->nmatches) {
-    return -1;
-  } else if (y->nmatches > x->nmatches) {
-    return +1;
-  } else {
-    return 0;
-  }
-}
-#endif
-
-#if 0
-static int
-Elt_querypos_ascending_cmp (const void *a, const void *b) {
-  Elt_T x = * (Elt_T *) a;
-  Elt_T y = * (Elt_T *) b;
-
-  if (x->querystart < y->querystart) {
-    return -1;
-  } else if (y->querystart < x->querystart) {
-    return +1;
-  } else {
-    return 0;
-  }
-}
-#endif
-
-#if 0
-static int
-Elt_querypos_descending_cmp (const void *a, const void *b) {
-  Elt_T x = * (Elt_T *) a;
-  Elt_T y = * (Elt_T *) b;
-
-  if (x->querystart > y->querystart) {
-    return -1;
-  } else if (y->querystart > x->querystart) {
-    return +1;
-  } else {
-    return 0;
-  }
-}
-#endif
-
-
-static int
-Elt_extend_leftward (int *min_leftward, Elt_T elt, Compress_T query_compress,
-		     bool plusp, int genestrand, int skip_left) {
-  int max_leftward, nmatches;
-  int i;
-
-  if (elt->npositions == 0) {
-    *min_leftward = 0;
-    return 0;
-  } else {
-    max_leftward = *min_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/elt->positions[0],
-								       /*pos5*/0,/*pos3*/elt->querystart - skip_left,
-								       plusp,genestrand);
-    for (i = 1; i < elt->npositions; i++) {
-      if ((nmatches = Genome_consecutive_matches_leftward(query_compress,/*left*/elt->positions[i],
-							  /*pos5*/0,/*pos3*/elt->querystart,
-							  plusp,genestrand)) > max_leftward) {
-	max_leftward = nmatches;
-      } else if (nmatches < *min_leftward) {
-	*min_leftward = nmatches;
-      }
-    }
-    return max_leftward;
-  }
-}
-
-
-static void
-Elt_fill_positions_all (Elt_T this, T sarray) {
-  Sarrayptr_T ptr;
-  Univcoord_T pos;
-  int i;
+Univcoord_T *
+Sarray_lookup (int *nhits, T sarray, char *query, UINT4 querylength, int queryoffset,
+	       Compress_T query_compress, bool plusp, int genestrand,
+	       char conversion[]) {
+  Univcoord_T *hits;
+  Sarrayptr_T initptr, finalptr, ptr;
+  bool successp;
+  UINT4 nmatches;
+  int k;
 
-  debug7(printf("Entering Elt_fill_positions_all on %p\n",this));
-  if (this->positions_allocated != NULL) {
-    debug7(printf("  positions_allocated is already non-NULL, so skipping\n"));
-    /* Don't free positions_allocated.  Use it. */
+  Sarray_read(&initptr,&finalptr,&successp,&nmatches,query,querylength,queryoffset,
+	      query_compress,sarray,plusp,genestrand,conversion);
 
+  
+  if (successp == false) {
+    *nhits = 0;
+    return (Univcoord_T *) NULL;
   } else {
-    this->npositions_allocated = this->npositions = this->finalptr - this->initptr + 1;
-    debug7(printf("  filling %d positions\n",this->npositions));
-
-    if (this->nmatches == 0 || this->npositions > EXCESS_SARRAY_HITS) {
-      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-      this->npositions_allocated = this->npositions = 0;
-    } else {
-#ifdef USE_QSORT
-      this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
-#else
-      this->positions_allocated = this->positions = (Univcoord_T *) MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
-#endif
-      i = 0;
-      ptr = this->initptr;
-      while (ptr <= this->finalptr) {
-	if ((pos = csa_lookup(sarray,ptr++)) >= (Univcoord_T) this->querystart) {
-	  this->positions[i++] = pos - this->querystart;
-	}
-      }
-      this->npositions = i;
-#ifdef USE_QSORT
-      qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
-#else
-      Sedgesort_uint4(this->positions,this->npositions);
-#endif
+    hits = (Univcoord_T *) MALLOC((finalptr - initptr + 1)*sizeof(UINT4));
+    k = 0;
+    for (ptr = initptr; ptr <= finalptr; ptr++) {
+      hits[k++] = Sarray_position(sarray,ptr);
     }
+    *nhits = k;
+    return hits;
   }
-
-  this->status = ELT_FILLED;
-  return;
 }
 
 
-#ifdef DEBUG7
-static void
-print_vector (__m128i x, char *label) {
-  __m128i a[1];
-  unsigned int *s = a;
-
-  _mm_store_si128(a,x);
-  printf("%s: %u %u %u %u\n",label,s[0],s[1],s[2],s[3]);
-  return;
-}
-
-static void
-print_vector_looking (__m128i x, Univcoord_T low, Univcoord_T high) {
-  __m128i a[1];
-  unsigned int *s = a;
-
-  _mm_store_si128(a,x);
-  printf("Looking at value %u, relative to low %u and high %u\n",s[0],low,high);
-  printf("Looking at value %u, relative to low %u and high %u\n",s[1],low,high);
-  printf("Looking at value %u, relative to low %u and high %u\n",s[2],low,high);
-  printf("Looking at value %u, relative to low %u and high %u\n",s[3],low,high);
-  return;
-}
-#endif
-
-
-#ifdef DEBUG8
-/* Non-SIMD methods for comparison */
-static void
-positions_compare (Univcoord_T *positions, int npositions,
-		   Univcoord_T *positions_std, int npositions_std) {
-  int i;
-  bool problemp = false;
-
-  if (npositions != npositions_std) {
-    fprintf(stderr,"npositions %d != npositions_std %d\n",npositions,npositions_std);
-    for (i = 0; i < npositions; i++) {
-      printf("%u\n",positions[i]);
-    }
-    printf("\n");
-
-    for (i = 0; i < npositions_std; i++) {
-      printf("%u\n",positions_std[i]);
-    }
-    printf("\n");
-    abort();
-
-  } else {
-    qsort(positions,npositions,sizeof(Univcoord_T),Univcoord_compare);
-    qsort(positions_std,npositions,sizeof(Univcoord_T),Univcoord_compare);
-    for (i = 0; i < npositions; i++) {
-      if (positions[i] != positions_std[i]) {
-	fprintf(stderr,"At %d, positions %u != positions_std %u\n",i,positions[i],positions_std[i]);
-	problemp = true;
-      }
-    }
-    if (problemp == true) {
-      abort();
-    }
-  }
-
-  return;
-}
-#endif
-
-
-#ifdef DEBUG8
-static Univcoord_T *
-fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
-		    Sarrayptr_T initptr, Sarrayptr_T finalptr,
-		    int querystart, Univcoord_T *array) {
-  Univcoord_T *more_positions;
-  Univcoord_T *positions, value;
-  Sarrayptr_T ptr, lastptr;
-  int i;
-
-  positions = (Univcoord_T *) MALLOC(GUESS_ALLOCATION * sizeof(Univcoord_T)); /* Return value, so cannot use alloca */
-
-  *npositions = 0;
-  ptr = initptr;      
-
-  while (ptr <= finalptr) {
-    debug7a(printf("Std: Looking at value %u, relative to low %u and high %u\n",array[ptr],low_adj,high_adj));
-    if ((value = CONVERT(array[ptr++])) < low_adj) {
-      /* Skip */
-    } else if (value > high_adj) {
-      /* Skip */
-    } else if (*npositions < GUESS_ALLOCATION) {
-      debug7(printf("Std: Found position %u between low %u and high %u, and within allocation\n",value,low_adj,high_adj));
-      positions[(*npositions)++] = value - querystart;
-    } else {
-      debug7(printf("Std: Found position %u between low %u and high %u, but exceeds allocation\n",value,low_adj,high_adj));
-      (*npositions)++;
-      lastptr = ptr;		/* saves us from going through the entire sarray below */
-    }
-  }
-
-  debug7(printf("Std method found %d positions\n",*npositions));
-  if (*npositions > GUESS_ALLOCATION) {
-    /* Copy the positions we have stored so far */
-    more_positions = (Univcoord_T *) MALLOC((*npositions) * sizeof(Univcoord_T));
-    memcpy(more_positions,positions,GUESS_ALLOCATION*sizeof(Univcoord_T));
-    FREE(positions);
-    positions = more_positions;
-    
-    i = GUESS_ALLOCATION;	/* Start count with the number stored */
-    ptr = lastptr;	/* One past the last ptr with a result */
-
-    while (i < *npositions) {
-      if ((value = CONVERT(array[--ptr])) < low_adj) {
-	/* Skip */
-      } else if (value > high_adj) {
-	/* Skip */
-      } else {
-	positions[i++] = value - querystart;
-      }
-    }
-  }
-
-  return positions;
-}
-#endif
-
-
-
-/* Call fill_positions_filtered_first for first time, which is
-   linear in number of entries or O(n), then on second call, do sort with O(n*log n),
-   plus O(log n) for each additional call */
-
-#ifdef HAVE_ALLOCA
-
-#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
-
-/* Using pext method, because _mm256_shuffle_epi32 doesn't work well
-because it works only within lanes, and MASTER_CONTROL does not extend
-well to 256 bits */
-
-static void
-fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
-  Univcoord_T low_adj, high_adj;
-  Univcoord_T *array = sarray->array, value0;
-  Sarrayptr_T *array_stop, *array_end, *array_ptr;
-  Univcoord_T *positions_temp;
-  Univcoord_T *out;
-  __m256i converted, adjusted, match;
-  __m256i floor, ceiling, values, adj, p;
-  unsigned int mask, selector;
-  unsigned int univ_selector = 0x76543210;
-  __m256i _selector, _varshifts;
-#if defined(REQUIRE_ALIGNMENT)
-  int n_prealign, k;
-#endif
-#if defined(DEBUG) || defined(DEBUG7)
-  int nmatches;
-#endif
-#ifdef DEBUG7
-  UINT8 pointer;
-  int i;
-#endif
-#ifdef DEBUG8
-  Univcoord_T *positions_std;
-  int npositions_std;
-#endif
-
-
-  debug(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-	       low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-  debug7(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-		low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-  
-  if (this->positions_allocated != NULL) {
-    /* Filled from a previous call */
-    FREE(this->positions_allocated);
-  }
-
-  if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
-    this->all_positions = (Univcoord_T *) NULL;
-
-  } else {
-    /* Function surrounded by HAVE_ALLOCA */
-#ifdef USE_QSORT
-    positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
-#else
-    positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1 + 1) * sizeof(Univcoord_T));
-#endif
-
-    low_adj = low + this->querystart;
-    high_adj = high + this->querystart;
-  
-    floor = _mm256_set1_epi32(low_adj - 1 - 2147483648);
-    ceiling = _mm256_set1_epi32(high_adj + 1 - 2147483648);
-    adj = _mm256_set1_epi32(this->querystart);
-    _varshifts = _mm256_set_epi32(28,24,20,16,12,8,4,0);
-
-    this->npositions_allocated = this->npositions = 0;
-#if defined(REQUIRE_ALIGNMENT)
-    array_ptr = &(array[this->initptr]);
-    
-    /* Initial part */
-    n_prealign = ((32 - ((UINT8) array_ptr & 0x1F))/4) & 0x7;
-    debug7(printf("Initial ptr is at location %p.  Need %d to get to 256-bit boundary\n",pointer,n_prealign));
-
-    debug7(printf("Initial part:\n"));
-    if (n_prealign > this->finalptr - this->initptr + 1) {
-      n_prealign = this->finalptr - this->initptr + 1;
-    }
-    for (k = 0; k < n_prealign; k++) {
-      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
-      if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) {
-	*out++ = value0 - this->querystart;
-      }
-    }
-#else
-    array_ptr = &(array[this->initptr]);
-#endif	/* REQUIRE_ALIGNMENT */
-
-
-    /* Aligned part */
-    if (this->finalptr < 8) {
-      array_stop = &(array[0]);
-    } else {
-      array_stop = &(array[this->finalptr - 8]);
-    }
-    array_end = &(array[this->finalptr]);
-
-    while (array_ptr < array_stop) {
-
-#if defined(REQUIRE_ALIGNMENT)
-      /* Use stream_load to avoid polluting the cache with suffix array entries */
-      values = _mm256_stream_load_si256((__m256i *) array_ptr);
-#else
-      /* It looks like loadu is just as fast as load */
-      values = _mm256_loadu_si256((__m256i *) array_ptr);
-#endif
-      debug7b(print_vector_uint_256(values));
-
-      converted = _mm256_sub_epi32(values,epi32_convert_256);
-      /* match = _mm256_andnot_si256(_mm256_cmpgt_epi32(floor,converted),_mm256_cmpgt_epi32(ceiling,converted)); -- This is off by 1 at floor */
-      match = _mm256_and_si256(_mm256_cmpgt_epi32(converted,floor),_mm256_cmpgt_epi32(ceiling,converted));
-      /* Example:
-	 {0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000} (8 x 32-bit ints) */
-      debug7b(print_vector_hex_256(match));
-
-      /* Get most significant from each byte from each byte to obtain a mask in terms of 4-bit nibbles */
-      mask = (unsigned int) _mm256_movemask_epi8(match);
-      /* 11110000 11110000 11110000 11110000  (32-bit int) */
-
-      if (mask) {
-	adjusted = _mm256_sub_epi32(values,adj);
-
-	selector = _pext_u32(univ_selector,mask); /* Requires compilation with -mbmi2 */
-	/* 01110110 01010100 00110010 00010000  (32-bit int, univ_selector) */
-	/* 00000000 00000000 01110101 00110001  (32-bit int, selector), equiv to 0x00007531 */
-	debug7b(printf("selector: %08X\n",selector));
-
-	_selector = _mm256_set1_epi32(selector);
-	/* {0x00007531, 0x00007531, 0x00007531, 0x00007531, 0x00007531, 0x00007531, 0x00007531, 0x00007531} */
-
-	_selector = _mm256_srlv_epi32(_selector,_varshifts);
-	/* {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000007, 0x00000075, 0x00000753, 0x00007531} */
-	debug7b(print_vector_hex_256(_selector));
-
-	p = _mm256_permutevar8x32_epi32(adjusted,_selector);
-	_mm256_storeu_si256((__m256i *) out, p);
-
-	/* Divide popcount(mask) by 4 to get number of matches */
-#ifdef HAVE_POPCNT
-	out += _popcnt32(mask)/4;
-	debug7b(printf("mask: %08X (%d ones)\n",mask,_popcnt32(mask)/4));
-#elif defined HAVE_MM_POPCNT
-	out += _mm_popcnt_u32(mask)/4;
-	debug7b(printf("mask: %08X (%d ones)\n",mask,_mm_popcnt_u32(mask)/4));
-#else
-	out += __builtin_popcount(mask)/4;
-	debug7b(printf("mask: %08X (%d ones)\n",mask,__builtin_popcount(mask)/4));
-#endif
-	debug7b(print_vector_uint_256(p));
-      }
-
-      array_ptr += 8;
-    }
-
-    /* Partial block at end; do scalar */
-    debug7(printf("\nFinal part:\n"));
-    while (array_ptr <= array_end) {
-      if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) {
-	*out++ = value0 - this->querystart;
-      }
-    }
-
-    this->npositions_allocated = this->npositions = out - positions_temp;
-    debug7(printf("SIMD method found %d positions\n",this->npositions));
-
-    /* Copy the positions into heap from temp in stack */
-    if (this->npositions == 0) {
-      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-    } else {
-      debug7(printf("Sorting %d positions\n",this->npositions));
-#ifdef USE_QSORT
-      qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
-#else
-      Sedgesort_uint4(positions_temp,this->npositions);
-#endif
-
-      /* Need to copy positions before the goal */
-#ifdef USE_QSORT
-      this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
-#else
-      this->positions_allocated = this->positions = MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
-#endif
-      memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
-#ifdef DEBUG7
-      for (i = 0; i < this->npositions; i++) {
-	printf("%u\n",this->positions[i]);
-      }
-#endif
-
-#if 0
-      /* Not sure why we were doing this.  We will find collinear set of diagonals later. */
-      /* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
-      /* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
-      /* ? Replace with a binary search */
-      i = 0;
-      while (i < this->npositions && positions_temp[i] < goal) {
-	debug7(printf("1 Skipping position %u (%u) < goal %u (%u)\n",
-		      positions_temp[i],positions_temp[i] - chroffset,goal,goal - chroffset));
-	i++;
-      }
-      this->positions += i;
-      this->npositions -= i;
-      debug7(printf("Remaining: %d positions\n",this->npositions));
-#endif
-    }
-    
-    /* Function surrounded by HAVE_ALLOCA */
-    FREEA(positions_temp);
-  }
-
-  return;
-}
-
-
-#elif defined(HAVE_SSSE3) && !defined(WORDS_BIGENDIAN)
-/* SSSE3 needed for _mm_shuffle_epi8 */
-
-/* Prefer shuffle_mask, because MASTER_CONTROL requires AVX command
-   _mm_permutevar_ps.  Cannot use pext approach, because that requires
-   BMI2. */
-
-/* Nevertheless, here is an explanation of MASTER_CONTROL:
-
-   For blocks of 128 bits:
-
- MASTER_CONTROL =
-    _mm_set_epi8(0x10, 0x12, 0x13, 0x12, 0x40, 0x68, 0x7C, 0x6B,
-                 0x00, 0x80, 0xC0, 0xBC, 0x00, 0x00, 0x00, 0xC0);
-
-  matchbits = _mm_movemask_ps(_mm_castsi128_ps(match));
-  p = _mm_permutevar_ps(input,_mm_srli_epi32(MASTER_CONTROL,matchbits*2));
-
-  These values come from rotating the control values horizontally, like this:
-
-  15 14 13 12  11 10 09 08  07 06 05 04  03 02 01 00  (matchbits)
-
-  00 01 00 10  00 01 00 11  00 01 00 10  00 01 00 XX  (control 15 downto 0, bits 1 and 0)
-  01 10 10 11  01 11 11 XX  01 10 10 XX  01 XX XX XX  (control 15 downto 0, bits 3 and 2)
-  10 11 11 XX  11 XX XX XX  10 XX XX XX  XX XX XX XX  (control 15 downto 0, bits 5 and 4)
-  11 XX XX XX  XX XX XX XX  XX XX XX XX  XX XX XX XX  (control 15 downto 0, bits 7 and 6)
-
-  and then reading in bytes from right to left for each row.
-
-  Alternatively, create the following table, and read in bits upward
-  starting from the rightmost column:
-
-  matchbits 00: XX XX XX XX ^
-  matchbits 01: XX XX XX 00 |
-  matchbits 02: XX XX XX 01 |
-  matchbits 03: XX XX 01 00 _
-  matchbits 04: XX XX XX 10 ^
-  matchbits 05: XX XX 10 00 |
-  matchbits 06: XX XX 10 01 |
-  matchbits 07: XX 10 01 00 _
-  matchbits 08: XX XX XX 11 .
-  matchbits 09: XX XX 11 00 .
-  matchbits 10: XX XX 11 01 .
-  matchbits 11: XX 11 01 00
-  matchbits 12: XX XX 11 10
-  matchbits 13: XX 11 10 00
-  matchbits 14: XX 11 10 01
-  matchbits 15: 11 10 01 00
-*/
-
-static void
-fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
-  Univcoord_T low_adj, high_adj;
-  Univcoord_T *array = sarray->array, value0;
-  Sarrayptr_T *array_stop, *array_end, *array_ptr;
-  Univcoord_T *positions_temp;
-  Univcoord_T *out;
-  __m128i converted, adjusted, match;
-  __m128i floor, ceiling, values, adj, p;
-  int matchbits;
-#if defined(REQUIRE_ALIGNMENT)
-  int n_prealign, k;
-#endif
-#ifndef USE_SHUFFLE_MASK
-  __m128i MASTER_CONTROL;
-#endif
-#ifdef DEBUG7
-  int i;
-#endif
-#ifdef DEBUG8
-  Univcoord_T *positions_std;
-  int npositions_std;
-#endif
-
-
-  debug(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-	       low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-  debug7(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-		low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-  
-  if (this->positions_allocated != NULL) {
-    /* Filled from a previous call */
-    FREE(this->positions_allocated);
-  }
-
-  if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
-    this->all_positions = (Univcoord_T *) NULL;
-
-  } else {
-    /* Function surrounded by HAVE_ALLOCA */
-    positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
-
-    low_adj = low + this->querystart;
-    high_adj = high + this->querystart;
-  
-    floor = _mm_set1_epi32(low_adj - 1 - 2147483648);
-    ceiling = _mm_set1_epi32(high_adj + 1 - 2147483648);
-    adj = _mm_set1_epi32(this->querystart);
-
-    this->npositions_allocated = this->npositions = 0;
-#if defined(REQUIRE_ALIGNMENT)
-    array_ptr = &(array[this->initptr]);
-    
-    /* Initial part */
-#ifdef HAVE_64_BIT
-    n_prealign = ((16 - ((UINT8) array_ptr & 0xF))/4) & 0x3;
-#else
-    n_prealign = ((16 - ((UINT4) array_ptr & 0xF))/4) & 0x3;
-#endif
-    debug7(printf("Initial ptr is at location %p.  Need %d to get to 128-bit boundary\n",pointer,n_prealign));
-
-    debug7(printf("Initial part:\n"));
-    if (n_prealign > this->finalptr - this->initptr + 1) {
-      n_prealign = this->finalptr - this->initptr + 1;
-    }
-    for (k = 0; k < n_prealign; k++) {
-      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
-      if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) {
-	*out++ = value0 - this->querystart;
-      }
-    }
-#else
-    array_ptr = &(array[this->initptr]);
-#endif	/* REQUIRE_ALIGNMENT */
-
-
-    /* Aligned part */
-    if (this->finalptr < 4) {
-      array_stop = &(array[0]);
-    } else {
-      array_stop = &(array[this->finalptr - 4]);
-    }
-    array_end = &(array[this->finalptr]);
-
-#ifndef USE_SHUFFLE_MASK
-    MASTER_CONTROL = _mm_setr_epi8(0x10, 0x12, 0x13, 0x12, 0x40, 0x68, 0x7C, 0x6B,
-				   0x00, 0x80, 0xC0, 0xBC, 0x00, 0x00, 0x00, 0xC0);
-#endif
-
-    while (array_ptr < array_stop) {
-#if defined(REQUIRE_ALIGNMENT)
-
-#ifdef HAVE_SSE4_1      
-      /* Use stream_load to avoid polluting the cache with suffix array entries */
-      values = _mm_stream_load_si128((__m128i *) array_ptr);
-#else
-      values = _mm_load_si128((__m128i *) array_ptr);
-#endif
-
-#else
-      /* It looks like loadu is just as fast as load */
-      values = _mm_loadu_si128((__m128i *) array_ptr);
-#endif
-      debug7b(print_vector_uint(values));
-
-      converted = _mm_sub_epi32(values,epi32_convert);
-      /* match = _mm_andnot_si128(_mm_cmpgt_epi32(floor,converted),_mm_cmpgt_epi32(ceiling,converted)); -- This is off by 1 at floor */
-      match = _mm_and_si128(_mm_cmpgt_epi32(converted,floor),_mm_cmplt_epi32(converted,ceiling));
-      debug7b(print_vector_hex(match));
-
-      matchbits = _mm_movemask_ps(_mm_castsi128_ps(match));
-      if (matchbits) {
-	adjusted = _mm_sub_epi32(values,adj);
-#ifdef USE_SHUFFLE_MASK
-	p = _mm_shuffle_epi8(adjusted, shuffle_mask16[matchbits]);
-#else
-	p = _mm_castps_si128(_mm_permutevar_ps(_mm_castsi128_ps(adjusted),_mm_srli_epi32(MASTER_CONTROL,matchbits*2)));
-#endif
-	_mm_storeu_si128((__m128i *) out, p);
-
-#if !defined(HAVE_SSE4_2)
-	out += __builtin_popcount(matchbits);
-	debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,__builtin_popcount(matchbits)));
-#elif defined(HAVE_POPCNT)
-	out += _popcnt32(matchbits);
-	debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,_popcnt32(matchbits)));
-#elif defined HAVE_MM_POPCNT
-	out += _mm_popcnt_u32(matchbits);
-	debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,_mm_popcnt_u32(matchbits)));
-#else
-	out += __builtin_popcount(matchbits);
-	debug7b(printf("matchbits: %08X (%d ones)\n",matchbits,__builtin_popcount(matchbits)));
-#endif
-	debug7b(print_vector_hex(shuffle_mask16[matchbits]));
-	debug7b(print_vector_uint(p));
-      }
-
-      array_ptr += 4;
-    }
-
-    /* Partial block at end; do scalar */
-    debug7(printf("\nFinal part:\n"));
-    while (array_ptr <= array_end) {
-      if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) {
-	*out++ = value0 - this->querystart;
-      }
-    }
-
-    this->npositions_allocated = this->npositions = out - positions_temp;
-    debug7(printf("SIMD method found %d positions\n",this->npositions));
-
-    /* Copy the positions into heap from temp in stack */
-    if (this->npositions == 0) {
-      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-    } else {
-      debug7(printf("Sorting %d positions\n",this->npositions));
-#ifdef USE_QSORT
-      qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
-#else
-      Sedgesort_uint4(positions_temp,this->npositions);
-#endif
-
-      /* Need to copy positions before the goal */
-#ifdef USE_QSORT
-      this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
-#else
-      this->positions_allocated = this->positions = MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
-#endif
-      memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
-#ifdef DEBUG7
-      for (i = 0; i < this->npositions; i++) {
-	printf("%u\n",this->positions[i]);
-      }
-#endif
-
-#if 0
-      /* Not sure why we were doing this.  We will find collinear set of diagonals later. */
-      /* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
-      /* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
-      /* ? Replace with a binary search */
-      i = 0;
-      while (i < this->npositions && positions_temp[i] < goal) {
-	debug7(printf("1 Skipping position %u (%u) < goal %u (%u)\n",
-		      positions_temp[i],positions_temp[i] - chroffset,goal,goal - chroffset));
-	i++;
-      }
-      this->positions += i;
-      this->npositions -= i;
-      debug7(printf("Remaining: %d positions\n",this->npositions));
-#endif
-    }
-    
-    /* Function surrounded by HAVE_ALLOCA */
-    FREEA(positions_temp);
-  }
-
-  return;
-}
-
-
-#else
-/* Bigendian or missing SSSE3 */
-
-static void
-fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
-  Sarrayptr_T ptr;
-  Univcoord_T low_adj, high_adj;
-  Univcoord_T *array = sarray->array;
-  Univcoord_T value3, value2, value1, value0;
-  Univcoord_T *positions_temp;
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-#ifdef HAVE_64_BIT
-  UINT8 pointer;
-#else
-  UINT4 pointer;
-#endif
-  __m128i floor, ceiling, values, compare;
-  int n_prealign, k;
-#endif
-
-
-  debug7(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-		low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-  
-  if (this->positions_allocated != NULL) {
-    /* Filled from a previous call */
-    FREE(this->positions_allocated);
-  }
-
-  if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
-    this->all_positions = (Univcoord_T *) NULL;
-
-  } else {
-    /* Function surrounded by HAVE_ALLOCA */
-#ifdef USE_QSORT
-    positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
-#else
-    positions_temp = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1 + 1) * sizeof(Univcoord_T));
-#endif
-
-    low_adj = low + this->querystart;
-    high_adj = high + this->querystart;
-
-    this->npositions_allocated = this->npositions = 0;
-    ptr = this->initptr;
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-    if (ptr + 3 > this->finalptr) { /* ptr + 4 > (this->finalptr + 1) */
-      /* Handle in normal manner */
-      debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
-      while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else {
-	  debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	  positions_temp[this->npositions++] = value0 - this->querystart;
-	}
-      }
-
-    } else {
-#ifdef HAVE_64_BIT
-      pointer = (UINT8) &(array[ptr]);
-#else
-      pointer = (UINT4) &(array[ptr]);
-#endif
-      n_prealign = ((16 - (pointer & 0xF))/4) & 0x3;
-      debug7(printf("Initial ptr is at location %p.  Need %d to get to 128-bit boundary\n",
-		    &(array[ptr]),n_prealign));
-
-      /* Initial part */
-      debug7(printf("Initial part:\n"));
-      for (k = 0; k < n_prealign; k++) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
-	if ((value0 = CONVERT(array[ptr++])) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else {
-	  debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	  positions_temp[this->npositions++] = value0 - this->querystart;
-	}
-      }
-
-      /* Aligned part */
-      debug7(printf("\nAligned part:\n"));
-      /* Since compare operations not available for unsigned ints, using the fact that
-	 unsigned_gt(a,b) is equivalent to signed_gt(a - 2^31, b - 2^31) */
-      floor = _mm_set1_epi32(low_adj - 1 - 2147483648);
-      ceiling = _mm_set1_epi32(high_adj + 1 - 2147483648);
-      while (ptr + 3 <= this->finalptr) { /* ptr + 4 < this->finalptr + 1 */
-	/* Missing SSSE3 so cannot do stream load */
-	values = _mm_load_si128((__m128i *) &(array[ptr]));
-	debug7a(print_vector_looking(values,low_adj,high_adj));
-	values = _mm_sub_epi32(values,epi32_convert);
-	compare = _mm_and_si128(_mm_cmpgt_epi32(values,floor),_mm_cmplt_epi32(values,ceiling));
-	if (/*cmp*/_mm_movemask_epi8(compare) == 0x0000) {
-	  /* All results are false, indicating no values between low_adj and high_adj (most common case) */
-	  ptr += 4;
-	} else {
-	  value3 = CONVERT(array[ptr++]);
-	  if (value3 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value3,low_adj));
-	  } else if (value3 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value3,high_adj));
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value3,low_adj,high_adj));
-	    positions_temp[this->npositions++] = value3 - this->querystart;
-	  }
-
-	  value2 = CONVERT(array[ptr++]);
-	  if (value2 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value2,low_adj));
-	  } else if (value2 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value2,high_adj));
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value2,low_adj,high_adj));
-	    positions_temp[this->npositions++] = value2 - this->querystart;
-	  }
-
-	  value1 = CONVERT(array[ptr++]);
-	  if (value1 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value1,low_adj));
-	  } else if (value1 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value1,high_adj));
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value1,low_adj,high_adj));
-	    positions_temp[this->npositions++] = value1 - this->querystart;
-	  }
-
-	  value0 = CONVERT(array[ptr++]);
-	  if (value0 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value0,low_adj));
-	  } else if (value0 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value0,high_adj));
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	    positions_temp[this->npositions++] = value0 - this->querystart;
-	  }
-	}
-      }
-
-      /* Final part */
-      debug7(printf("\nFinal part:\n"));
-      while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else {
-	  debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	  positions_temp[this->npositions++] = value0 - this->querystart;
-	}
-      }
-    }
-
-#else
-
-    while (ptr <= this->finalptr) {
-      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-      if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
-	/* Skip */
-      } else if (value0 > high_adj) {
-	/* Skip */
-      } else {
-	debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	positions_temp[this->npositions++] = value0 - this->querystart;
-      }
-    }
-#endif
-
-    debug7(printf("SIMD method found %d positions\n",this->npositions));
-
-    /* Copy the positions from temp */
-    if (this->npositions == 0) {
-      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-    } else {
-      debug7(printf("Sorting %d positions\n",this->npositions));
-#ifdef USE_QSORT
-      qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
-#else
-      Sedgesort_uint4(positions_temp,this->npositions);
-#endif
-
-      /* Need to copy positions before the goal */
-      this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
-      memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
-
-#if 0
-      /* Not sure why we were doing this.  We will find collinear set of diagonals later. */
-      /* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
-      /* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
-      /* ? Replace with a binary search */
-      i = 0;
-      while (i < this->npositions && positions_temp[i] < goal) {
-	debug7(printf("2 Skipping position %u < goal %u\n",positions_temp[i] - chroffset,goal - chroffset));
-	i++;
-      }
-      this->positions += i;
-      this->npositions -= i;
-      debug7(printf("Remaining: %d positions\n",this->npositions));
-#endif
-    }
-    
-    /* Function surrounded by HAVE_ALLOCA */
-    FREEA(positions_temp);
-  }
-
-  return;
-}
-#endif
-
-
-#else
-/* Non-ALLOCA version */
-
-static void
-fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
-  Sarrayptr_T ptr, lastptr;
-  int nmatches;
-  int i;
-  Univcoord_T low_adj, high_adj;
-  Univcoord_T value3, value2, value1, value0;
-  Univcoord_T *array = sarray->array;
-  Univcoord_T *more_positions;
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-#ifdef HAVE_64_BIT
-  UINT8 pointer;
-#else
-  UINT4 pointer;
-#endif
-  __m128i floor, ceiling, values, compare;
-  int n_prealign, k;
-#endif
-
-
-  debug7(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-		low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-  
-  if (this->positions_allocated != NULL) {
-    /* Filled from a previous call */
-    FREE(this->positions_allocated);
-  }
-
-  if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
-    this->all_positions = (Univcoord_T *) NULL;
-
-  } else {
-    /* Guess at allocation size */
-#ifdef USE_QSORT
-    this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(GUESS_ALLOCATION * sizeof(Univcoord_T));
-#else
-    this->positions_allocated = this->positions = (Univcoord_T *) MALLOC((GUESS_ALLOCATION + 1) * sizeof(Univcoord_T));
-#endif
-
-    low_adj = low + this->querystart;
-    high_adj = high + this->querystart;
-
-    this->npositions_allocated = this->npositions = 0;
-    ptr = this->initptr;
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-    if (ptr + 3 > this->finalptr) { /* ptr + 4 > (this->finalptr + 1) */
-      /* Handle in normal manner */
-      debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
-      while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else if (this->npositions < GUESS_ALLOCATION) {
-	  debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	  this->positions[this->npositions++] = value0 - this->querystart;
-	} else {
-	  debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
-	  this->npositions++;
-	  lastptr = ptr;		/* saves us from going through the entire sarray below */
-	}
-      }
-
-    } else {
-#ifdef HAVE_64_BIT
-      pointer = (UINT8) &(array[ptr]);
-#else
-      pointer = (UINT4) &(array[ptr]);
-#endif
-      n_prealign = ((16 - (pointer & 0xF))/4) & 0x3;
-      debug7(printf("Initial ptr is at location %p.  Need %d to get to 128-bit boundary\n",
-		    &(array[ptr]),n_prealign));
-
-      /* Initial part */
-      debug7(printf("Initial part:\n"));
-      for (k = 0; k < n_prealign; k++) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj));
-	if ((value0 = CONVERT(array[ptr++])) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else if (this->npositions < GUESS_ALLOCATION) {
-	  debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	  this->positions[this->npositions++] = value0 - this->querystart;
-	} else {
-	  debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
-	  this->npositions++;
-	  lastptr = ptr;		/* saves us from going through the entire sarray below */
-	}
-      }
-
-      /* Aligned part */
-      debug7(printf("\nAligned part:\n"));
-      /* Since compare operations not available for unsigned ints, using the fact that
-	 unsigned_gt(a,b) is equivalent to signed_gt(a - 2^31, b - 2^31) */
-      floor = _mm_set1_epi32(low_adj - 1 - 2147483648);
-      ceiling = _mm_set1_epi32(high_adj + 1 - 2147483648);
-      while (ptr + 3 <= this->finalptr) { /* ptr + 4 < this->finalptr + 1 */
-	values = _mm_load_si128((__m128i *) &(array[ptr]));
-	debug7a(print_vector_looking(values,low_adj,high_adj));
-	values = _mm_sub_epi32(values,epi32_convert);
-	compare = _mm_and_si128(_mm_cmpgt_epi32(values,floor),_mm_cmplt_epi32(values,ceiling));
-	if (/*cmp*/_mm_movemask_epi8(compare) == 0x0000) {
-	  /* All results are false, indicating no values between low_adj and high_adj (most common case) */
-	  ptr += 4;
-	} else {
-	  value3 = CONVERT(array[ptr++]);
-	  if (value3 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value3,low_adj));
-	  } else if (value3 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value3,high_adj));
-	  } else if (this->npositions < GUESS_ALLOCATION) {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value3,low_adj,high_adj));
-	    this->positions[this->npositions++] = value3 - this->querystart;
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value3,low_adj,high_adj));
-	    this->npositions++;
-	    lastptr = ptr;		/* saves us from going through the entire sarray below */
-	  }
-
-	  value2 = CONVERT(array[ptr++]);
-	  if (value2 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value2,low_adj));
-	  } else if (value2 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value2,high_adj));
-	  } else if (this->npositions < GUESS_ALLOCATION) {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value2,low_adj,high_adj));
-	    this->positions[this->npositions++] = value2 - this->querystart;
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value2,low_adj,high_adj));
-	    this->npositions++;
-	    lastptr = ptr;		/* saves us from going through the entire sarray below */
-	  }
-
-	  value1 = CONVERT(array[ptr++]);
-	  if (value1 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value1,low_adj));
-	  } else if (value1 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value1,high_adj));
-	  } else if (this->npositions < GUESS_ALLOCATION) {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value1,low_adj,high_adj));
-	    this->positions[this->npositions++] = value1 - this->querystart;
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value1,low_adj,high_adj));
-	    this->npositions++;
-	    lastptr = ptr;		/* saves us from going through the entire sarray below */
-	  }
-
-	  value0 = CONVERT(array[ptr++]);
-	  if (value0 < low_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u < low %u\n",value0,low_adj));
-	  } else if (value0 > high_adj) {
-	    /* Skip */
-	    debug7(printf("Skipping position %u > high %u\n",value0,high_adj));
-	  } else if (this->npositions < GUESS_ALLOCATION) {
-	    debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	    this->positions[this->npositions++] = value0 - this->querystart;
-	  } else {
-	    debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
-	    this->npositions++;
-	    lastptr = ptr;		/* saves us from going through the entire sarray below */
-	  }
-	}
-      }
-
-      /* Final part */
-      debug7(printf("\nFinal part:\n"));
-      while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else if (this->npositions < GUESS_ALLOCATION) {
-	  debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	  this->positions[this->npositions++] = value0 - this->querystart;
-	} else {
-	  debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
-	  this->npositions++;
-	  lastptr = ptr;		/* saves us from going through the entire sarray below */
-	}
-      }
-    }
-
-#else
-
-    while (ptr <= this->finalptr) {
-      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-      if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
-	/* Skip */
-      } else if (value0 > high_adj) {
-	/* Skip */
-      } else if (this->npositions < GUESS_ALLOCATION) {
-	debug7(printf("Found position %u between low %u and high %u, and within allocation\n",value0,low_adj,high_adj));
-	this->positions[this->npositions++] = value0 - this->querystart;
-      } else {
-	debug7(printf("Found position %u between low %u and high %u, but exceeds allocation\n",value0,low_adj,high_adj));
-	this->npositions++;
-	lastptr = ptr;		/* saves us from going through the entire sarray below */
-      }
-    }
-#endif
-
-    debug7(printf("SIMD method found %d positions\n",this->npositions));
-    if (this->npositions > GUESS_ALLOCATION) {
-      /* Handle the case if we exceeded GUESS_ALLOCATION */
-
-      /* Copy the positions we have stored so far */
-#ifdef USE_QSORT
-      more_positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
-#else
-      more_positions = (Univcoord_T *) MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
-#endif
-      memcpy(more_positions,this->positions,GUESS_ALLOCATION*sizeof(Univcoord_T));
-      FREE(this->positions_allocated);
-      this->positions_allocated = this->positions = more_positions;
-
-      i = GUESS_ALLOCATION;	/* Start count with the number stored */
-      ptr = lastptr;		/* One past the last ptr with a result */
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-      if (this->initptr + 4 < ptr) {
-	while (i < this->npositions) {
-	  if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
-	    /* Skip */
-	  } else if (value0 > high_adj) {
-	    /* Skip */
-	  } else {
-	    this->positions[i++] = value0 - this->querystart;
-	  }
-	}
-
-      } else {
-#ifdef HAVE_64_BIT
-	pointer = (UINT8) &(array[ptr]);
-#else
-	pointer = (UINT4) &(array[ptr]);
-#endif
-	n_prealign = ((pointer & 0xF)/4) & 0x3;
-	debug7(printf("Initial ptr is at location %p.  Need %d to get to 128-bit boundary\n",
-		      &(array[ptr]),n_prealign));
-
-	/* Initial part */
-	while (i < this->npositions) {
-	  if ((value0 = CONVERT(array[--ptr])) < low_adj) {
-	    /* Skip */
-	  } else if (value0 > high_adj) {
-	    /* Skip */
-	  } else {
-	    this->positions[i++] = value0 - this->querystart;
-	  }
-	}
-
-	/* Aligned part */
-	while (i < this->npositions && this->initptr + 4 < ptr) {
-	  values = _mm_load_si128((__m128i *) &(array[ptr-4]));
-	  values = _mm_sub_epi32(values,epi32_convert);
-	  compare = _mm_and_si128(_mm_cmpgt_epi32(values,floor),_mm_cmplt_epi32(values,ceiling));
-	  if (/*cmp*/_mm_movemask_epi8(compare) == 0x0000) {
-	    /* All results are false, indicating no values between low_adj and high_adj (most common case) */
-	    ptr -= 4;
-	  } else {
-	    value0 = CONVERT(array[--ptr]);
-	    if (value0 < low_adj) {
-	      /* Skip */
-	    } else if (value0 > high_adj) {
-	      /* Skip */
-	    } else {
-	      this->positions[i++] = value0 - this->querystart;
-	    }
-
-	    value1 = CONVERT(array[--ptr]);
-	    if (value1 < low_adj) {
-	      /* Skip */
-	    } else if (value1 > high_adj) {
-	      /* Skip */
-	    } else {
-	      this->positions[i++] = value1 - this->querystart;
-	    }
-
-	    value2 = CONVERT(array[--ptr]);
-	    if (value2 < low_adj) {
-	      /* Skip */
-	    } else if (value2 > high_adj) {
-	      /* Skip */
-	    } else {
-	      this->positions[i++] = value2 - this->querystart;
-	    }
-
-	    value3 = CONVERT(array[--ptr]);
-	    if (value3 < low_adj) {
-	      /* Skip */
-	    } else if (value3 > high_adj) {
-	      /* Skip */
-	    } else {
-	      this->positions[i++] = value3 - this->querystart;
-	    }
-	  }
-  	}
-	  
-	/* Last part */
-	while (i < this->npositions) {
-	  if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
-	    /* Skip */
-	  } else if (value0 > high_adj) {
-	    /* Skip */
-	  } else {
-	    this->positions[i++] = value0 - this->querystart;
-	  }
-	}
-      }
-
-#else
-
-      while (i < this->npositions) {
-	if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
-	  /* Skip */
-	} else if (value0 > high_adj) {
-	  /* Skip */
-	} else {
-	  this->positions[i++] = value0 - this->querystart;
-	}
-      }
-#endif
-    }
-
-#ifdef USE_QSORT
-    qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
-#else
-    Sedgesort_uint4(this->positions,this->npositions);
-#endif
-    debug7(printf("Sorting %d positions\n",this->npositions));
-
-#if 0
-    /* Not sure why we were doing this.  We will find collinear set of diagonals later. */
-    /* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */
-    /* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */
-    i = 0;
-    while (i < this->npositions && this->positions[i] < goal) {
-      debug7(printf("3 Skipping position %u < goal %u\n",this->positions[i] - chroffset,goal - chroffset));
-      i++;
-    }
-    this->positions += i;
-    this->npositions -= i;
-    debug7(printf("Remaining: %d positions\n",this->npositions));
-#endif
-  }
-
-  return;
-}
-  
-#endif
-
-
-/* ? Returns first entry that is >= goal */
-static int
-binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) {
-  int middlei;
-
-  debug10(printf("entered binary search with lowi=%d, highi=%d, goal=%u\n",lowi,highi,goal));
-
-  while (lowi < highi) {
-    middlei = lowi + ((highi - lowi) / 2);
-    debug10(printf("  binary: %d:%u %d:%u %d:%u   vs. %u\n",
-		   lowi,positions[lowi],middlei,positions[middlei],
-		   highi-1,positions[highi-1],goal));
-    if (goal < positions[middlei]) {
-      highi = middlei;
-    } else if (goal > positions[middlei]) {
-      lowi = middlei + 1;
-    } else {
-      debug10(printf("binary search returns %d\n",middlei));
-      return middlei;
-    }
-  }
-
-  debug10(printf("binary search returns %d\n",highi));
-  return highi;
-}
-
-
-
-/* Used upon second call to Elt_fill_positions_filtered */
-static void
-fill_positions_filtered_again (Elt_T this, Univcoord_T low, Univcoord_T high) {
-  int lowi, highi, i;
-
-
-  debug(printf("Entered fill_positions_filtered_again with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
-	       low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
-
-  if (this->positions_allocated != NULL) {
-    /* Filled from a previous call */
-    FREE(this->positions_allocated);
-  }
-
-  if (this->n_all_positions == 0) {
-    this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-    this->npositions_allocated = this->npositions = 0;
-
-  } else {
-    /* low_adj and high_adj are inclusive */
-    lowi = binary_search(/*lowi*/0,/*highi*/this->n_all_positions,this->all_positions,/*goal*/low + this->querystart);
-    highi = binary_search(lowi,/*highi*/this->n_all_positions,this->all_positions,/*goal*/high + this->querystart + 1) - 1;
-    if ((this->npositions_allocated = this->npositions = highi - lowi + 1) == 0) {
-      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-
-    } else {
-      this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
-      memcpy(this->positions,&(this->all_positions[lowi]),this->npositions*sizeof(Univcoord_T));
-      for (i = 0; i < this->npositions; i++) {
-	this->positions[i] -= this->querystart;
-      }
-    }
-  }
-
-  return;
-}
-
-
-
-static void
-Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T low, Univcoord_T high,
-			     Compress_T query_compress, bool plusp, int genestrand, bool multiplep) {
-  int nmatches;
-#ifdef DEBUG8
-  Univcoord_T *positions_std;
-  int npositions_std;
-#endif
-#ifdef WORDS_BIGENDIAN
-  int i;
-#endif
-
-
-  if (this->nmatches == 0 || this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
-    /* Check for an extension */
-    nmatches = Genome_consecutive_matches_rightward(query_compress,/*left*/goal,/*pos5*/this->querystart,
-						    /*pos3*/this->queryend + 1,plusp,genestrand);
-    debug7(printf("rightward at goal %u from %d to %d shows %d matches (want %d)\n",goal,this->querystart,this->queryend,
-		  nmatches,this->queryend - this->querystart + 1));
-
-    if (this->positions_allocated != NULL) {
-      /* Filled from a previous call */
-      FREE(this->positions_allocated);
-    }
-
-    if (nmatches == this->queryend - this->querystart + 1) {
-      /* Create a position that works */
-      this->positions_allocated = this->positions = (Univcoord_T *) CALLOC(1,sizeof(Univcoord_T));
-      this->positions[0] = goal;
-      this->npositions_allocated = this->npositions = 1;
-    } else {
-      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
-      this->npositions_allocated = this->npositions = 0;
-    }
-    return;			/* Don't even try other methods */
-
-  } else if (multiplep == true) {
-    if (this->status == ELT_VIRGIN) {
-      /* Just go directly to sorting method, and skip SIMD filtering method */
-      this->status = ELT_UNSORTED;
-    }
-  }
-
-  if (this->status == ELT_VIRGIN) {
-    fill_positions_filtered_first(this,sarray,low,high);
-    if (this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
-      /* Just keep filtering using SIMD method */
-      this->all_positions = (Univcoord_T *) NULL;
-      this->n_all_positions = 0;
-    } else {
-      this->status = ELT_UNSORTED;
-    }
-
-  } else if (this->status == ELT_UNSORTED) {
-    if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->npositions > EXCESS_SARRAY_HITS*/) {
-      this->all_positions = (Univcoord_T *) NULL;
-      this->n_all_positions = 0;
-    } else {
-#ifdef USE_QSORT
-      this->all_positions = (Univcoord_T *) MALLOC(this->n_all_positions*sizeof(Univcoord_T));
-#else
-      this->all_positions = (Univcoord_T *) MALLOC((this->n_all_positions + 1) * sizeof(Univcoord_T));
-#endif
-#ifdef WORDS_BIGENDIAN
-      for (i = 0; i < this->n_all_positions; i++) {
-	this->all_positions[i] = Bigendian_convert_uint(sarray->array[this->initptr+i]);
-      }
-#else
-      memcpy(this->all_positions,&(sarray->array[this->initptr]),this->n_all_positions*sizeof(Univcoord_T));
-#endif
-#ifdef USE_QSORT
-      qsort(this->all_positions,this->n_all_positions,sizeof(Univcoord_T),Univcoord_compare);
-#else
-      Sedgesort_uint4(this->all_positions,this->n_all_positions);
-#endif
-    }
-#ifdef DEBUG10
-    for (i = 0; i < this->n_all_positions; i++) {
-      printf("%d: %u\n",i,this->all_positions[i]);
-    }
-    printf("\n");
-#endif
-
-    fill_positions_filtered_again(this,low,high);
-    this->status = ELT_SORTED;
-
-  } else {
-    /* ELT_SORTED */
-    fill_positions_filtered_again(this,low,high);
-  }
-
-#ifdef DEBUG8
-  positions_std = fill_positions_std(&npositions_std,/*low_adj*/low + this->querystart,
-				     /*high_adj*/high + this->querystart,
-				     this->initptr,this->finalptr,this->querystart,sarray->array);
-  positions_compare(this->positions_allocated,this->npositions_allocated,positions_std,npositions_std);
-  FREE(positions_std);
-#endif
-
-  return;
-}
-
-
-#if 0
-static void
-Elt_dump_list (List_T list) {
-  List_T p;
-  Elt_T elt;
-  int maxn = 0, k;
-
-  for (p = list; p != NULL; p = p->rest) {
-    elt = (Elt_T) p->first;
-    if (elt->npositions > maxn) {
-      maxn = elt->npositions;
-    }
-  }
-
-  for (k = 0; k < maxn /* && k < 100 */; k++) {
-    for (p = list; p != NULL; p = p->rest) {
-      elt = (Elt_T) p->first;
-      if (k >= elt->npositions) {
-	printf("\t");
-      } else {
-	printf("%d..%d:%u\t",elt->querystart,elt->queryend,elt->positions[k]);
-      }
-    }
-    printf("\n");
-  }
-  printf("\n");
-
-  return;
-}
-#endif
-
-#ifdef DEBUG
-static void
-Elt_dump (Elt_T elt) {
-  int k;
-
-  printf("Elt %d..%d (SA %u+%d) with %d positions:\n",
-	 elt->querystart,elt->queryend,elt->initptr,elt->finalptr - elt->initptr,elt->npositions);
-  for (k = 0; k < elt->npositions; k++) {
-    printf("  %u\n",elt->positions[k]);
-  }
-  printf("\n");
-
-  return;
-}
-#endif
-
-
-
-#define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
-#define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
-
-
-#if 0
-/* Copied to stage1hr.c */
-static int
-donor_match_length_cmp (const void *a, const void *b) {
-  Stage3end_T x = * (Stage3end_T *) a;
-  Stage3end_T y = * (Stage3end_T *) b;
-  
-  int x_length = Substring_match_length_orig(Stage3end_substring_donor(x));
-  int y_length = Substring_match_length_orig(Stage3end_substring_donor(y));
-
-  if (x_length < y_length) {
-    return -1;
-  } else if (y_length < x_length) {
-    return +1;
-  } else {
-    return 0;
-  }
-}
-#endif
-
-
-#if 0
-/* Copied to stage1hr.c */
-static int
-acceptor_match_length_cmp (const void *a, const void *b) {
-  Stage3end_T x = * (Stage3end_T *) a;
-  Stage3end_T y = * (Stage3end_T *) b;
-  
-  int x_length = Substring_match_length_orig(Stage3end_substring_acceptor(x));
-  int y_length = Substring_match_length_orig(Stage3end_substring_acceptor(y));
-
-  if (x_length < y_length) {
-    return -1;
-  } else if (y_length < x_length) {
-    return +1;
-  } else {
-    return 0;
-  }
-}
-#endif
-
-
-/* Also defined in stage1hr.c */
-#define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
-#define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
-
-#if 0
-/* Previously called collect_elt_matches */
-static bool
-solve_twopart (int *found_score, List_T *subs, List_T *indels, List_T *ambiguous, List_T *singlesplicing,
-	       int querystart_same, int queryend_same,
-	       Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
-	       Chrpos_T chrlength, Univcoord_T goal, List_T rightward_set, List_T leftward_set,
-	       int querylength, Compress_T query_compress,
-	       bool plusp, int genestrand, bool first_read_p, int nmisses_allowed) {
-  bool twopartp = false;
-  List_T set, p;
-  Stage3end_T hit, *hitarray;
-  Elt_T elt;
-  Univcoord_T left, left1, left2, *array;
-  Uintlist_T difflist = NULL;	/* Won't work with LARGE_GENOMES */
-  int nmismatches, nindels;
-  int nsame, ndiff;
-  int querystart_diff, queryend_diff, indel_pos;
-#if 0
-  int nmismatches1, nmismatches2;
-#endif
-
-  List_T accepted_hits, rejected_hits;
-  List_T spliceends_sense, spliceends_antisense, lowprob;
-  List_T donor_hits, acceptor_hits;
-  int donor_length, acceptor_length;
-  int nhits, nspliceends_sense, nspliceends_antisense, n_good_spliceends;
-  int best_nmismatches, nmismatches_donor, nmismatches_acceptor;
-  double best_prob, prob;
-  Substring_T donor, acceptor;
-
-  Uintlist_T ambcoords;
-  Intlist_T amb_knowni, amb_nmismatches;
-  Doublelist_T amb_probs;
-
-  int segmenti_donor_nknown, segmentj_acceptor_nknown,
-    segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
-  int k, j, i, n;
-  bool segmenti_usedp, segmentj_usedp;
-  bool foundp;
-  int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos,  *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
-    *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
-
-
-  /* Potential success */
-  debug7(printf("  successful candidate found\n"));
-  if (goal < (Univcoord_T) querylength) {
-    debug7(printf("  Goes over beginning of chromosome\n"));
-    return false;
-  } else if (goal + querylength > chrhigh) {
-    debug7(printf("  Goes over end of chromosome\n"));
-    return false;
-  } else {
-    left = goal /* - querylength */;
-
-#ifdef HAVE_ALLOCA
-    if (querylength <= MAX_STACK_READLENGTH) {
-      segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-      segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    } else {
-      segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-      segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    }
-#else
-    segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-#endif
-  }
-
-
-  nsame = ndiff = 0;
-  querystart_diff = querylength;
-  queryend_diff = 0;
-  for (set = rightward_set; set /* != NULL */; set = set->rest) {
-    elt = (Elt_T) set->first;
-    debug7(printf("%d..%d:%u vs %u: ",elt->querystart,elt->queryend,elt->positions[-1],goal));
-    /* assert(elt->status != ELT_VIRGIN); */
-    if (elt->positions[-1] == goal) {
-      debug7(printf("same\n"));
-      if (elt->querystart < querystart_same) {
-	querystart_same = elt->querystart;
-      }
-      if (elt->queryend > queryend_same) {
-	queryend_same = elt->queryend;
-      }
-      nsame++;
-
-    } else {
-#if 0
-      /* Assertion holds because of values for low and high given to Elt_fill_positions_filtered */
-      assert(elt->positions[-1] + max_insertionlen + overall_max_distance > goal &&
-	     elt->positions[-1] < goal + max_insertionlen + overall_max_distance);
-#endif
-
-      debug7(printf("diff (npositions %d)\n",elt->npositions));
-      debug7(printf("Pushing position %u\n",elt->positions[-1]));
-      difflist = Uintlist_push(difflist,elt->positions[-1]);
-      for (i = 0; i < elt->npositions; i++) {
-	debug7(printf("Pushing position %u\n",elt->positions[i]));
-	difflist = Uintlist_push(difflist,elt->positions[i]);
-      }
-      if (elt->querystart < querystart_diff) {
-	querystart_diff = elt->querystart;
-      }
-      if (elt->queryend > queryend_diff) {
-	queryend_diff = elt->queryend;
-      }
-      ndiff++;
-    }
-  }
-
-  for (set = leftward_set; set /* != NULL */; set = set->rest) {
-    elt = (Elt_T) set->first;
-    debug7(printf("%d..%d:%u vs %u: ",elt->querystart,elt->queryend,elt->positions[-1],goal));
-    /* assert(elt->status != ELT_VIRGIN); */
-    if (elt->positions[-1] == goal) {
-      debug7(printf("same\n"));
-      if (elt->querystart < querystart_same) {
-	querystart_same = elt->querystart;
-      }
-      if (elt->queryend > queryend_same) {
-	queryend_same = elt->queryend;
-      }
-      nsame++;
-
-    } else {
-#if 0
-      /* Assertion holds because of values for low and high given to Elt_fill_positions_filtered */
-      assert(elt->positions[-1] + max_insertionlen + overall_max_distance > goal &&
-	     elt->positions[-1] < goal + max_insertionlen + overall_max_distance);
-#endif
-
-      debug7(printf("diff (npositions %d)\n",elt->npositions));
-      debug7(printf("Pushing position %u\n",elt->positions[-1]));
-      difflist = Uintlist_push(difflist,elt->positions[-1]);
-      for (i = 0; i < elt->npositions; i++) {
-	debug7(printf("Pushing position %u\n",elt->positions[i]));
-	difflist = Uintlist_push(difflist,elt->positions[i]);
-      }
-      if (elt->querystart < querystart_diff) {
-	querystart_diff = elt->querystart;
-      }
-      if (elt->queryend > queryend_diff) {
-	queryend_diff = elt->queryend;
-      }
-      ndiff++;
-    }
-  }
-
-  debug7(printf("Got %d same, %d diff\n",nsame,ndiff));
-
-  if (ndiff == 0) {
-    /* sub */
-    debug7(printf("  Testing in entire query\n"));
-    nmismatches = Genome_count_mismatches_substring(query_compress,left,/*pos5*/0,/*pos3*/querylength,
-						    plusp,genestrand);
-    debug7(printf("nmismatches = %d (vs %d misses allowed)\n",nmismatches,nmisses_allowed));
-
-    if (nmismatches > nmisses_allowed) {
-      debug7(printf("Result: too many mismatches\n"));
-
-    } else {
-      debug7(printf("Result: successful hit saved\n"));
-      if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,
-					    left,/*genomiclength*/querylength,
-					    query_compress,plusp,genestrand,first_read_p,
-					    chrnum,chroffset,chrhigh,chrlength,
-					    /*sarrayp*/true)) != NULL) {
-	debug1(printf("1. Reporting hit with %d mismatches vs %d allowed\n",nmismatches,nmisses_allowed));
-	*subs = List_push(*subs,(void *) hit);
-	twopartp = true;
-      }
-    }
-    assert(difflist == NULL);
-
-  } else if (querystart_same == 0 && queryend_diff == querylength - 1) {
-    left1 = left;
-    indel_pos = queryend_same + 1;
-    debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same));
-
-    n = Uintlist_length(difflist);
-#ifdef HAVE_ALLOCA
-#ifdef USE_QSORT
-    array = (UINT4 *) MALLOCA(n * sizeof(UINT4));
-#else
-    array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4));
-#endif
-#else
-#ifdef USE_QSORT
-    array = (UINT4 *) MALLOC(n * sizeof(UINT4));
-#else
-    array = (UINT4 *) MALLOC((n + 1) * sizeof(UINT4));
-#endif
-#endif
-
-    Uintlist_fill_array_and_free(array,&difflist);
-#ifdef USE_QSORT
-    qsort(array,n,sizeof(Univcoord_T),Univcoord_compare);
-#else
-    Sedgesort(array,n);
-#endif
-    debug7(printf("Have %d matching diffs\n",n));
-
-    spliceends_sense = spliceends_antisense = (List_T) NULL;
-    lowprob = (List_T) NULL;
-    for (i = 0; i < n; i++) {
-      left2 = array[i];
-      debug7(printf("diff %d/%d is at %u, from %d to %d\n",i,n,left2,querystart_diff - 1,queryend_diff));
-
-      if (i > 0 && left2 == array[i-1]) {
-	/* Already processed */
-
-      } else if (left2 + querylength >= chrhigh) {
-	/* Splice or deletion would extend to next chromosome */
-
-      } else if (left2 > left1 + max_deletionlen) {
-	debug7(printf("A splice..."));
-
-	segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(left1,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,left1);
-	  while (j < nsplicesites && splicesites[j] < left1 + querylength) {
-	    if (splicetypes[j] == DONOR) {
-	      debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - left1;
-	      segmenti_donor_knowni[segmenti_donor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIACCEPTOR) {
-	      debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - left1;
-	      segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
-	segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
-	  
-	segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(left2,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,left2);
-	  while (j < nsplicesites && splicesites[j] < left2 + querylength) {
-	    if (splicetypes[j] == ACCEPTOR) {
-	      debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left2;
-	      segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIDONOR) {
-	      debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left2;
-	      segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
-	segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
-
-	/* nspliceends = 0; */
-	assert(segmenti_donor_knownpos[0] == querylength);
-	assert(segmentj_acceptor_knownpos[0] == querylength);
-	assert(segmentj_antidonor_knownpos[0] == querylength);
-	assert(segmenti_antiacceptor_knownpos[0] == querylength);
-
-	spliceends_sense =
-	  Splice_solve_single_sense(&(*found_score),&nspliceends_sense,spliceends_sense,&lowprob,
-				    &segmenti_usedp,&segmentj_usedp,
-				    /*segmenti_left*/left1,/*segmentj_left*/left2,
-				    chrnum,chroffset,chrhigh,chrlength,
-				    chrnum,chroffset,chrhigh,chrlength,
-				    querylength,query_compress,
-				    segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-				    segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-				    segmenti_donor_knowni,segmentj_acceptor_knowni,
-				    segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-				    segmenti_donor_nknown,segmentj_acceptor_nknown,
-				    segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-				    splicing_penalty,/*max_mismatches_allowed*/1000,
-				    plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
-				    /*sarrayp*/true);
-
-	assert(segmenti_donor_knownpos[0] == querylength);
-	assert(segmentj_acceptor_knownpos[0] == querylength);
-	assert(segmentj_antidonor_knownpos[0] == querylength);
-	assert(segmenti_antiacceptor_knownpos[0] == querylength);
-
-	spliceends_antisense =
-	  Splice_solve_single_antisense(&(*found_score),&nspliceends_antisense,spliceends_antisense,&lowprob,
-					&segmenti_usedp,&segmentj_usedp,
-					/*segmenti_left*/left1,/*segmentj_left*/left2,
-					chrnum,chroffset,chrhigh,chrlength,
-					chrnum,chroffset,chrhigh,chrlength,
-					querylength,query_compress,
-					segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-					segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-					segmenti_donor_knowni,segmentj_acceptor_knowni,
-					segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-					segmenti_donor_nknown,segmentj_acceptor_nknown,
-					segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-					splicing_penalty,/*max_mismatches_allowed*/1000,
-					plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
-					/*sarrayp*/true);
-
-      } else if (left2 > left1) {
-	nindels = left2 - left1;
-	debug7(printf("B deletion of %d bp relative to max_deletionlen %d (nmisses allowed %d)...",
-		      nindels,max_deletionlen,nmisses_allowed));
-	if ((indel_pos < 17 || querylength - indel_pos < 17) && nindels > max_end_deletions) {
-	  /* Allow regular GSNAP algorithm to find this */
-	  debug7(printf("too long for end deletion"));
-	} else {
-#if 0
-	  nmismatches1 = Genome_count_mismatches_substring(query_compress,left1,/*pos5*/0,/*pos3*/indel_pos,
-							   plusp,genestrand);
-	  nmismatches2 = Genome_count_mismatches_substring(query_compress,left2,/*pos5*/indel_pos,
-							   /*pos3*/querylength,plusp,genestrand);
-	  if (plusp == true) {
-	    query_indel_pos = indel_pos;
-	  } else {
-	    query_indel_pos = querylength - indel_pos;
-	  }
-	  /* genomiclength = querylength+nindels; */
-	  if ((hit = Stage3end_new_deletion(&(*found_score),nindels,query_indel_pos,
-					    nmismatches1,nmismatches2,left1,
-					    query_compress,querylength,plusp,genestrand,first_read_p,
-					    chrnum,chroffset,chrhigh,chrlength,
-					    /*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
-	    debug7(printf("successful"));
-	    *indels = List_push(*indels,(void *) hit);
-	    twopartp = true;
-	  }
-#else
-	  *indels = Indel_solve_middle_deletion(&foundp,&(*found_score),&nhits,*indels,
-						/*left*/left1,chrnum,chroffset,chrhigh,chrlength,
-						/*indels*/-nindels,query_compress,querylength,
-						nmisses_allowed,plusp,genestrand,/*sarray*/true);
-	  debug7(
-		 if (foundp == true) {
-		   printf("successful");
-		 }
-		 );
-#endif
-	}
-	debug7(printf("\n"));
-      
-      } else if (left2 < left1) {
-	nindels = left1 - left2;
-	if (nindels >= indel_pos || indel_pos + nindels >= querylength) {
-	  debug7(printf("X insertion of %d bp too long\n",nindels));
-	} else {
-	  debug7(printf("C insertion of %d bp (nmisses allowed %d)...",nindels,nmisses_allowed));
-#if 0
-	  nmismatches1 = Genome_count_mismatches_substring(query_compress,left1,/*pos5*/0,/*pos3*/indel_pos-nindels,
-							   plusp,genestrand);
-	  nmismatches2 = Genome_count_mismatches_substring(query_compress,left2,/*pos5*/indel_pos+nindels,
-							   /*pos3*/querylength,plusp,genestrand);
-	  if (plusp == true) {
-	    query_indel_pos = indel_pos;
-	  } else {
-	    query_indel_pos = querylength - indel_pos - nindels;
-	  }
-	  /* genomiclength = querylength-nindels; */
-	  if ((hit = Stage3end_new_insertion(&(*found_score),nindels,query_indel_pos,
-					     nmismatches1,nmismatches2,left1,
-					     query_compress,querylength,plusp,genestrand,first_read_p,
-					     chrnum,chroffset,chrhigh,chrlength,
-					     /*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
-	    debug7(printf("successful"));
-	    *indels = List_push(*indels,(void *) hit);
-	    twopartp = true;
-	  }
-#else
-	  *indels = Indel_solve_middle_insertion(&foundp,&(*found_score),&nhits,*indels,
-						 /*left*/left1,chrnum,chroffset,chrhigh,chrlength,
-						 /*indels*/+nindels,query_compress,querylength,nmisses_allowed,
-						 plusp,genestrand,/*sarrayp*/true);
-	  debug7(
-		 if (foundp == true) {
-		   printf("successful");
-		 }
-		 );
-#endif
-	  debug7(printf("\n"));
-	}
-      }
-    }
-
-    if (spliceends_sense != NULL) {
-      /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
-      best_nmismatches = querylength;
-      best_prob = 0.0;
-      for (p = spliceends_sense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
-		      Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-		      Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
-		      Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-		      Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
-		      Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-		      Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
-	  best_nmismatches = nmismatches;
-	}
-	if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
-	  best_prob = prob;
-	}
-      }
-
-      n_good_spliceends = 0;
-      accepted_hits = rejected_hits = (List_T) NULL;
-      for (p = spliceends_sense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
-	    Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	  debug7(printf("accepting distance %d, probabilities %f and %f\n",
-			Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	  n_good_spliceends += 1;
-	  accepted_hits = List_push(accepted_hits,(void *) hit);
-	} else {
-	  rejected_hits = List_push(rejected_hits,(void *) hit);
-	}
-      }
-
-      if (n_good_spliceends == 0) {
-	/* Conjunction is too strict.  Allow for disjunction instead. */
-	List_free(&rejected_hits);
-	for (p = spliceends_sense; p != NULL; p = List_next(p)) {
-	  hit = (Stage3end_T) List_head(p);
-	  if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
-	      Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	    debug7(printf("accepting distance %d, probabilities %f and %f\n",
-			  Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			  Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	    n_good_spliceends += 1;
-	    accepted_hits = List_push(accepted_hits,(void *) hit);
-	  } else {
-	    rejected_hits = List_push(rejected_hits,(void *) hit);
-	  }
-	}
-      }
-
-      for (p = rejected_hits; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	Stage3end_free(&hit);
-      }
-      List_free(&rejected_hits);
-      List_free(&spliceends_sense);
-
-      if (n_good_spliceends == 1) {
-	*singlesplicing = List_push(*singlesplicing,List_head(accepted_hits));
-	nhits += 1;
-	List_free(&accepted_hits);
-
-      } else {
-	/* 1.  Multiple hits, sense, left1 */
-	debug7(printf("multiple hits with best prob, sense\n"));
-	donor_hits = acceptor_hits = (List_T) NULL;
-	if (plusp == true) {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicstart(donor) == left1) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicstart(acceptor) == left1) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	} else {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicend(donor) == left1) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicend(acceptor) == left1) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	}
-
-	if (donor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    donor = Stage3end_substring_donor(hit);
-	    donor_length = Substring_match_length_orig(donor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		acceptor = Stage3end_substring_acceptor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
-	      }
-
-	      nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
-	      prob = best_prob - Substring_siteD_prob(donor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
-								   donor,/*acceptor*/NULL,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   /*ambcoords_donor*/NULL,ambcoords,
-								   /*amb_knowni_donor*/NULL,amb_knowni,
-								   /*amb_nmismatches_donor*/NULL,amb_nmismatches,
-								   /*amb_probs_donor*/NULL,amb_probs,
-								   /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&donor_hits);
-	}
-
-	if (acceptor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    acceptor_length = Substring_match_length_orig(acceptor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		donor = Stage3end_substring_donor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
-	      }
-	    
-	      nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
-	      prob = best_prob - Substring_siteA_prob(acceptor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
-								   /*donor*/NULL,acceptor,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   ambcoords,/*ambcoords_acceptor*/NULL,
-								   amb_knowni,/*amb_knowni_acceptor*/NULL,
-								   amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
-								   amb_probs,/*amb_probs_acceptor*/NULL,
-								   /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&acceptor_hits);
-	}
-
-	List_free(&accepted_hits);
-      }
-    }
-
-    if (spliceends_antisense != NULL) {
-      /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
-      best_nmismatches = querylength;
-      best_prob = 0.0;
-      for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
-		      Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-		      Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
-		      Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-		      Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
-		      Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-		      Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
-	  best_nmismatches = nmismatches;
-	}
-	if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
-	  best_prob = prob;
-	}
-      }
-
-      n_good_spliceends = 0;
-      accepted_hits = rejected_hits = (List_T) NULL;
-      for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
-	    Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	  debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
-			Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-			Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-			Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	  n_good_spliceends += 1;
-	  accepted_hits = List_push(accepted_hits,(void *) hit);
-	} else {
-	  rejected_hits = List_push(rejected_hits,(void *) hit);
-	}
-      }
-
-      if (n_good_spliceends == 0) {
-	/* Conjunction is too strict.  Allow for disjunction instead. */
-	List_free(&rejected_hits);
-	for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
-	  hit = (Stage3end_T) List_head(p);
-	  if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
-	      Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	    debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
-			  Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-			  Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-			  Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			  Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	    n_good_spliceends += 1;
-	    accepted_hits = List_push(accepted_hits,(void *) hit);
-	  } else {
-	    rejected_hits = List_push(rejected_hits,(void *) hit);
-	  }
-	}
-      }
-
-      for (p = rejected_hits; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	Stage3end_free(&hit);
-      }
-      List_free(&rejected_hits);
-      List_free(&spliceends_antisense);
-
-      if (n_good_spliceends == 1) {
-	*singlesplicing = List_push(*singlesplicing,List_head(accepted_hits));
-	nhits += 1;
-	List_free(&accepted_hits);
-
-      } else {
-	/* 2.  Multiple hits, antisense, left1 */
-	debug7(printf("multiple hits with best prob, antisense\n"));
-	donor_hits = acceptor_hits = (List_T) NULL;
-	if (plusp == true) {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicstart(donor) == left1) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicstart(acceptor) == left1) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	} else {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicend(donor) == left1) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicend(acceptor) == left1) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	}
-
-	if (donor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    donor = Stage3end_substring_donor(hit);
-	    donor_length = Substring_match_length_orig(donor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-	      
-	      for (k = i; k < j; k++) {
-		acceptor = Stage3end_substring_acceptor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
-	      }
-	      
-	      nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
-	      prob = best_prob - Substring_siteD_prob(donor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
-								   donor,/*acceptor*/NULL,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   /*ambcoords_donor*/NULL,ambcoords,
-								   /*amb_knowni_donor*/NULL,amb_knowni,
-								   /*amb_nmismatches_donor*/NULL,amb_nmismatches,
-								   /*amb_probs_donor*/NULL,amb_probs,
-								   /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&donor_hits);
-	}
-
-	if (acceptor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    acceptor_length = Substring_match_length_orig(acceptor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		donor = Stage3end_substring_donor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
-	      }
-	    
-	      nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
-	      prob = best_prob - Substring_siteA_prob(acceptor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
-								   /*donor*/NULL,acceptor,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   ambcoords,/*ambcoords_acceptor*/NULL,
-								   amb_knowni,/*amb_knowni_acceptor*/NULL,
-								   amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
-								   amb_probs,/*amb_probs_acceptor*/NULL,
-								   /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&acceptor_hits);
-	}
-
-	List_free(&accepted_hits);
-      }
-    }
-
-    /* Don't use lowprob in suffix array stage */
-    debug7(printf("freeing lowprobs\n"));
-    for (p = lowprob; p != NULL; p = List_next(p)) {
-      hit = (Stage3end_T) List_head(p);
-      Stage3end_free(&hit);
-    }
-    List_free(&lowprob);
-
-#ifdef HAVE_ALLOCA
-    FREEA(array);
-#else
-    FREE(array);
-#endif
-
-  } else if (querystart_diff == 0 && queryend_same == querylength - 1) {
-    left2 = left;
-    indel_pos = querystart_same;
-    debug7(printf("same is at %u from %d to %d\n",left,querystart_same,queryend_same));
-    
-    n = Uintlist_length(difflist);
-#ifdef HAVE_ALLOCA
-#ifdef USE_QSORT
-    array = (UINT4 *) MALLOCA(n * sizeof(UINT4));
-#else
-    array = (UINT4 *) MALLOCA((n + 1) * sizeof(UINT4));
-#endif
-#else
-#ifdef USE_QSORT
-    array = (UINT4 *) MALLOC(n * sizeof(UINT4));
-#else
-    array = (UINT4 *) MALLOC((n + 1) * sizeof(UINT4));
-#endif
-#endif
-
-    Uintlist_fill_array_and_free(array,&difflist);
-#ifdef USE_QSORT
-    qsort(array,n,sizeof(Univcoord_T),Univcoord_compare);
-#else
-    Sedgesort_uint4(array,n);
-#endif
-    debug7(printf("Have %d matching diffs\n",n));
-
-    spliceends_sense = spliceends_antisense = (List_T) NULL;
-    lowprob = (List_T) NULL;
-    for (i = 0; i < n; i++) {
-      left1 = array[i];
-      debug7(printf("diff %d/%d is at %u, from %d to %d\n",i,n,left1,querystart_diff,queryend_diff));
-
-      if (i > 0 && left1 == array[i-1]) {
-	/* Already processed */
-
-      } else if (left2 + querylength >= chrhigh) {
-	/* Splice or deletion would extend to next chromosome */
-
-      } else if (left2 > left1 + max_deletionlen) {
-	debug7(printf("A splice..."));
-
-	segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(left1,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,left1);
-	  while (j < nsplicesites && splicesites[j] < left1 + querylength) {
-	    if (splicetypes[j] == DONOR) {
-	      debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - left1;
-	      segmenti_donor_knowni[segmenti_donor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIACCEPTOR) {
-	      debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - left1;
-	      segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
-	segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
-	  
-	segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(left2,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,left2);
-	  while (j < nsplicesites && splicesites[j] < left2 + querylength) {
-	    if (splicetypes[j] == ACCEPTOR) {
-	      debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left2;
-	      segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIDONOR) {
-	      debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left2;
-	      segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
-	segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
-
-	/* nspliceends = 0; */
-	spliceends_sense =
-	  Splice_solve_single_sense(&(*found_score),&nspliceends_sense,spliceends_sense,&lowprob,
-				    &segmenti_usedp,&segmentj_usedp,
-				    /*segmenti_left*/left1,/*segmentj_left*/left2,
-				    chrnum,chroffset,chrhigh,chrlength,
-				    chrnum,chroffset,chrhigh,chrlength,
-				    querylength,query_compress,
-				    segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-				    segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-				    segmenti_donor_knowni,segmentj_acceptor_knowni,
-				    segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-				    segmenti_donor_nknown,segmentj_acceptor_nknown,
-				    segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-				    splicing_penalty,/*max_mismatches_allowed*/1000,
-				    plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
-				    /*sarrayp*/true);
-	spliceends_antisense =
-	  Splice_solve_single_antisense(&(*found_score),&nspliceends_antisense,spliceends_antisense,&lowprob,
-				    &segmenti_usedp,&segmentj_usedp,
-				    /*segmenti_left*/left1,/*segmentj_left*/left2,
-				    chrnum,chroffset,chrhigh,chrlength,
-				    chrnum,chroffset,chrhigh,chrlength,
-				    querylength,query_compress,
-				    segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-				    segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-				    segmenti_donor_knowni,segmentj_acceptor_knowni,
-				    segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-				    segmenti_donor_nknown,segmentj_acceptor_nknown,
-				    segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-				    splicing_penalty,/*max_mismatches_allowed*/1000,
-				    plusp,genestrand,first_read_p,/*subs_or_indels_p*/false,
-				    /*sarrayp*/true);
-
-      } else if (left2 > left1) {
-	nindels = left2 - left1;
-	debug7(printf("B deletion of %d bp relative to max_deletionlen %d (nmisses allowed %d)...",
-		      nindels,max_deletionlen,nmisses_allowed));
-	if ((indel_pos < 17 || querylength - indel_pos < 17) && nindels > max_end_deletions) {
-	  /* Allow regular GSNAP algorithm to find this */
-	  debug7(printf("too long for end deletion"));
-	} else {
-#if 0
-	  nmismatches1 = Genome_count_mismatches_substring(query_compress,left1,/*pos5*/0,/*pos3*/indel_pos,
-							   plusp,genestrand);
-	  nmismatches2 = Genome_count_mismatches_substring(query_compress,left2,/*pos5*/indel_pos,
-							   /*pos3*/querylength,plusp,genestrand);
-	  if (plusp == true) {
-	    query_indel_pos = indel_pos;
-	  } else {
-	    query_indel_pos = querylength - indel_pos;
-	  }
-	  /* genomiclength = querylength+nindels; */
-	  if ((hit = Stage3end_new_deletion(&(*found_score),nindels,query_indel_pos,
-					    nmismatches1,nmismatches2,left1,
-					    query_compress,querylength,plusp,genestrand,first_read_p,
-					    chrnum,chroffset,chrhigh,chrlength,
-					    /*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
-	    debug7(printf("successful"));
-	    *indels = List_push(*indels,(void *) hit);
-	    twopartp = true;
-	  }
-#else
-	  *indels = Indel_solve_middle_deletion(&foundp,&(*found_score),&nhits,*indels,
-						/*left*/left1,chrnum,chroffset,chrhigh,chrlength,
-						/*indels*/-nindels,query_compress,querylength,
-						nmisses_allowed,plusp,genestrand,/*sarray*/true);
-	  debug7(
-		 if (foundp == true) {
-		   printf("successful");
-		 }
-		 );
-#endif
-	}
-	debug7(printf("\n"));
-      
-      } else if (left2 < left1) {
-	nindels = left1 - left2;
-	if (nindels >= indel_pos || indel_pos + nindels >= querylength) {
-	  debug7(printf("X insertion of %d bp too long\n",nindels));
-	} else {
-	  debug7(printf("C insertion of %d bp (nmisses allowed %d)...",nindels,nmisses_allowed));
-#if 0      
-	  nmismatches1 = Genome_count_mismatches_substring(query_compress,left1,/*pos5*/0,/*pos3*/indel_pos-nindels,
-							   plusp,genestrand);
-	  nmismatches2 = Genome_count_mismatches_substring(query_compress,left2,/*pos5*/indel_pos+nindels,
-							   /*pos3*/querylength,plusp,genestrand);
-	  if (plusp == true) {
-	    query_indel_pos = indel_pos;
-	  } else {
-	    query_indel_pos = querylength - indel_pos - nindels;
-	  }
-	  /*genomiclength = querylength-nindels; */
-	  if ((hit = Stage3end_new_insertion(&(*found_score),nindels,query_indel_pos,
-					     nmismatches1,nmismatches2,left1,
-					     query_compress,querylength,plusp,genestrand,first_read_p,
-					     chrnum,chroffset,chrhigh,chrlength,
-					     /*indel_penalty*/2,/*sarrayp*/true)) != NULL) {
-	    debug7(printf("successful"));
-	    *indels = List_push(*indels,(void *) hit);
-	    twopartp = true;
-	  }
-#else
-	  *indels = Indel_solve_middle_insertion(&foundp,&(*found_score),&nhits,*indels,
-						 /*left*/left1,chrnum,chroffset,chrhigh,chrlength,
-						 /*indels*/+nindels,query_compress,querylength,nmisses_allowed,
-						 plusp,genestrand,/*sarrayp*/true);
-	  debug7(
-		 if (foundp == true) {
-		   printf("successful");
-		 }
-		 );
-#endif
-	  debug7(printf("\n"));
-	}
-      }
-    }
-
-    if (spliceends_sense != NULL) {
-      /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
-      best_nmismatches = querylength;
-      best_prob = 0.0;
-      for (p = spliceends_sense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
-		      Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-		      Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
-		      Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-		      Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
-		      Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-		      Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
-	  best_nmismatches = nmismatches;
-	}
-	if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
-	  best_prob = prob;
-	}
-      }
-
-      n_good_spliceends = 0;
-      accepted_hits = rejected_hits = (List_T) NULL;
-      for (p = spliceends_sense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
-	    Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	  debug7(printf("accepting distance %d, probabilities %f and %f\n",
-			Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	  n_good_spliceends += 1;
-	  accepted_hits = List_push(accepted_hits,(void *) hit);
-	} else {
-	  rejected_hits = List_push(rejected_hits,(void *) hit);
-	}
-      }
-      
-      if (n_good_spliceends == 0) {
-	/* Conjunction is too strict.  Allow for disjunction instead. */
-	List_free(&rejected_hits);
-	for (p = spliceends_sense; p != NULL; p = List_next(p)) {
-	  hit = (Stage3end_T) List_head(p);
-	  if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
-	      Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	    debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
-			  Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-			  Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-			  Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			  Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	    n_good_spliceends += 1;
-	    accepted_hits = List_push(accepted_hits,(void *) hit);
-	  } else {
-	    rejected_hits = List_push(rejected_hits,(void *) hit);
-	  }
-	}
-      }
-
-      for (p = rejected_hits; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	Stage3end_free(&hit);
-      }
-      List_free(&rejected_hits);
-      List_free(&spliceends_sense);
-
-      if (n_good_spliceends == 1) {
-	*singlesplicing = List_push(*singlesplicing,List_head(accepted_hits));
-	nhits += 1;
-	List_free(&accepted_hits);
-
-      } else {
-	/* 3.  Multiple hits, sense, left2 */
-	debug7(printf("multiple hits with best prob, sense\n"));
-	donor_hits = acceptor_hits = (List_T) NULL;
-	if (plusp == true) {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicstart(donor) == left2) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicstart(acceptor) == left1) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	} else {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicend(donor) == left2) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicend(acceptor) == left1) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	}
-	  
-	if (donor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    donor = Stage3end_substring_donor(hit);
-	    donor_length = Substring_match_length_orig(donor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		acceptor = Stage3end_substring_acceptor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
-	      }
-
-	      nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
-	      prob = best_prob - Substring_siteD_prob(donor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
-								   donor,/*acceptor*/NULL,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   /*ambcoords_donor*/NULL,ambcoords,
-								   /*amb_knowni_donor*/NULL,amb_knowni,
-								   /*amb_nmismatches_donor*/NULL,amb_nmismatches,
-								   /*amb_probs_donor*/NULL,amb_probs,
-								   /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&donor_hits);
-	}
-
-	if (acceptor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    acceptor_length = Substring_match_length_orig(acceptor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		donor = Stage3end_substring_donor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
-	      }
-
-	      nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
-	      prob = best_prob - Substring_siteA_prob(acceptor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
-								   /*donor*/NULL,acceptor,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   ambcoords,/*ambcoords_acceptor*/NULL,
-								   amb_knowni,/*amb_knowni_acceptor*/NULL,
-								   amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
-								   amb_probs,/*amb_probs_acceptor*/NULL,
-								   /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&acceptor_hits);
-	}
-
-	List_free(&accepted_hits);
-      }
-    }
-
-    if (spliceends_antisense != NULL) {
-      /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
-      best_nmismatches = querylength;
-      best_prob = 0.0;
-      for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	debug7(printf("analyzing distance %d, donor length %d (%llu..%llu) and acceptor length %d (%llu..%llu), nmismatches %d, probabilities %f and %f\n",
-		      Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-		      Substring_genomicstart(Stage3end_substring_donor(hit)),Substring_genomicend(Stage3end_substring_donor(hit)),
-		      Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-		      Substring_genomicstart(Stage3end_substring_acceptor(hit)),Substring_genomicend(Stage3end_substring_acceptor(hit)),
-		      Stage3end_nmismatches_whole(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-		      Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	if ((nmismatches = Stage3end_nmismatches_whole(hit)) < best_nmismatches) {
-	  best_nmismatches = nmismatches;
-	}
-	if ((prob = Stage3end_chimera_prob(hit)) > best_prob) {
-	  best_prob = prob;
-	}
-      }
-
-      n_good_spliceends = 0;
-      accepted_hits = rejected_hits = (List_T) NULL;
-      for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP &&
-	    Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	  debug7(printf("accepting distance %d, probabilities %f and %f\n",
-			Stage3end_distance(hit),Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	  n_good_spliceends += 1;
-	  accepted_hits = List_push(accepted_hits,(void *) hit);
-	} else {
-	  rejected_hits = List_push(rejected_hits,(void *) hit);
-	}
-      }
-      
-      if (n_good_spliceends == 0) {
-	/* Conjunction is too strict.  Allow for disjunction instead. */
-	List_free(&rejected_hits);
-	for (p = spliceends_antisense; p != NULL; p = List_next(p)) {
-	  hit = (Stage3end_T) List_head(p);
-	  if (Stage3end_nmismatches_whole(hit) <= best_nmismatches + LOCALSPLICING_NMATCHES_SLOP ||
-	      Stage3end_chimera_prob(hit) >= best_prob - LOCALSPLICING_PROB_SLOP) {
-	    debug7(printf("accepting distance %d, donor length %d and acceptor length %d, probabilities %f and %f\n",
-			  Stage3end_distance(hit),Substring_match_length_orig(Stage3end_substring_donor(hit)),
-			  Substring_match_length_orig(Stage3end_substring_acceptor(hit)),
-			  Substring_siteD_prob(Stage3end_substring_donor(hit)),
-			  Substring_siteA_prob(Stage3end_substring_acceptor(hit))));
-	    n_good_spliceends += 1;
-	    accepted_hits = List_push(accepted_hits,(void *) hit);
-	  } else {
-	    rejected_hits = List_push(rejected_hits,(void *) hit);
-	  }
-	}
-      }
-
-      for (p = rejected_hits; p != NULL; p = List_next(p)) {
-	hit = (Stage3end_T) List_head(p);
-	Stage3end_free(&hit);
-      }
-      List_free(&rejected_hits);
-      List_free(&spliceends_antisense);
-
-      if (n_good_spliceends == 1) {
-	*singlesplicing = List_push(*singlesplicing,List_head(accepted_hits));
-	nhits += 1;
-	List_free(&accepted_hits);
-
-      } else {
-	/* 4.  Multiple hits, antisense, left2 */
-	debug7(printf("multiple hits with best prob, antisense\n"));
-	donor_hits = acceptor_hits = (List_T) NULL;
-	if (plusp == true) {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicstart(donor) == left2) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicstart(acceptor) == left2) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	} else {
-	  for (p = accepted_hits; p != NULL; p = List_next(p)) {
-	    hit = (Stage3end_T) List_head(p);
-	    donor = Stage3end_substring_donor(hit);
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    if (Substring_genomicend(donor) == left2) {
-	      donor_hits = List_push(donor_hits,(void *) hit);
-	    } else if (Substring_genomicend(acceptor) == left2) {
-	      acceptor_hits = List_push(acceptor_hits,(void *) hit);
-	    } else {
-	      Stage3end_free(&hit);
-	    }
-	  }
-	}
-
-	if (donor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,donor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),donor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    donor = Stage3end_substring_donor(hit);
-	    donor_length = Substring_match_length_orig(donor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_donor(hitarray[j])) == donor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		acceptor = Stage3end_substring_acceptor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_A(acceptor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_A(acceptor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(acceptor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteA_prob(acceptor));
-	      }
-
-	      nmismatches_acceptor = best_nmismatches - Substring_nmismatches_whole(donor);
-	      prob = best_prob - Substring_siteD_prob(donor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   /*nmismatches_donor*/Substring_nmismatches_whole(donor),nmismatches_acceptor,
-								   donor,/*acceptor*/NULL,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   /*ambcoords_donor*/NULL,ambcoords,
-								   /*amb_knowni_donor*/NULL,amb_knowni,
-								   /*amb_nmismatches_donor*/NULL,amb_nmismatches,
-								   /*amb_probs_donor*/NULL,amb_probs,
-								   /*copy_donor_p*/true,/*copy_acceptor_p*/false,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&donor_hits);
-	}
-
-	if (acceptor_hits != NULL) {
-	  hitarray = (Stage3end_T *) List_to_array_n(&n,acceptor_hits);
-	  qsort(hitarray,n,sizeof(Stage3end_T),acceptor_match_length_cmp);
-	  i = 0;
-	  while (i < n) {
-	    hit = hitarray[i];
-	    acceptor = Stage3end_substring_acceptor(hit);
-	    acceptor_length = Substring_match_length_orig(acceptor);
-	    j = i + 1;
-	    while (j < n && Substring_match_length_orig(Stage3end_substring_acceptor(hitarray[j])) == acceptor_length) {
-	      j++;
-	    }
-	    if (j == i + 1) {
-	      *singlesplicing = List_push(*singlesplicing,(void *) hit);
-	    } else {
-	      ambcoords = (Uintlist_T) NULL;
-	      amb_knowni = (Intlist_T) NULL;
-	      amb_nmismatches = (Intlist_T) NULL;
-	      amb_probs = (Doublelist_T) NULL;
-
-	      for (k = i; k < j; k++) {
-		donor = Stage3end_substring_donor(hitarray[k]);
-#ifdef LARGE_GENOMES
-		ambcoords = Uint8list_push(ambcoords,Substring_splicecoord_D(donor));
-#else
-		ambcoords = Uintlist_push(ambcoords,Substring_splicecoord_D(donor));
-#endif
-		amb_knowni = Intlist_push(amb_knowni,-1);
-		amb_nmismatches = Intlist_push(amb_nmismatches,Substring_nmismatches_whole(donor));
-		amb_probs = Doublelist_push(amb_probs,Substring_siteD_prob(donor));
-	      }
-
-	      nmismatches_donor = best_nmismatches - Substring_nmismatches_whole(acceptor);
-	      prob = best_prob - Substring_siteA_prob(acceptor);
-	      *ambiguous = List_push(*ambiguous,
-				     (void *) Stage3end_new_splice(&(*found_score),
-								   nmismatches_donor,/*nmismatches_acceptor*/Substring_nmismatches_whole(acceptor),
-								   /*donor*/NULL,acceptor,/*distance*/0U,
-								   /*shortdistancep*/false,/*penalty*/0,querylength,
-								   ambcoords,/*ambcoords_acceptor*/NULL,
-								   amb_knowni,/*amb_knowni_acceptor*/NULL,
-								   amb_nmismatches,/*amb_nmismatches_acceptor*/NULL,
-								   amb_probs,/*amb_probs_acceptor*/NULL,
-								   /*copy_donor_p*/false,/*copy_acceptor_p*/true,first_read_p,
-								   Stage3end_sensedir(hit),/*sarrayp*/true));
-	      twopartp = true;
-	      Doublelist_free(&amb_probs);
-	      Intlist_free(&amb_nmismatches);
-	      Intlist_free(&amb_knowni);
-	      Uintlist_free(&ambcoords); /* LARGE_GENOMES not possible with suffix array */
-
-	      for (k = i; k < j; k++) {
-		hit = hitarray[k];
-		Stage3end_free(&hit);
-	      }
-	    }
-
-	    i = j;
-	  }
-	  FREE(hitarray);
-	  List_free(&acceptor_hits);
-	}
-
-	List_free(&accepted_hits);
-      }
-    }
-
-
-    /* Don't use lowprob in suffix array stage */
-    debug7(printf("freeing lowprobs\n"));
-    for (p = lowprob; p != NULL; p = List_next(p)) {
-      hit = (Stage3end_T) List_head(p);
-      Stage3end_free(&hit);
-    }
-    List_free(&lowprob);
-
-#ifdef HAVE_ALLOCA
-    FREEA(array);
-#else
-    FREE(array);
-#endif
-
-  } else {
-    Uintlist_free(&difflist);
-  }
-
-
-#ifdef HAVE_ALLOCA
-  if (querylength <= MAX_STACK_READLENGTH) {
-    FREEA(segmenti_donor_knownpos);
-    FREEA(segmentj_acceptor_knownpos);
-    FREEA(segmentj_antidonor_knownpos);
-    FREEA(segmenti_antiacceptor_knownpos);
-    FREEA(segmenti_donor_knowni);
-    FREEA(segmentj_acceptor_knowni);
-    FREEA(segmentj_antidonor_knowni);
-    FREEA(segmenti_antiacceptor_knowni);
-  } else {
-    FREE(segmenti_donor_knownpos);
-    FREE(segmentj_acceptor_knownpos);
-    FREE(segmentj_antidonor_knownpos);
-    FREE(segmenti_antiacceptor_knownpos);
-    FREE(segmenti_donor_knowni);
-    FREE(segmentj_acceptor_knowni);
-    FREE(segmentj_antidonor_knowni);
-    FREE(segmenti_antiacceptor_knowni);
-  }
-#else
-  FREE(segmenti_donor_knownpos);
-  FREE(segmentj_acceptor_knownpos);
-  FREE(segmentj_antidonor_knownpos);
-  FREE(segmenti_antiacceptor_knownpos);
-  FREE(segmenti_donor_knowni);
-  FREE(segmentj_acceptor_knowni);
-  FREE(segmentj_antidonor_knowni);
-  FREE(segmenti_antiacceptor_knowni);
-#endif
-
-  return twopartp;
-}
-#endif
-
-
-static int
-get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T *best_left_diagonals, 
-	       List_T *all_right_diagonals, List_T *all_left_diagonals,
-	       T sarray, char *queryptr, int querylength, Compress_T query_compress,
-	       Univcoord_T chroffset, Univcoord_T chrhigh,
-	       Univcoord_T goal, Elt_T *original_elt_array, int best_i, int nelts,
-	       bool plusp, int genestrand, char conversion[]) {
-  int best_score_right, best_score_left, best_score, score;
-  Elt_T elt, right_elt;
-  List_T *elt_tree;
-  Univcoord_T low, high;
-  int max_leftward, min_leftward, skip_left;
-  int querystart, queryend;
-
-  Sarrayptr_T initptr, finalptr;
-  bool successp;
-  UINT4 nmatches;
-
-  int i, j, k;
-  List_T p;
-
-  Univdiag_T *diagonal_array, diagonal, prev_diagonal;
-  int querypos;
-  int ndiagonals;
-  List_T left_diagonals, right_diagonals;
-
-#ifdef SUBDIVIDE_NOMATCHES
-  Chrpos_T low_chrpos, high_chrpos;
-  List_T sub_diagonals;
-  Diag_T sub_diagonal;
-  int nfound;
-  Univcoord_T mappingstart, mappingend;
-  int maxnconsecutive = 0;
-
-  Oligoindex_T oligoindex;
-  Chrpos_T **mappings, chrstart, chrend;
-  bool *coveredp;
-  int *npositions, totalpositions = 0;
-#endif
-
-  int max_insertionlen;
-
-  if (max_middle_insertions_default >= 0) {
-    max_insertionlen = max_insertionlen_default;
-  } else {
-    max_insertionlen = querylength;
-  }
-
-  debug13(printf("\n***Entered get_diagonals, plusp %d, with goal %u\n",plusp,goal));
-
-  /* Make elt tree, which allows for subdivisions of an elt */
-  elt_tree = (List_T *) MALLOC(nelts*sizeof(List_T));
-  for (i = 0; i < nelts; i++) {
-    elt_tree[i] = List_push(NULL,(void *) original_elt_array[i]);
-  }
-
-
-  /* Compute leftward extensions for right side */
-  debug13(printf("Performing leftward extensions for right side.  Subtracting %d and adding %d\n",
-		 max_insertionlen,overall_max_distance));
-  low = subtract_bounded(goal,/*minusterm*/max_insertionlen,chroffset);
-  high = add_bounded(goal,/*plusterm*/overall_max_distance,chrhigh);
-  for (i = best_i + 1; i < nelts; i++) {
-    elt = (Elt_T) elt_tree[i]->first;
-    Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-    if (elt->npositions > 0) {
-      /* Success: Update low and high for next search */
-      low = subtract_bounded(elt->positions[0],/*minusterm*/max_insertionlen,chroffset);
-      high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/overall_max_distance,chrhigh);
-    } else {
-      debug13(printf("Elt %d..%d (leftward %d..%d) has no positions, so trying to reduce elt->queryend\n",
-		     elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend));
-      if (i + 1 < nelts) {
-	/* A.  Try moving boundary to the left */
-	right_elt = (Elt_T) elt_tree[i+1]->first;
-	Elt_fill_positions_filtered(right_elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	if ((max_leftward = Elt_extend_leftward(&min_leftward,right_elt,query_compress,
-						plusp,genestrand,/*skip_left*/0)) > 0) {
-	  debug13(printf("Can extend %d..%d leftward by max %d, min %d\n",
-			 right_elt->querystart,right_elt->queryend,max_leftward,min_leftward));
-	  right_elt->querystart_leftward -= min_leftward; /* Using min_leftward is conservative */
-	  queryend = right_elt->querystart_leftward - 2;
-
-	  j = i;
-	  while (j >= best_i && ((Elt_T) elt_tree[j]->first)->querystart_leftward >= queryend) {
-	    debug13(printf("Left-extension of elt %d..%d => %d..%d obliterates elt %d..%d => %d..%d\n",
-			   right_elt->querystart,right_elt->queryend,right_elt->querystart_leftward,right_elt->queryend_leftward,
-			   ((Elt_T) elt_tree[j]->first)->querystart,((Elt_T) elt_tree[j]->first)->queryend,((Elt_T) elt_tree[j]->first)->querystart_leftward,queryend));
-	    --j;
-	  }
-
-	  if (j >= best_i) {
-	    /* Create a new elt with new positions */
-	    querystart = ((Elt_T) elt_tree[j]->first)->querystart_leftward;
-	    /* queryend was computed above */
-	    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
-			  /*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
-			  query_compress,sarray,plusp,genestrand,conversion);
-	    elt_tree[j] = List_pop(elt_tree[j],(void **) &elt);
-	    if (elt->temporaryp == true) {
-	      Elt_free(&elt);
-	    }
-	    elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
-	    elt_tree[j] = List_push(NULL,(void *) elt);
-	    Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	  }
-	}
-      }
-
-      if (elt->npositions > 0) {
-	/* Success: Update low and high for next search */
-	low = subtract_bounded(elt->positions[0],/*minusterm*/max_insertionlen,chroffset);
-	high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/overall_max_distance,chrhigh);
-      }
-    }
-  }
-
-
-  /* Compute leftward extensions for left side */
-  debug13(printf("Performing leftward extensions for left side.  Subtracting %d and adding %d\n",
-		 overall_max_distance,max_insertionlen));
-  low = subtract_bounded(goal,/*minusterm*/overall_max_distance,chroffset);
-  high = add_bounded(goal,/*plusterm*/max_insertionlen,chrhigh);
-  for (i = best_i - 1; i >= 0; --i) {
-    elt = (Elt_T) elt_tree[i]->first;
-    Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-    if (elt->npositions > 0) {
-      /* Success: Update low and high for next search */
-      low = subtract_bounded(elt->positions[0],/*minusterm*/overall_max_distance,chroffset);
-      high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/max_insertionlen,chrhigh);
-    } else {
-      /* A.  Try moving boundary to the left */
-      debug13(printf("Elt %d..%d has no positions, so trying to reduce elt->queryend\n",
-		     elt->querystart,elt->queryend));
-      if (i + 1 < nelts) {
-	right_elt = (Elt_T) elt_tree[i+1]->first;
-	skip_left = 0;
-	if ((max_leftward = Elt_extend_leftward(&min_leftward,right_elt,query_compress,
-						plusp,genestrand,/*skip_left*/0)) == 0) {
-	  skip_left = 1;
-	  max_leftward = Elt_extend_leftward(&min_leftward,right_elt,query_compress,
-					     plusp,genestrand,skip_left);
-	  debug13(printf("On second try, min_leftward is %d, max_leftward is %d\n",min_leftward,max_leftward));
-	}
-
-	if (max_leftward > 0) {
-	  debug13(printf("Can extend %d..%d leftward by max %d, min %d\n",
-			 right_elt->querystart,right_elt->queryend,max_leftward,min_leftward));
-	  right_elt->querystart_leftward -= min_leftward + skip_left; /* Using min_leftward is conservative */
-	  queryend = right_elt->querystart_leftward - 2;
-	  
-	  j = i;
-	  while (j >= best_i && ((Elt_T) elt_tree[j]->first)->querystart_leftward >= queryend) {
-	    debug13(printf("Left-extension of elt %d..%d => %d..%d obliterates elt %d..%d => %d..%d\n",
-			   right_elt->querystart,right_elt->queryend,right_elt->querystart_leftward,right_elt->querystart_leftward,
-			   ((Elt_T) elt_tree[j]->first)->querystart,((Elt_T) elt_tree[j]->first)->queryend,((Elt_T) elt_tree[j]->first)->querystart_leftward,queryend));
-	    --j;
-	  }
-	  
-	  if (j >= 0) {
-	    /* Create a new elt with new positions */
-	    querystart = ((Elt_T) elt_tree[j]->first)->querystart_leftward;
-	    /* queryend was computed above */
-	    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
-			  /*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
-			  query_compress,sarray,plusp,genestrand,conversion);
-	    elt_tree[j] = List_pop(elt_tree[j],(void **) &elt);
-	    if (elt->temporaryp == true) {
-	      Elt_free(&elt);
-	    }
-	    elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
-	    elt_tree[j] = List_push(NULL,(void *) elt);
-	    Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	  }
-	}
-      }
-
-      if (elt->npositions > 0) {
-	/* Success: Update low and high for next search */
-	low = subtract_bounded(elt->positions[0],/*minusterm*/overall_max_distance,chroffset);
-	high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/max_insertionlen,chrhigh);
-      }
-    }
-  }
-    
-#ifdef SUBDIVIDE_NOMATCHES
-  /* Try to subdivide elts that have no matches */
-#ifdef HAVE_ALLOCA
-  coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
-  mappings = (Chrpos_T **) ALLOCA(querylength * sizeof(Chrpos_T *));
-  npositions = (int *) CALLOCA(querylength,sizeof(int));
-#else
-  coveredp = (bool *) CALLOC(querylength,sizeof(bool));
-  mappings = (Chrpos_T **) MALLOC(querylength * sizeof(Chrpos_T *));
-  npositions = (int *) CALLOC(querylength,sizeof(int));
-#endif
-  oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
-  indexsize = Oligoindex_indexsize(oligoindex);
-
-
-  debug13(printf("Starting subdivisions on right side\n"));
-  low = subtract_bounded(goal,/*minusterm*/max_insertionlen,chroffset);
-  high = add_bounded(goal,/*plusterm*/overall_max_distance,chrhigh);
-  i = best_i + 1;
-  while (i < nelts) {
-    elt = (Elt_T) elt_tree[i]->first;
-    debug13(printf("Elt #%d at %d..%d has %d matching positions\n",i,elt->querystart,elt->queryend,elt->npositions));
-
-    if (elt->npositions > 0) {
-      low = subtract_bounded(elt->positions[0],/*minusterm*/max_insertionlen,chroffset);
-      high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/overall_max_distance,chrhigh);
-      i++;
-    } else {
-      j = i;
-      querystart = elt->querystart_leftward;
-      while (j + 1 < nelts && ((Elt_T) elt_tree[j+1]->first)->npositions <= 0) {
-	j = j + 1;
-      }
-      elt = (Elt_T) elt_tree[j]->first;
-      queryend = elt->queryend_leftward;
-      debug13(printf("Elts from %d through %d have no matching positions\n",i,j));
-
-#if 0
-      nfound = 0;
-      /* B.  Try subdividing elt using 16-mers every 8 */
-      debug13(printf("B.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
-      for (querypos = queryend - 16; querypos >= querystart; querypos -= 8) {
-	sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-		      /*querylength*/16,/*queryoffset*/querypos,
-		      query_compress,sarray,plusp,genestrand,conversion);
-	elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
-	elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-	Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	nfound += elt->npositions;
-	debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
-      }
-
-      if (nfound == 0) {
-	/* C.  Try subdividing elt using 16-mers every 1 */
-	debug13(printf("C.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
-	for (querypos = queryend - 16; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/16,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
-	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
-	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	  nfound += elt->npositions;
-	  debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
-	}
-      }
-
-      if (nfound == 0) {
-	/* D.  Try subdividing elt using 8-mers every 1 */
-	debug13(printf("D.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
-	for (querypos = queryend - 8; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/8,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
-	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
-	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	  nfound += elt->npositions;
-	  debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
-	}
-      }
-
-#else
-
-      mappingstart = low + querystart;
-      mappingend = high + queryend;
-      chrstart = mappingstart - chroffset;
-      chrend = mappingend - chroffset;
-
-      Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
-			  queryptr,querystart,queryend,/*chrpos*/chrstart,genestrand);
-      sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
-					      &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
-					      queryptr,querystart,queryend,querylength,
-					      chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
-      Oligoindex_untally(oligoindex,queryptr,querylength);
-
-      debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
-      for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-	sub_diagonal = (Diag_T) List_head(p);
-	debug14(printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal));
-	elt = Elt_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,chroffset + chrstart + sub_diagonal->diagonal);
-	elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-      }
-
-#endif
-
-      i = j + 1;
-    }
-  }
-
-
-  debug13(printf("Starting subdivisions on left side\n"));
-  low = subtract_bounded(goal,/*minusterm*/overall_max_distance,chroffset);
-  high = add_bounded(goal,/*plusterm*/max_insertionlen,chrhigh);
-  i = best_i - 1;
-  while (i >= 0) {
-    elt = (Elt_T) elt_tree[i]->first;
-    debug13(printf("Elt #%d at %d..%d has %d matching positions\n",i,elt->querystart,elt->queryend,elt->npositions));
-
-    if (elt->npositions > 0) {
-      low = subtract_bounded(elt->positions[0],/*minusterm*/overall_max_distance,chroffset);
-      high = add_bounded(elt->positions[elt->npositions-1],/*plusterm*/max_insertionlen,chrhigh);
-      --i;
-
-    } else {
-      j = i;
-      queryend = elt->queryend_leftward;
-      while (j - 1 >= 0 && ((Elt_T) elt_tree[j-1]->first)->npositions <= 0) {
-	j = j - 1;
-      }
-      elt = (Elt_T) elt_tree[j]->first;
-      querystart = elt->querystart_leftward;
-      debug13(printf("Elts from %d through %d have no matching positions\n",i,j));
-
-#if 0
-      nfound = 0;
-      /* B.  Try subdividing elt using 16-mers every 8 */
-      debug13(printf("B.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
-      for (querypos = queryend - 16; querypos >= querystart; querypos -= 8) {
-	sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
-		      /*querylength*/16,/*queryoffset*/querystart,
-		      query_compress,sarray,plusp,genestrand,conversion);
-	elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
-	elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-	Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	nfound += elt->npositions;
-	debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
-      }
-
-      if (nfound == 0) {
-	/* C.  Try subdividing elt using 16-mers every 1 */
-	debug13(printf("C.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
-	for (querypos = queryend - 16; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/16,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
-	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
-	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	  nfound += elt->npositions;
-	  debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
-	}
-      }
-
-      if (nfound == 0) {
-	/* D.  Try subdividing elt using 8-mers every 1 */
-	debug13(printf("D.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
-	for (querypos = queryend - 8; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/8,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
-	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
-	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
-	  nfound += elt->npositions;
-	  debug13(printf("Subelt at %d..%d has %d matching positions\n",elt->querystart,elt->queryend,elt->npositions));
-	}
-      }
-
-#else
-
-      mappingstart = low + querystart;
-      mappingend = high + queryend;
-      chrstart = mappingstart - chroffset;
-      chrend = mappingend - chroffset;
-
-      Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
-			  queryptr,querystart,queryend,/*chrpos*/chrstart,genestrand);
-      sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
-					      &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
-					      queryptr,querystart,queryend,querylength,
-					      chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
-      Oligoindex_untally(oligoindex,queryptr,querylength);
-
-      debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
-      for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-	sub_diagonal = (Diag_T) List_head(p);
-	debug14(printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal));
-	elt = Elt_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,chroffset + chrstart + sub_diagonal->diagonal);
-	elt_tree[i] = List_push(elt_tree[i],(void *) elt);
-      }
-#endif
-
-      i = j - 1;
-    }
-  }
-#endif
-
-
-  /* Create diagonals.  We give a bonus of +1 for being on the same
-     diagonal.  This means that we should count consecutive regions
-     within each diagonal as 2 points.  Then an indel or gap will
-     give only 1 point, or a relative penalty. */
-  assert(List_length(elt_tree[best_i]) == 1);
-  elt = (Elt_T) elt_tree[best_i]->first;
-  /* Don't use leftward values */
-  *middle_diagonal = Univdiag_new(elt->querystart,elt->queryend,/*univdiagonal*/goal);
-  (*middle_diagonal)->intscore = 2*(elt->queryend - elt->querystart + 1);
-  debug13(printf("Creating middle diagonal: query %d..%d, diagonal %u = goal %u - chroffset %u\n",
-		 elt->querystart,elt->queryend,goal - chroffset,goal,chroffset));
-  if (elt->temporaryp == true) {
-    Elt_free(&elt);
-  } else {
-    Elt_reset(elt);
-  }
-  List_free(&(elt_tree[best_i]));
-
-
-  right_diagonals = (List_T) NULL;
-  for (i = nelts - 1; i > best_i; --i) { /* Go in this order to avoid reversing list at the end */
-    for (p = elt_tree[i]; p != NULL; p = List_next(p)) {
-      elt = (Elt_T) p->first;
-      if (elt->fillin_p == true) {
-	/* Created by oligoindex */
-	diagonal = Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,/*univdiagonal*/elt->positions[0]);
-	diagonal->nmismatches_known_p = false;
-	right_diagonals = List_push(right_diagonals,(void *) diagonal);
-      } else if (elt->querystart_leftward < elt->queryend_leftward) {
-	for (j = elt->npositions - 1; j >= 0; --j) {  /* Go in this order to avoid reversing list at the end */
-	  debug13(printf("Creating right diagonal: query %d..%d (leftward %d..%d), diagonal %u\n",
-			 elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset));
-	  right_diagonals = List_push(right_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
-								   /*univdiagonal*/elt->positions[j]));
-	}
-      }
-      if (elt->temporaryp == true) {
-	Elt_free(&elt);
-      } else {
-	Elt_reset(elt);
-      }
-    }
-    List_free(&(elt_tree[i]));
-  }
-
-
-  left_diagonals = (List_T) NULL;
-  for (i = 0; i < best_i; i++) { /* Go in this order to avoid reversing list at the end */
-    for (p = elt_tree[i]; p != NULL; p = List_next(p)) {
-      elt = (Elt_T) p->first;
-      if (elt->fillin_p == true) {
-	/* Created by oligoindex */
-	diagonal = Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,/*univdiagonal*/elt->positions[0]);
-	diagonal->nmismatches_known_p = false; /* Signifies that we don't know the number of mismatches */
-	left_diagonals = List_push(left_diagonals,(void *) diagonal);
-      } else if (elt->querystart_leftward < elt->queryend_leftward) {
-	for (j = 0; j < elt->npositions; j++) {	/* Go in this order to avoid reversing list at the end */
-	  debug13(printf("Creating left diagonal: query %d..%d (leftward %d..%d), diagonal %u\n",
-			 elt->querystart,elt->queryend,elt->querystart_leftward,elt->queryend_leftward,elt->positions[j] - chroffset));
-	  left_diagonals = List_push(left_diagonals,Univdiag_new(elt->querystart_leftward,elt->queryend_leftward,
-								 /*univdiagonal*/elt->positions[j]));
-	}
-      }
-      if (elt->temporaryp == true) {
-	Elt_free(&elt);
-      } else {
-	Elt_reset(elt);
-      }
-    }
-    List_free(&(elt_tree[i]));
-  }
-
-  FREE(elt_tree);
-
-
-
-  /* A.  Compute right diagonals */
-  /* A1.  Scoring for dynamic programming */
-  diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,right_diagonals);
-  List_free(&right_diagonals);
-#ifdef DEBUG12
-  printf("Right side before consolidating\n");
-  for (i = 0; i < ndiagonals; i++) {
-    diagonal = diagonal_array[i];
-    printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
-  }
-#endif
-
-  *all_right_diagonals = (List_T) NULL;
-  qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_diagonal_cmp);
-  i = 0;
-  while (i < ndiagonals) {
-    j = i;
-    while (j < ndiagonals && diagonal_array[j]->univdiagonal == diagonal_array[i]->univdiagonal) {
-      j++;
-    }
-    if (j == i) {
-      *all_right_diagonals = List_push(*all_right_diagonals,(void *) diagonal_array[i]);
-    } else {
-      *all_right_diagonals = List_push(*all_right_diagonals,
-				      (void *) Univdiag_new(diagonal_array[i]->querystart,
-							    diagonal_array[j-1]->queryend,
-							    diagonal_array[i]->univdiagonal));
-      for (k = i; k < j; k++) {
-	Univdiag_free(&(diagonal_array[k]));
-      }
-    }
-    i = j;
-  }
-  FREE(diagonal_array);
-
-  /* TODO: May be able to skip this sorting step */
-  diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_right_diagonals);
-  qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_ascending_cmp);
-#ifdef DEBUG12
-  printf("Right side after consolidating and sorting\n");
-  for (i = 0; i < ndiagonals; i++) {
-    diagonal = diagonal_array[i];
-    printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
-  }
-#endif
-
-
-  for (i = 0; i < ndiagonals; i++) {
-    diagonal = diagonal_array[i];
-    debug13(printf("%d: %d..%d at %u\n",i,diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
-
-    low = subtract_bounded(diagonal->univdiagonal,overall_max_distance,chroffset);
-    high = add_bounded(diagonal->univdiagonal,max_insertionlen,chrhigh);
-    querypos = diagonal->querystart;
-    best_score = 0;
-
-    for (j = i - 1; j >= 0; --j) {
-      prev_diagonal = diagonal_array[j];
-      debug13(printf("  %d: %d..%d at %u  ",j,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
-
-      if (prev_diagonal->queryend >= querypos) {
-	debug13(printf("Skipping because queryend %d >= querypos %d\n",prev_diagonal->queryend,querypos));
-      } else if (prev_diagonal->univdiagonal < low) {
-	debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->univdiagonal,low));
-      } else if (prev_diagonal->univdiagonal > high) {
-	debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->univdiagonal,high));
-      } else {
-	score = prev_diagonal->intscore;
-	if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
-	  score += 1;
-	}
-	if (score <= best_score) {
-	  debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
-	} else {
-	  best_score = score;
-	  diagonal->prev = prev_diagonal;
-	  debug13(printf("Updating best score to be %d.  Prev diagonal is %d..%d at %u\n",
-			 best_score,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
-	}
-      }
-    }
-
-    /* Handle links to middle diagonal */
-    prev_diagonal = *middle_diagonal;
-    debug13(printf("  Middle: %d..%d at %u  ",prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
-    if (prev_diagonal->queryend >= querypos) {
-      debug13(printf("Skipping because queryend %d >= querypos %d\n",prev_diagonal->queryend,querypos));
-    } else if (prev_diagonal->univdiagonal < low) {
-      debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->univdiagonal,low));
-    } else if (prev_diagonal->univdiagonal > high) {
-      debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->univdiagonal,high));
-    } else {
-      score = prev_diagonal->intscore;
-      if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
-	score += 1;		/* This bonus means we should double count contiguous region within each segment */
-      }
-      if (score <= best_score) {
-	debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
-      } else {
-	best_score = score;
-	/* diagonal->prev = (Univdiag_T) NULL; */
-	debug13(printf("Updating best score (for link to middle diagonal) to be %d\n",best_score));
-      }
-    }
-
-    diagonal->intscore = best_score + 2*diagonal->nconsecutive;
-    debug13(printf("Right diagonal %d..%d at %u gets score %d\n",
-		   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->intscore));
-  }
-  FREE(diagonal_array);
-
-
-  /* A2.  Optimizing for dynamic programming */
-  best_score_right = 0;
-  *best_right_diagonals = (List_T) NULL;
-  for (p = *all_right_diagonals; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    if (diagonal->intscore > best_score_right) {
-      best_score_right = diagonal->intscore;
-      List_free(&(*best_right_diagonals));
-      *best_right_diagonals = List_push(NULL,(void *) diagonal);
-    } else if (diagonal->intscore == best_score_right) {
-      *best_right_diagonals = List_push(*best_right_diagonals,(void *) diagonal);
-    }
-  }
-
-
-  /* C.  Compute left diagonals */
-  /* C1.  Scoring for dynamic programming */
-  diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,left_diagonals);
-  List_free(&left_diagonals);
-#ifdef DEBUG12
-  printf("Left side before consolidating\n");
-  for (i = 0; i < ndiagonals; i++) {
-    diagonal = diagonal_array[i];
-    printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
-  }
-#endif
-
-  *all_left_diagonals = (List_T) NULL;
-  qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_diagonal_cmp);
-  i = 0;
-  while (i < ndiagonals) {
-    j = i;
-    while (j < ndiagonals && diagonal_array[j]->univdiagonal == diagonal_array[i]->univdiagonal) {
-      j++;
-    }
-    if (j == i) {
-      *all_left_diagonals = List_push(*all_left_diagonals,(void *) diagonal_array[i]);
-    } else {
-      *all_left_diagonals = List_push(*all_left_diagonals,
-				      (void *) Univdiag_new(diagonal_array[i]->querystart,
-							    diagonal_array[j-1]->queryend,
-							    diagonal_array[i]->univdiagonal));
-      for (k = i; k < j; k++) {
-	Univdiag_free(&(diagonal_array[k]));
-      }
-    }
-    i = j;
-  }
-  FREE(diagonal_array);
-
-  /* TODO: May be able to skip this sorting step */
-  diagonal_array = (Univdiag_T *) List_to_array_n(&ndiagonals,*all_left_diagonals);
-  qsort(diagonal_array,ndiagonals,sizeof(Univdiag_T),Univdiag_descending_cmp);
-#ifdef DEBUG12
-  printf("Left side after consolidating and sorting\n");
-  for (i = 0; i < ndiagonals; i++) {
-    diagonal = diagonal_array[i];
-    printf("%d..%d at %u\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal);
-  }
-#endif
-
-
-  for (i = 0; i < ndiagonals; i++) {
-    diagonal = diagonal_array[i];
-    debug13(printf("%d: %d..%d at %u\n",i,diagonal->querystart,diagonal->queryend,diagonal->univdiagonal));
-
-    low = subtract_bounded(diagonal->univdiagonal,max_insertionlen,chroffset);
-    high = add_bounded(diagonal->univdiagonal,overall_max_distance,chrhigh);
-    querypos = diagonal->queryend;
-    best_score = 0;
-
-    for (j = i - 1; j >= 0; --j) {
-      prev_diagonal = diagonal_array[j];
-      debug13(printf("  %d: %d..%d at %u  ",j,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
-
-      if (prev_diagonal->querystart <= querypos) {
-	debug13(printf("Skipping because querystart %d <= querypos %d\n",prev_diagonal->querystart,querypos));
-      } else if (prev_diagonal->univdiagonal < low) {
-	debug13(printf("Skipping because diagonal %u < low %u\n",prev_diagonal->univdiagonal,low));
-      } else if (prev_diagonal->univdiagonal > high) {
-	debug13(printf("Skipping because diagonal %u > high %u\n",prev_diagonal->univdiagonal,high));
-      } else {
-	score = prev_diagonal->intscore;
-	if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
-	  score += 1;
-	}
-	if (score <= best_score) {
-	  debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
-	} else {
-	  best_score = score;
-	  diagonal->prev = prev_diagonal;
-	  debug13(printf("Updating best score to be %d.  Prev diagonal is %d..%d at %u\n",
-			 best_score,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
-	}
-      }
-    }
-
-    /* Handle links to middle diagonal */
-    prev_diagonal = *middle_diagonal;
-    debug13(printf("  Middle: %d..%d at %u  ",prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->univdiagonal));
-    if (prev_diagonal->querystart <= querypos) {
-      debug13(printf("Skipping because querystart %d <= querypos %d\n",prev_diagonal->querystart,querypos));
-    } else if (prev_diagonal->univdiagonal < low) {
-      debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->univdiagonal,low));
-    } else if (prev_diagonal->univdiagonal > high) {
-      debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->univdiagonal,high));
-    } else {
-      score = prev_diagonal->intscore;
-      if (prev_diagonal->univdiagonal == diagonal->univdiagonal) {
-	score += 1;		/* This bonus means we should double count contiguous region within each segment */
-      }
-      if (score <= best_score) {
-	debug13(printf("Skipping because score %d <= best_score %d\n",prev_diagonal->intscore,best_score));
-      } else {
-	best_score = score;
-	/* diagonal->prev = (Univdiag_T) NULL; */
-	debug13(printf("Updating best score (for link to middle diagonal) to be %d\n",best_score));
-      }
-    }
-
-    diagonal->intscore = best_score + 2*diagonal->nconsecutive;
-    debug13(printf("Left diagonal %d..%d at %u gets score %d\n",
-		   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->intscore));
-  }
-  FREE(diagonal_array);
-
-
-  /* C2.  Optimizing for dynamic programming */
-  best_score_left = 0;
-  *best_left_diagonals = (List_T) NULL;
-  for (p = *all_left_diagonals; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    if (diagonal->intscore > best_score_left) {
-      best_score_left = diagonal->intscore;
-      List_free(&(*best_left_diagonals));
-      *best_left_diagonals = List_push(NULL,(void *) diagonal);
-    } else if (diagonal->intscore == best_score_left) {
-      *best_left_diagonals = List_push(*best_left_diagonals,(void *) diagonal);
-    }
-  }
-
-#if 0
-  printf("Best on the left\n");
-  for (p = *best_left_diagonals; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    printf("Score %d: %d..%d at %u\n",diagonal->intscore,diagonal->querystart,diagonal->queryend,diagonal->diagonal);
-  }
-#endif
-
-
-  if (best_score_left == 0 && best_score_right == 0) {
-    return (*middle_diagonal)->intscore;
-  } else if (best_score_left == 0) {
-    return best_score_right;
-  } else if (best_score_right == 0) {
-    return best_score_left;
-  } else {
-    /* middle_diagonal score is double counted */
-    return best_score_left + best_score_right - (*middle_diagonal)->intscore;
-  }
-}
-
-
-static List_T
-find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T *right_endpoints_antisense,
-		Intlist_T *right_queryends_sense, Intlist_T *right_queryends_antisense,
-		Uintlist_T *right_ambcoords_sense, Uintlist_T *right_ambcoords_antisense,
-		Intlist_T *right_amb_knowni_sense, Intlist_T *right_amb_knowni_antisense,
-		Intlist_T *right_amb_nmismatchesi_sense, Intlist_T *right_amb_nmismatchesi_antisense,
-		Intlist_T *right_amb_nmismatchesj_sense, Intlist_T *right_amb_nmismatchesj_antisense,
-		Doublelist_T *right_amb_probsi_sense, Doublelist_T *right_amb_probsi_antisense,
-		Doublelist_T *right_amb_probsj_sense, Doublelist_T *right_amb_probsj_antisense,
-
-		List_T *left_paths, Intlist_T *left_endpoints_sense, Intlist_T *left_endpoints_antisense,
-		Intlist_T *left_querystarts_sense, Intlist_T *left_querystarts_antisense,
-		Uintlist_T *left_ambcoords_sense, Uintlist_T *left_ambcoords_antisense,
-		Intlist_T *left_amb_knowni_sense, Intlist_T *left_amb_knowni_antisense,
-		Intlist_T *left_amb_nmismatchesi_sense, Intlist_T *left_amb_nmismatchesi_antisense,
-		Intlist_T *left_amb_nmismatchesj_sense, Intlist_T *left_amb_nmismatchesj_antisense,
-		Doublelist_T *left_amb_probsi_sense, Doublelist_T *left_amb_probsi_antisense,
-		Doublelist_T *left_amb_probsj_sense, Doublelist_T *left_amb_probsj_antisense,
-
-		List_T *fillin_diagonals,
-
-		Univdiag_T middle_diagonal, List_T best_right_diagonals, List_T best_left_diagonals,
-
-		int querylength, Compress_T query_compress, Univcoord_T chroffset,
-		bool plusp, int genestrand, int max_mismatches_allowed) {
-  List_T middle_path;
-  List_T p;
-
-  List_T diagonal_path, ambig_path;
-  Univdiag_T diagonal, common_diagonal, prev_diagonal, right_indel_diagonal = NULL, left_indel_diagonal = NULL;
-  int nbest;
-
-#ifdef SUBDIVIDE_ENDS
-  int indexsize;
-  bool oned_matrix_p;
-  Chrpos_T **mappings, chrstart, chrend;
-  int maxnconsecutive = 0;
-  int *npositions, totalpositions = 0;
-  bool *coveredp;
-  int querystart, queryend;
-  Univcoord_T mappingstart, mappingend;
-  List_T sub_diagonals;
-  Diag_T sub_diagonal;
-  Oligoindex_T oligoindex;
-#endif
-  Univcoord_T left, prev_left;
-  
-  /* Chrpos_T splice_distance; */
-  int splice_pos;
-  int best_knowni_i, best_knowni_j, best_nmismatches_i, best_nmismatches_j;
-  double best_prob_i, best_prob_j;
-
-  Chrpos_T first_dist_sense, second_dist_sense, first_dist_antisense, second_dist_antisense;
-  double first_prob_sense, second_prob_sense, first_prob_antisense, second_prob_antisense;
-  int firsti_sense, secondi_sense, firsti_antisense, secondi_antisense;
-  int sensei, antisensei;
-
-  int segmenti_donor_nknown, segmentj_acceptor_nknown,
-    segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
-  int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
-    *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
-  int j;
-
-#ifdef HAVE_ALLOCA
-  if (querylength <= MAX_STACK_READLENGTH) {
-    segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-  } else {
-    segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  }
-#else
-  segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-#endif
-
-
-  debug13(printf("***Entered find_best_path\n"));
-
-#ifdef SUBDIVIDE_ENDS
-  mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
-  indexsize = Oligoindex_indexsize(oligoindex);
-  coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
-  npositions = (int *) CALLOCA(querylength,sizeof(int));
-  oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
-#endif
-
-  /* A3.  Traceback for dynamic programming */
-  *right_endpoints_sense = *right_endpoints_antisense = (Intlist_T) NULL;
-  *right_queryends_sense = *right_queryends_antisense = (Intlist_T) NULL;
-  *right_ambcoords_sense = *right_ambcoords_antisense = (Uintlist_T) NULL;
-  *right_amb_knowni_sense = *right_amb_knowni_antisense = (Intlist_T) NULL;
-  *right_amb_nmismatchesi_sense = *right_amb_nmismatchesi_antisense = (Intlist_T) NULL;
-  *right_amb_nmismatchesj_sense = *right_amb_nmismatchesj_antisense = (Intlist_T) NULL;
-  *right_amb_probsi_sense = *right_amb_probsi_antisense = (Doublelist_T) NULL;
-  *right_amb_probsj_sense = *right_amb_probsj_antisense = (Doublelist_T) NULL;
-
-  *right_paths = (List_T) NULL;
-  if ((nbest = List_length(best_right_diagonals)) == 0) {
-    common_diagonal = (Univdiag_T) NULL;
-
-    /* querystart = middle_diagonal->queryend + 1; */
-    left = middle_diagonal->univdiagonal;
-
-  } else if (nbest == 1) {
-    common_diagonal = (Univdiag_T) List_head(best_right_diagonals);
-
-    /* querystart = common_diagonal->queryend + 1; */
-    left = common_diagonal->univdiagonal;
-
-  } else {
-    debug13(printf("Multiple (%d) best right diagonals\n",nbest));
-
-    /* Distinguish between common and divergent diagonals */
-    for (p = best_right_diagonals; p != NULL; p = List_next(p)) {
-      diagonal = (Univdiag_T) List_head(p);
-      while (diagonal != NULL) {
-	diagonal->nlinked += 1;
-	diagonal = diagonal->prev;
-      }
-    }
-
-    /* Handle divergent diagonals */
-    /* Now that we are running oligoindex, we may need to obtain only the last common_diagonal */
-    for (p = best_right_diagonals; p != NULL; p = List_next(p)) {
-      ambig_path = (List_T) NULL;
-      diagonal = (Univdiag_T) List_head(p);
-      while (diagonal != NULL && diagonal->nlinked < nbest) {
-	ambig_path = List_push(ambig_path,(void *) diagonal);
-	diagonal = diagonal->prev;
-      }
-      *right_paths = List_push(*right_paths,(void *) ambig_path);
-
-      common_diagonal = diagonal; /* Last elt on prev path.  Save for later */
-    }
-
-    if (common_diagonal == NULL) {
-      /* All paths connect directly to the middle diagonal, so there is no common diagonal */
-      prev_diagonal = middle_diagonal;
-      /* querystart = middle_diagonal->queryend + 1; */
-      prev_left = middle_diagonal->univdiagonal;
-    } else {
-      prev_diagonal = common_diagonal;
-      /* querystart = common_diagonal->queryend + 1; */
-      prev_left = common_diagonal->univdiagonal;
-    }
-
-    /* Distinguish right paths by looking for indel (which wins) or splicing */
-    debug13(printf("Have %d right_paths.  Distinguish by looking for indels\n",List_length(*right_paths)));
-    for (p = *right_paths; p != NULL; p = List_next(p)) {
-      ambig_path = (List_T) List_head(p);
-      diagonal = (Univdiag_T) List_head(ambig_path);
-      left = diagonal->univdiagonal;
-      debug13(printf("left %u, prev_left %u, difference %d\n",left,prev_left,(int) left - prev_left));
-      if (left < prev_left) {
-	/* Insertion */
-	debug13(printf("Found insertion\n"));
-	right_indel_diagonal = diagonal;
-      } else if (left - prev_left < MIN_INTRONLEN) {
-	/* Deletion */
-	debug13(printf("Found deletion\n"));
-	right_indel_diagonal = diagonal;
-      }
-    }
-
-    if (right_indel_diagonal != NULL) {
-      /* Push onto middle path later */
-      /* querystart = right_indel_diagonal->queryend + 1; */
-      left = right_indel_diagonal->univdiagonal;
-
-    } else {
-      debug13(printf("Still have %d right_paths.  Distinguish by looking for best splice\n",List_length(*right_paths)));
-      first_dist_sense = second_dist_sense = 0;
-      first_prob_sense = second_prob_sense = 0.0;
-      firsti_sense = secondi_sense = -1;
-      first_dist_antisense = second_dist_antisense = 0;
-      first_prob_antisense = second_prob_antisense = 0.0;
-      firsti_antisense = secondi_antisense = -1;
-      sensei = antisensei = 0;
-
-      for (p = *right_paths; p != NULL; p = List_next(p)) {
-	ambig_path = (List_T) List_head(p);
-	diagonal = (Univdiag_T) List_head(ambig_path);
-	left = diagonal->univdiagonal;
-
-	segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(prev_left,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,prev_left);
-	  while (j < nsplicesites && splicesites[j] < prev_left + querylength) {
-	    if (splicetypes[j] == DONOR) {
-	      debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - prev_left;
-	      segmenti_donor_knowni[segmenti_donor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIACCEPTOR) {
-	      debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - prev_left;
-	      segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
-	segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
-	  
-	segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(left,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,left);
-	  while (j < nsplicesites && splicesites[j] < left + querylength) {
-	    if (splicetypes[j] == ACCEPTOR) {
-	      debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left;
-	      segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIDONOR) {
-	      debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left;
-	      segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
-	segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
-      
-	/* splice_distance = left - prev_left; */
-#if 0
-	max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
-	debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
-	if (prev_diagonal->intscore > 0) {
-	  max_mismatches_allowed += 1;
-	}
-	if (diagonal->intscore > 0) {
-	  max_mismatches_allowed += 1;
-	}
-#endif
-      
-	if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
-					       &best_prob_i,&best_prob_j,
-					       /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
-					       prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
-					       segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-					       segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-					       segmenti_donor_knowni,segmentj_acceptor_knowni,
-					       segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-					       segmenti_donor_nknown,segmentj_acceptor_nknown,
-					       segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-					       max_mismatches_allowed,plusp,genestrand)) >= 0) {
-	  debug13(printf("Found sense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
-	  if (best_prob_i + best_prob_j > first_prob_sense) {
-	    second_dist_sense = first_dist_sense;
-	    second_prob_sense = first_prob_sense;
-	    secondi_sense = firsti_sense;
-	    first_dist_sense = left - prev_left;
-	    first_prob_sense = best_prob_i + best_prob_j;
-	    firsti_sense = sensei;
-	  } else if (best_prob_i + best_prob_j > second_prob_sense) {
-	    second_dist_sense = left - prev_left;
-	    second_prob_sense = best_prob_i + best_prob_j;
-	    secondi_sense = sensei;
-	  }
-
-	  *right_endpoints_sense = Intlist_push(*right_endpoints_sense,splice_pos);
-	  *right_queryends_sense = Intlist_push(*right_queryends_sense,diagonal->queryend + 1);
-	  *right_ambcoords_sense = Uintlist_push(*right_ambcoords_sense,left + splice_pos);
-	  *right_amb_knowni_sense = Intlist_push(*right_amb_knowni_sense,best_knowni_j);
-	  *right_amb_nmismatchesi_sense = Intlist_push(*right_amb_nmismatchesi_sense,best_nmismatches_i);
-	  *right_amb_nmismatchesj_sense = Intlist_push(*right_amb_nmismatchesj_sense,best_nmismatches_j);
-	  *right_amb_probsi_sense = Doublelist_push(*right_amb_probsi_sense,best_prob_i);
-	  *right_amb_probsj_sense = Doublelist_push(*right_amb_probsj_sense,best_prob_j);
-	  sensei++;
-	}
-
-	if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
-						   &best_prob_i,&best_prob_j,
-						   /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
-						   prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
-						   segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-						   segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-						   segmenti_donor_knowni,segmentj_acceptor_knowni,
-						   segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-						   segmenti_donor_nknown,segmentj_acceptor_nknown,
-						   segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-						   max_mismatches_allowed,plusp,genestrand)) >= 0) {
-	  debug13(printf("Found antisense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
-	  if (best_prob_i + best_prob_j > first_prob_antisense) {
-	    second_dist_antisense = first_dist_antisense;
-	    second_prob_antisense = first_prob_antisense;
-	    secondi_antisense = firsti_antisense;
-	    first_dist_antisense = left - prev_left;
-	    first_prob_antisense = best_prob_i + best_prob_j;
-	    firsti_antisense = antisensei;
-	  } else if (best_prob_i + best_prob_j > second_prob_antisense) {
-	    second_dist_antisense = left - prev_left;
-	    second_prob_antisense = best_prob_i + best_prob_j;
-	    secondi_antisense = antisensei;
-	  }
-
-	  *right_endpoints_antisense = Intlist_push(*right_endpoints_antisense,splice_pos);
-	  *right_queryends_antisense = Intlist_push(*right_queryends_antisense,diagonal->queryend + 1);
-	  *right_ambcoords_antisense = Uintlist_push(*right_ambcoords_antisense,left + splice_pos);
-	  *right_amb_knowni_antisense = Intlist_push(*right_amb_knowni_antisense,best_knowni_j);
-	  *right_amb_nmismatchesi_antisense = Intlist_push(*right_amb_nmismatchesi_antisense,best_nmismatches_i);
-	  *right_amb_nmismatchesj_antisense = Intlist_push(*right_amb_nmismatchesj_antisense,best_nmismatches_j);
-	  *right_amb_probsi_antisense = Doublelist_push(*right_amb_probsi_antisense,best_prob_i);
-	  *right_amb_probsj_antisense = Doublelist_push(*right_amb_probsj_antisense,best_prob_j);
-	  antisensei++;
-	}
-      }
-
-      if (Intlist_length(*right_endpoints_sense) > 1) {
-	if (first_dist_sense < second_dist_sense/2) {
-	  debug13(printf("first dist sense %u is significantly shorter than second dist sense %u.  Keeping %d from end\n",
-			 first_dist_sense,second_dist_sense,firsti_sense));
-	  firsti_sense = Intlist_length(*right_endpoints_sense) - 1 - firsti_sense; /* Because we don't reverse lists */
-	  *right_endpoints_sense = Intlist_keep_one(*right_endpoints_sense,firsti_sense);
-	  *right_queryends_sense = Intlist_keep_one(*right_queryends_sense,firsti_sense);
-	  *right_ambcoords_sense = Uintlist_keep_one(*right_ambcoords_sense,firsti_sense);
-	  *right_amb_knowni_sense = Intlist_keep_one(*right_amb_knowni_sense,firsti_sense);
-	  *right_amb_nmismatchesi_sense = Intlist_keep_one(*right_amb_nmismatchesi_sense,firsti_sense);
-	  *right_amb_nmismatchesj_sense = Intlist_keep_one(*right_amb_nmismatchesj_sense,firsti_sense);
-	  *right_amb_probsi_sense = Doublelist_keep_one(*right_amb_probsi_sense,firsti_sense);
-	  *right_amb_probsj_sense = Doublelist_keep_one(*right_amb_probsj_sense,firsti_sense);
-	}
-      }
-
-      if (Intlist_length(*right_endpoints_antisense) > 1) {
-	if (first_dist_antisense < second_dist_antisense/2) {
-	  debug13(printf("first dist antisense %u is significantly shorter than second dist antisense %u.  Keeping %d from end\n",
-			 first_dist_antisense,second_dist_antisense,firsti_antisense));
-	  firsti_antisense = Intlist_length(*right_endpoints_antisense) - 1 - firsti_antisense; /* Because we don't reverse lists */
-	  *right_endpoints_antisense = Intlist_keep_one(*right_endpoints_antisense,firsti_antisense);
-	  *right_queryends_antisense = Intlist_keep_one(*right_queryends_antisense,firsti_antisense);
-	  *right_ambcoords_antisense = Uintlist_keep_one(*right_ambcoords_antisense,firsti_antisense);
-	  *right_amb_knowni_antisense = Intlist_keep_one(*right_amb_knowni_antisense,firsti_antisense);
-	  *right_amb_nmismatchesi_antisense = Intlist_keep_one(*right_amb_nmismatchesi_antisense,firsti_antisense);
-	  *right_amb_nmismatchesj_antisense = Intlist_keep_one(*right_amb_nmismatchesj_antisense,firsti_antisense);
-	  *right_amb_probsi_antisense = Doublelist_keep_one(*right_amb_probsi_antisense,firsti_antisense);
-	  *right_amb_probsj_antisense = Doublelist_keep_one(*right_amb_probsj_antisense,firsti_antisense);
-	}
-      }
-    }
-  }
-
-
-#ifdef SUBDIVIDE_ENDS
-  sub_diagonals = (List_T) NULL;
-
-  if (querystart + MIN_ENDLENGTH >= querylength) {
-  } else {
-    /* Run oligoindex here to right of common_diagonal */
-    mappingstart = subtract_bounded(left + querystart,/*minusterm*/max_insertionlen,chroffset);
-    mappingend = add_bounded(left + querylength,/*plusterm*/overall_max_distance,chrhigh);
-    chrstart = mappingstart - chroffset;
-    chrend = mappingend - chroffset;
-
-    Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
-			queryptr,querystart,/*queryend*/querylength,/*chrpos*/chrstart,genestrand);
-    sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
-					    &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
-					    queryptr,querystart,/*queryend*/querylength,querylength,
-					    chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
-    Oligoindex_untally(oligoindex,queryptr,querylength);
-
-    debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
-#ifdef DEBUG14
-    for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-      sub_diagonal = (Diag_T) List_head(p);
-      /* Need to alter oligoindex diagonal for our needs */
-      printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal);
-    }
-#endif
-
-#if 0
-    /* Perform dynamic programming on these diagonals */
-    for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-      diagonal = List_head(p);
-      querypos = diagonal->querystart;
-      best_score = 0;
-
-      for (q = sub_diagonals; q != p; q = List_next(q)) {
-	prev_diagonal = List_head(q);
-	if (prev_diagonal->queryend >= querypos) {
-	  debug13(printf("Skipping because queryend %d >= querypos %d\n",prev_diagonal->queryend,querypos));
-	} else if (prev_diagonal->univdiagonal < low) {
-	  debug13(printf("Skipping because diagonal %u < low_chrpos %u\n",prev_diagonal->diagonal,low_chrpos));
-	} else if (prev_diagonal->diagonal > high_chrpos) {
-	  debug13(printf("Skipping because diagonal %u > high_chrpos %u\n",prev_diagonal->diagonal,high_chrpos));
-	} else {
-	  score = prev_diagonal->intscore;
-	  if (prev_diagonal->diagonal == diagonal->diagonal) {
-	    score += 1;
-	  }
-	  if (score <= best_score) {
-	    debug13(printf("Skipping because score %d <= best_score %d\n",score,best_score));
-	  } else {
-	    best_score = score;
-	    diagonal->prev = prev_diagonal;
-	    debug13(printf("Updating best score to be %d.  Prev diagonal is %d..%d at %u\n",
-			   best_score,prev_diagonal->querystart,prev_diagonal->queryend,prev_diagonal->diagonal));
-	  }
-	}
-      }
-    }
-#endif
-
-  }
-#endif	/* SUBDIVIDE_ENDS */
-
-
-  *fillin_diagonals = (List_T) NULL;
-  middle_path = (List_T) NULL;
-
-#ifdef SUBDIVIDE_ENDS
-  /* Without SUBDIVIDE_ENDS, sub_diagonals is guaranteed to be NULL */
-  /* A4.  Process oligoindex diagonals from right */
-  if (List_length(sub_diagonals) == 0) {
-    /* Skip */
-  } else if (List_length(sub_diagonals) == 1) {
-    sub_diagonal = List_head(sub_diagonals);
-    diagonal = Univdiag_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,
-				   /*univdiagonal*/chroffset + chrstart + sub_diagonal->diagonal);
-    *fillin_diagonals = List_push(*fillin_diagonals,(void *) diagonal);
-    middle_path = List_push(middle_path,(void *) diagonal);
-  } else {
-#ifdef DEBUG13
-    printf("Have %d sub_diagonals\n",List_length(sub_diagonals));
-    for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-      sub_diagonal = List_head(p);
-      printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend,chrstart + sub_diagonal->diagonal);
-    }
-#endif
-  }
-#endif
-
-  if (right_indel_diagonal != NULL) {
-    debug13(printf("Pushing right indel diagonal onto middle: query %d..%d, diagonal %u\n",
-		   right_indel_diagonal->querystart,right_indel_diagonal->queryend,right_indel_diagonal->univdiagonal - chroffset));
-    middle_path = List_push(middle_path,(void *) right_indel_diagonal);
-  }
-
-  /* A5. Process common diagonal from right */
-  while (common_diagonal != NULL) {
-    middle_path = List_push(middle_path,(void *) common_diagonal);
-    debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
-		   common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal - chroffset));
-    common_diagonal = common_diagonal->prev;
-  }
-
-  /* B. Process original middle diagonal */
-  middle_path = List_push(middle_path,(void *) middle_diagonal);
-  debug13(printf("Pushing middle diagonal onto middle: query %d..%d, diagonal %u\n",
-		 middle_diagonal->querystart,middle_diagonal->queryend,middle_diagonal->univdiagonal - chroffset));
-
-
-  /* C3.  Traceback for dynamic programming */
-  *left_endpoints_sense = *left_endpoints_antisense = (Intlist_T) NULL;
-  *left_querystarts_sense = *left_querystarts_antisense = (Intlist_T) NULL;
-  *left_ambcoords_sense = *left_ambcoords_antisense = (Uintlist_T) NULL;
-  *left_amb_knowni_sense = *left_amb_knowni_antisense = (Intlist_T) NULL;
-  *left_amb_nmismatchesi_sense = *left_amb_nmismatchesi_antisense = (Intlist_T) NULL;
-  *left_amb_nmismatchesj_sense = *left_amb_nmismatchesj_antisense = (Intlist_T) NULL;
-  *left_amb_probsi_sense = *left_amb_probsi_antisense = (Doublelist_T) NULL;
-  *left_amb_probsj_sense = *left_amb_probsj_antisense = (Doublelist_T) NULL;
-
-  *left_paths = (List_T) NULL;
-  debug13(printf("On left, have %d best_left_diagonals\n",List_length(best_left_diagonals)));
-  if ((nbest = List_length(best_left_diagonals)) == 0) {
-    common_diagonal = (Univdiag_T) NULL;
-
-    /* queryend = middle_diagonal->querystart; */
-    left = middle_diagonal->univdiagonal;
-
-  } else if (nbest == 1) {
-    common_diagonal = (Univdiag_T) List_head(best_left_diagonals);
-
-    /* queryend = common_diagonal->querystart; */
-    left = common_diagonal->univdiagonal;
-
-  } else {
-    debug13(printf("Multiple (%d) best left diagonals\n",nbest));
-
-    /* Distinguish between common and divergent diagonals */
-    for (p = best_left_diagonals; p != NULL; p = List_next(p)) {
-      diagonal = (Univdiag_T) List_head(p);
-      while (diagonal != NULL) {
-	diagonal->nlinked += 1;
-	diagonal = diagonal->prev;
-      }
-    }
-
-    /* Handle divergent diagonals */
-    /* Now that we are running oligoindex, we may need to obtain only the last common_diagonal */
-    for (p = best_left_diagonals; p != NULL; p = List_next(p)) {
-      ambig_path = (List_T) NULL;
-      diagonal = (Univdiag_T) List_head(p);
-      while (diagonal != NULL && diagonal->nlinked < nbest) {
-	ambig_path = List_push(ambig_path,(void *) diagonal);
-	diagonal = diagonal->prev;
-      }
-      *left_paths = List_push(*left_paths,(void *) ambig_path);
-
-      common_diagonal = diagonal; /* Last elt on prev path.  Save for later */
-    }
-
-    if (common_diagonal == NULL) {
-      /* All paths connect directly to the middle diagonal, so there is no common diagonal */
-      diagonal = middle_diagonal;
-      /* queryend = middle_diagonal->querystart; */
-      left = middle_diagonal->univdiagonal;
-    } else {
-      diagonal = common_diagonal;
-      /* queryend = common_diagonal->querystart; */
-      left = common_diagonal->univdiagonal;
-    }
-
-    /* Distinguish left paths by looking for indel (which wins) or splicing */
-    debug13(printf("Have %d left_paths.  Distinguish by looking for indel\n",List_length(*left_paths)));
-    for (p = *left_paths; p != NULL; p = List_next(p)) {
-      ambig_path = (List_T) List_head(p);
-      prev_diagonal = (Univdiag_T) List_head(ambig_path);
-      prev_left = prev_diagonal->univdiagonal;
-      debug13(printf("left %u, prev_left %u, difference %d\n",left,prev_left,(int) left - prev_left));
-      if (left < prev_left) {
-	/* Insertion */
-	debug13(printf("Found insertion\n"));
-	left_indel_diagonal = prev_diagonal;
-      } else if (left - prev_left < MIN_INTRONLEN) {
-	/* Deletion */
-	debug13(printf("Found deletion\n"));
-	left_indel_diagonal = prev_diagonal;
-      }
-    }
-
-    if (left_indel_diagonal != NULL) {
-      /* Push onto middle path later */
-      left = left_indel_diagonal->univdiagonal;
-      /* queryend = left_indel_diagonal->querystart; */
-
-    } else {
-      debug13(printf("Still have %d left_paths.  Distinguish by looking for best splice\n",List_length(*left_paths)));
-      first_dist_sense = second_dist_sense = 0;
-      first_prob_sense = second_prob_sense = 0.0;
-      firsti_sense = secondi_sense = -1;
-      first_dist_antisense = second_dist_antisense = 0;
-      first_prob_antisense = second_prob_antisense = 0.0;
-      firsti_antisense = secondi_antisense = -1;
-      sensei = antisensei = 0;
-
-      for (p = *left_paths; p != NULL; p = List_next(p)) {
-	ambig_path = (List_T) List_head(p);
-	prev_diagonal = (Univdiag_T) List_head(ambig_path);
-	prev_left = prev_diagonal->univdiagonal;
-
-	segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(prev_left,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,prev_left);
-	  while (j < nsplicesites && splicesites[j] < prev_left + querylength) {
-	    if (splicetypes[j] == DONOR) {
-	      debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - prev_left;
-	      segmenti_donor_knowni[segmenti_donor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIACCEPTOR) {
-	      debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
-	      segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - prev_left;
-	      segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
-	segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
-	  
-	segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
-	if (nsplicesites > 0 &&
-	    Splicetrie_splicesite_p(left,/*pos5*/1,/*pos3*/querylength) == true) {
-	  j = binary_search(0,nsplicesites,splicesites,left);
-	  while (j < nsplicesites && splicesites[j] < left + querylength) {
-	    if (splicetypes[j] == ACCEPTOR) {
-	      debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left;
-	      segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
-	    } else if (splicetypes[j] == ANTIDONOR) {
-	      debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
-	      segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left;
-	      segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
-	    }
-	    j++;
-	  }
-	}
-	segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
-	segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
-      
-	/* splice_distance = left - prev_left; */
-#if 0
-	max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
-	debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
-	if (prev_diagonal->intscore > 0) {
-	  max_mismatches_allowed += 1;
-	}
-	if (diagonal->intscore > 0) {
-	  max_mismatches_allowed += 1;
-	}
-#endif
-      
-	if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
-					       &best_prob_i,&best_prob_j,
-					       /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
-					       prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
-					       segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-					       segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-					       segmenti_donor_knowni,segmentj_acceptor_knowni,
-					       segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-					       segmenti_donor_nknown,segmentj_acceptor_nknown,
-					       segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-					       max_mismatches_allowed,plusp,genestrand)) >= 0) {
-	  debug13(printf("Found sense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
-	  if (best_prob_i + best_prob_j > first_prob_sense) {
-	    second_dist_sense = first_dist_sense;
-	    second_prob_sense = first_prob_sense;
-	    secondi_sense = firsti_sense;
-	    first_dist_sense = left - prev_left;
-	    first_prob_sense = best_prob_i + best_prob_j;
-	    firsti_sense = sensei;
-	  } else if (best_prob_i + best_prob_j > second_prob_sense) {
-	    second_dist_sense = left - prev_left;
-	    second_prob_sense = best_prob_i + best_prob_j;
-	    secondi_sense = sensei;
-	  }
-
-	  *left_endpoints_sense = Intlist_push(*left_endpoints_sense,splice_pos);
-	  *left_querystarts_sense = Intlist_push(*left_querystarts_sense,prev_diagonal->querystart);
-	  *left_ambcoords_sense = Uintlist_push(*left_ambcoords_sense,prev_left + splice_pos);
-	  *left_amb_knowni_sense = Intlist_push(*left_amb_knowni_sense,best_knowni_i);
-	  *left_amb_nmismatchesi_sense = Intlist_push(*left_amb_nmismatchesi_sense,best_nmismatches_i);
-	  *left_amb_nmismatchesj_sense = Intlist_push(*left_amb_nmismatchesj_sense,best_nmismatches_j);
-	  *left_amb_probsi_sense = Doublelist_push(*left_amb_probsi_sense,best_prob_i);
-	  *left_amb_probsj_sense = Doublelist_push(*left_amb_probsj_sense,best_prob_j);
-	  sensei++;
-	}
-
-	if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
-						   &best_prob_i,&best_prob_j,
-						   /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
-						   prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
-						   segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-						   segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-						   segmenti_donor_knowni,segmentj_acceptor_knowni,
-						   segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-						   segmenti_donor_nknown,segmentj_acceptor_nknown,
-						   segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-						   max_mismatches_allowed,plusp,genestrand)) >= 0) {
-	  debug13(printf("Found antisense splice_pos %d with probs %f and %f\n",splice_pos,best_prob_i,best_prob_j));
-	  if (best_prob_i + best_prob_j > first_prob_antisense) {
-	    second_dist_antisense = first_dist_antisense;
-	    second_prob_antisense = first_prob_antisense;
-	    secondi_antisense = firsti_antisense;
-	    first_dist_antisense = left - prev_left;
-	    first_prob_antisense = best_prob_i + best_prob_j;
-	    firsti_antisense = antisensei;
-	  } else if (best_prob_i + best_prob_j > second_prob_antisense) {
-	    second_dist_antisense = left - prev_left;
-	    second_prob_antisense = best_prob_i + best_prob_j;
-	    secondi_antisense = antisensei;
-	  }
-
-	  *left_endpoints_antisense = Intlist_push(*left_endpoints_antisense,splice_pos);
-	  *left_querystarts_antisense = Intlist_push(*left_querystarts_antisense,prev_diagonal->querystart);
-	  *left_ambcoords_antisense = Uintlist_push(*left_ambcoords_antisense,prev_left + splice_pos);
-	  *left_amb_knowni_antisense = Intlist_push(*left_amb_knowni_antisense,best_knowni_i);
-	  *left_amb_nmismatchesi_antisense = Intlist_push(*left_amb_nmismatchesi_antisense,best_nmismatches_i);
-	  *left_amb_nmismatchesj_antisense = Intlist_push(*left_amb_nmismatchesj_antisense,best_nmismatches_j);
-	  *left_amb_probsi_antisense = Doublelist_push(*left_amb_probsi_antisense,best_prob_i);
-	  *left_amb_probsj_antisense = Doublelist_push(*left_amb_probsj_antisense,best_prob_j);
-	  antisensei++;
-	}
-      }
-
-      if (Intlist_length(*left_endpoints_sense) > 1) {
-	if (first_dist_sense < second_dist_sense/2) {
-	  debug13(printf("first dist sense %u is significantly shorter than second dist sense %u.  Keeping %d from end\n",
-			 first_dist_sense,second_dist_sense,firsti_sense));
-	  firsti_sense = Intlist_length(*left_endpoints_sense) - 1 - firsti_sense; /* Because we don't reverse lists */
-	  *left_endpoints_sense = Intlist_keep_one(*left_endpoints_sense,firsti_sense);
-	  *left_querystarts_sense = Intlist_keep_one(*left_querystarts_sense,firsti_sense);
-	  *left_ambcoords_sense = Uintlist_keep_one(*left_ambcoords_sense,firsti_sense);
-	  *left_amb_knowni_sense = Intlist_keep_one(*left_amb_knowni_sense,firsti_sense);
-	  *left_amb_nmismatchesi_sense = Intlist_keep_one(*left_amb_nmismatchesi_sense,firsti_sense);
-	  *left_amb_nmismatchesj_sense = Intlist_keep_one(*left_amb_nmismatchesj_sense,firsti_sense);
-	  *left_amb_probsi_sense = Doublelist_keep_one(*left_amb_probsi_sense,firsti_sense);
-	  *left_amb_probsj_sense = Doublelist_keep_one(*left_amb_probsj_sense,firsti_sense);
-	}
-      }
-
-      if (Intlist_length(*left_endpoints_antisense) > 1) {
-	if (first_dist_antisense < second_dist_antisense/2) {
-	  debug13(printf("first dist antisense %u is significantly shorter than second dist antisense %u.  Keeping %d from end\n",
-			 first_dist_antisense,second_dist_antisense,firsti_antisense));
-	  firsti_antisense = Intlist_length(*left_endpoints_antisense) - 1 - firsti_antisense; /* Because we don't reverse lists */
-	  *left_endpoints_antisense = Intlist_keep_one(*left_endpoints_antisense,firsti_antisense);
-	  *left_querystarts_antisense = Intlist_keep_one(*left_querystarts_antisense,firsti_antisense);
-	  *left_ambcoords_antisense = Uintlist_keep_one(*left_ambcoords_antisense,firsti_antisense);
-	  *left_amb_knowni_antisense = Intlist_keep_one(*left_amb_knowni_antisense,firsti_antisense);
-	  *left_amb_nmismatchesi_antisense = Intlist_keep_one(*left_amb_nmismatchesi_antisense,firsti_antisense);
-	  *left_amb_nmismatchesj_antisense = Intlist_keep_one(*left_amb_nmismatchesj_antisense,firsti_antisense);
-	  *left_amb_probsi_antisense = Doublelist_keep_one(*left_amb_probsi_antisense,firsti_antisense);
-	  *left_amb_probsj_antisense = Doublelist_keep_one(*left_amb_probsj_antisense,firsti_antisense);
-	}
-      }
-    }
-  }
-
-
-#ifdef SUBDIVIDE_ENDS
-  sub_diagonals = (List_T) NULL;
-
-  /* Run oligoindex here to left of common_diagonal */
-  if (queryend < MIN_ENDLENGTH) {
-  } else {
-    mappingstart = subtract_bounded(left + 0,/*minusterm*/overall_max_distance,chroffset);
-    mappingend = add_bounded(left + queryend,/*plusterm*/max_insertionlen,chrhigh);
-    chrstart = mappingstart - chroffset;
-    chrend = mappingend - chroffset;
-
-    Oligoindex_hr_tally(oligoindex,mappingstart,mappingend,/*plusp:true*/true,
-			queryptr,/*querystart*/0,queryend,/*chrpos*/chrstart,genestrand);
-    sub_diagonals = Oligoindex_get_mappings(NULL,coveredp,mappings,npositions,&totalpositions,
-					    &oned_matrix_p,&maxnconsecutive,oligoindices_minor,oligoindex,
-					    queryptr,/*querystart*/0,queryend,querylength,
-					    chrstart,chrend,chroffset,chrhigh,/*plusp:true*/true,diagpool);
-    Oligoindex_untally(oligoindex,queryptr,querylength);
-
-    debug14(printf("Got %d sub diagonals\n",List_length(sub_diagonals)));
-#ifdef DEBUG14
-    for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-      sub_diagonal = (Diag_T) List_head(p);
-      /* Need to alter oligoindex diagonal for our needs */
-      printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend + indexsize - 1,chrstart + sub_diagonal->diagonal);
-    }
-#endif
-    /* Need to perform dynamic programming on these diagonals, or select one */
-  }
-#endif	/* SUBDIVIDE_ENDS */
-
-
-  diagonal_path = (List_T) NULL;
-
-  /* C5. Process left diagonals in reverse */
-  while (common_diagonal != NULL) {
-    diagonal_path = List_push(diagonal_path,(void *) common_diagonal);
-    common_diagonal = common_diagonal->prev;
-  }
-  /* Pops off in reverse */
-  for (p = diagonal_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
-		   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset));
-    middle_path = List_push(middle_path,(void *) diagonal);
-  }
-  List_free(&diagonal_path);
-
-
-  if (left_indel_diagonal != NULL) {
-    debug13(printf("Pushing left indel diagonal onto middle: query %d..%d, diagonal %u\n",
-		   left_indel_diagonal->querystart,left_indel_diagonal->queryend,left_indel_diagonal->univdiagonal - chroffset));
-    middle_path = List_push(middle_path,(void *) left_indel_diagonal);
-  }
-
-
-#ifdef SUBDIVIDE_ENDS
-  /* Without SUBDIVIDE_ENDS, sub_diagonals is guaranteed to be NULL */
-  /* C4. Process oligoindex diagonals from left */
-  if (List_length(sub_diagonals) == 0) {
-    /* Skip */
-  } else if (List_length(sub_diagonals) == 1) {
-    sub_diagonal = List_head(sub_diagonals);
-    diagonal = Univdiag_new_fillin(sub_diagonal->querystart,sub_diagonal->queryend,indexsize,
-				   /*univdiagonal*/chroffset + chrstart + sub_diagonal->diagonal);
-    *fillin_diagonals = List_push(*fillin_diagonals,(void *) diagonal);
-    middle_path = List_push(middle_path,(void *) diagonal);
-  } else {
-#ifdef DEBUG13
-    printf("Have %d sub_diagonals\n",List_length(sub_diagonals));
-    for (p = sub_diagonals; p != NULL; p = List_next(p)) {
-      sub_diagonal = (Diag_T) List_head(p);
-      printf("%d..%d %u\n",sub_diagonal->querystart,sub_diagonal->queryend,chrstart + sub_diagonal->diagonal);
-    }
-#endif
-  }
-#endif
-
-  debug13(printf("***Exiting find_best_path\n"));
-
-#ifdef SUBDIVIDE_ENDS
-#ifdef HAVE_ALLOCA
-  FREEA(npositions);
-  FREEA(coveredp);
-  FREEA(mappings);
-#else
-  FREE(npositions);
-  FREE(coveredp);
-  FREE(mappings);
-#endif
-#endif
-
-
-#ifdef HAVE_ALLOCA
-  if (querylength <= MAX_STACK_READLENGTH) {
-    FREEA(segmenti_donor_knownpos);
-    FREEA(segmentj_acceptor_knownpos);
-    FREEA(segmentj_antidonor_knownpos);
-    FREEA(segmenti_antiacceptor_knownpos);
-    FREEA(segmenti_donor_knowni);
-    FREEA(segmentj_acceptor_knowni);
-    FREEA(segmentj_antidonor_knowni);
-    FREEA(segmenti_antiacceptor_knowni);
-  } else {
-    FREE(segmenti_donor_knownpos);
-    FREE(segmentj_acceptor_knownpos);
-    FREE(segmentj_antidonor_knownpos);
-    FREE(segmenti_antiacceptor_knownpos);
-    FREE(segmenti_donor_knowni);
-    FREE(segmentj_acceptor_knowni);
-    FREE(segmentj_antidonor_knowni);
-    FREE(segmenti_antiacceptor_knowni);
-  }
-#else
-  FREE(segmenti_donor_knownpos);
-  FREE(segmentj_acceptor_knownpos);
-  FREE(segmentj_antidonor_knownpos);
-  FREE(segmenti_antiacceptor_knownpos);
-  FREE(segmenti_donor_knowni);
-  FREE(segmentj_acceptor_knowni);
-  FREE(segmentj_antidonor_knowni);
-  FREE(segmenti_antiacceptor_knowni);
-#endif
-
-  return middle_path;
-}
-
-
-
-/* Note: This GMAP from sarray suffers from relying on middle_path and
-end paths to get stage2.  Would be better to run oligoindex_hr to get
-a better stage2, or to run GMAP from GSNAP or pairsearch */
-
-#if 0
-static List_T
-run_gmap_plus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_paths,
-	       Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
-	       Chrpos_T chrlength, char *queryuc_ptr, int querylength,
-	       int genestrand, bool first_read_p,
-	       int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
-	       Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
-  Stage3end_T hit;
-  List_T stage2pairs, all_stage2_starts, all_stage2_ends;
-  List_T p, q;
-
-  int sensedir;
-
-  struct Pair_T *pairarray;
-  List_T pairs;
-  List_T diagonal_path;
-  Univdiag_T diagonal, prev_diagonal;
-  int querypos;
-  Chrpos_T genomepos;
-  int c;
-
-  int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
-    max_match_length, ambig_end_length_5, ambig_end_length_3,
-    unknowns, mismatches, qopens, qindels, topens, tindels,
-    ncanonical, nsemicanonical, nnoncanonical;
-  double ambig_prob_5, ambig_prob_3, min_splice_prob;
-  Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
-  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
-  Univcoord_T start, end;
-  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
-
-
-  /* D.  Make all_stage2_starts (paths) */
-  all_stage2_starts = (List_T) NULL;
-  diagonal = (Univdiag_T) List_head(middle_path);
-  for (q = start_paths; q != NULL; q = List_next(q)) {
-    q->first = diagonal_path = List_reverse((List_T) List_head(q));
-    prev_diagonal = (Univdiag_T) List_head(diagonal_path);
-    if (diagonal->univdiagonal > prev_diagonal->univdiagonal) {
-      debug13(printf("START, PLUS\n"));
-      stage2pairs = (List_T) NULL;
-      for (p = diagonal_path; p != NULL; p = List_next(p)) {
-	diagonal = (Univdiag_T) List_head(p);
-	debug13(printf("Diagonal %d..%d at %u [%u]\n",
-		       diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
-	querypos = diagonal->querystart;
-	genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
-	while (querypos <= diagonal->queryend) {
-	  c = queryuc_ptr[querypos];
-	  stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
-				      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
-				      /*dynprogindex*/0);
-	  debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
-	  querypos++;
-	  genomepos++;
-	}
-	debug13(printf("\n"));
-      }
-      all_stage2_starts = List_push(all_stage2_starts,(void *) stage2pairs);
-    }
-  }
-
-
-  /* E.  Make all_stage2_ends (pairs) */
-  all_stage2_ends = (List_T) NULL;
-  prev_diagonal = (Univdiag_T) List_last_value(middle_path);
-  for (q = end_paths; q != NULL; q = List_next(q)) {
-    diagonal_path = (List_T) List_head(q);
-    diagonal = (Univdiag_T) List_head(diagonal_path);
-    if (diagonal->univdiagonal > prev_diagonal->univdiagonal) {
-      debug13(printf("END, PLUS\n"));
-      stage2pairs = (List_T) NULL;
-      for (p = diagonal_path; p != NULL; p = List_next(p)) {
-	diagonal = (Univdiag_T) List_head(p);
-	debug13(printf("Diagonal %d..%d at %u [%u]\n",
-		       diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
-	querypos = diagonal->querystart;
-	genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
-	while (querypos <= diagonal->queryend) {
-	  c = queryuc_ptr[querypos];
-	  stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
-				      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
-				      /*dynprogindex*/0);
-	  debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
-	  querypos++;
-	  genomepos++;
-	}
-	debug13(printf("\n"));
-      }
-      all_stage2_ends = List_push(all_stage2_ends,(void *) List_reverse(stage2pairs));
-    }
-  }
-
-
-#ifdef DEBUG13
-  printf("MIDDLE DIAGONALS, PLUS\n");
-  for (p = middle_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    printf("Diagonal %d..%d at %u [%u]\n",
-	   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset);
-  }
-#endif
-
-  /* F.  Make stage2pairs */
-  stage2pairs = (List_T) NULL;
-  for (p = middle_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    querypos = diagonal->querystart;
-    genomepos = diagonal->univdiagonal + diagonal->querystart - chroffset;
-    while (querypos <= diagonal->queryend) {
-      c = queryuc_ptr[querypos];
-      stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
-				  /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
-				  /*dynprogindex*/0);
-      debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
-      querypos++;
-      genomepos++;
-    }
-    debug13(printf("\n"));
-  }
-
-
-  knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + chroffset;
-  stage2pairs = List_reverse(stage2pairs);
-  knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + chroffset;
-
-  if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
-				  &matches,&nmatches_posttrim,&max_match_length,
-				  &ambig_end_length_5,&ambig_end_length_3,
-				  &ambig_splicetype_5,&ambig_splicetype_3,
-				  &ambig_prob_5,&ambig_prob_3,
-				  &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
-				  &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
-				  stage2pairs,all_stage2_starts,all_stage2_ends,
-#ifdef END_KNOWNSPLICING_SHORTCUT
-				  cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
-				  watsonp ? query_compress_fwd : query_compress_rev,
-#endif
-				  /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
-#ifdef EXTRACT_GENOMICSEG
-				  /*query_subseq_offset*/0,
-#else
-				  /*query_subseq_offset*/0,
-#endif
-				  chrnum,chroffset,chrhigh,
-				  knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
-				  /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
-				  /*sense_try*/0,/*sense_filter*/0,
-				  oligoindices_minor,diagpool,cellpool)) == NULL) {
-
-  } else {
-    nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
-				     pairarray,npairs);
-    start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
-			     /*minusterm*/Pair_querypos(&(pairarray[0])),chroffset);
-    end = add_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
-		      /*plusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chrhigh);
-    if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
-				  ambig_end_length_5,ambig_end_length_3,
-				  ambig_splicetype_5,ambig_splicetype_3,
-				  min_splice_prob,
-				  pairarray,npairs,nsegments,nintrons,nindelbreaks,
-				  /*left*/start,/*genomiclength*/end - start + 1,
-				  /*plusp*/true,genestrand,first_read_p,
-				  /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
-				  cdna_direction,sensedir,/*sarrayp*/true)) == NULL) {
-      FREE_OUT(pairarray);
-    } else {
-      gmap = List_push(gmap,(void *) hit);
-    }
-  }
-
-  List_free(&all_stage2_ends);
-  List_free(&all_stage2_starts);
-
-  return gmap;
-}
-#endif
-
-/* run_gmap_minus (compiled out by the surrounding #if 0).
-   Converts diagonals into lists of MATCH_COMP pairs, calls
-   Stage3_compute on them with plusp false, and, when that returns a
-   pairarray from which Stage3end_new_gmap builds a hit, pushes the
-   hit onto GMAP.  Returns the (possibly extended) GMAP list.
-
-   Section D builds all_stage2_starts from START_PATHS, section E
-   builds all_stage2_ends from END_PATHS, and section F builds the
-   main stage2pairs list from MIDDLE_PATH.  In all three, positions
-   are flipped: querypos starts at querylength - 1 - queryend and
-   genomepos at chrhigh - (univdiagonal + queryend).
-
-   Side effects visible here: each list in START_PATHS is reversed
-   and stored back into its cell, and MIDDLE_PATH is passed through
-   List_reverse.
-
-   The END_KNOWNSPLICING_SHORTCUT arguments name cutoff_level,
-   watsonp, queryrc, query_compress_fwd and query_compress_rev, none
-   of which is a parameter or local of this function.  Both arms of
-   the EXTRACT_GENOMICSEG conditional pass the same value, 0. */
-#if 0
-static List_T
-run_gmap_minus (List_T gmap, List_T middle_path, List_T start_paths, List_T end_paths,
-		Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
-		Chrpos_T chrlength, char *queryuc_ptr, int querylength,
-		int genestrand, bool first_read_p,
-		int maxpeelback, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
-		Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
-  Stage3end_T hit;
-  List_T stage2pairs, all_stage2_starts, all_stage2_ends;
-
-  List_T p, q;
-
-  int sensedir;
-
-  struct Pair_T *pairarray;
-  List_T pairs;
-  List_T diagonal_path;
-  Univdiag_T diagonal, prev_diagonal;
-  int querypos;
-  Chrpos_T genomepos;
-  int c;
-
-  int npairs, goodness, cdna_direction, matches, nmatches_posttrim,
-    max_match_length, ambig_end_length_5, ambig_end_length_3,
-    unknowns, mismatches, qopens, qindels, topens, tindels,
-    ncanonical, nsemicanonical, nnoncanonical;
-  double ambig_prob_5, ambig_prob_3, min_splice_prob;
-  Splicetype_T ambig_splicetype_5, ambig_splicetype_3;
-  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
-  Univcoord_T start, end;
-  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
-
-
-  /* D.  Make all_stage2_starts (paths) */
-  all_stage2_starts = (List_T) NULL;
-  diagonal = (Univdiag_T) List_head(middle_path);
-  for (q = start_paths; q != NULL; q = List_next(q)) {
-    q->first = diagonal_path = List_reverse((List_T) List_head(q)); /* reversed list is written back into start_paths */
-    prev_diagonal = (Univdiag_T) List_head(diagonal_path);
-    if (diagonal->univdiagonal < prev_diagonal->univdiagonal) {
-      debug13(printf("START, MINUS\n"));
-      stage2pairs = (List_T) NULL;
-      for (p = diagonal_path; p != NULL; p = List_next(p)) {
-	diagonal = (Univdiag_T) List_head(p); /* NOTE(review): overwrites the middle_path head used by the test above, so later start paths are compared against this value -- confirm intended */
-	debug13(printf("Diagonal %d..%d at %u [%u]\n",
-		       diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
-	querypos = querylength - 1 - diagonal->queryend;
-	genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
-	while (querypos <= querylength - 1 - diagonal->querystart) {
-	  c = queryuc_ptr[querypos];
-	  stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
-				      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
-				      /*dynprogindex*/0);
-	  debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
-	  querypos++;
-	  genomepos++;
-	}
-	debug13(printf("\n"));
-      }
-      all_stage2_starts = List_push(all_stage2_starts,(void *) stage2pairs); /* pushed in built order; section E reverses before pushing */
-    }
-  }
-
-
-  /* E.  Make all_stage2_ends (pairs) */
-  all_stage2_ends = (List_T) NULL;
-  prev_diagonal = (Univdiag_T) List_last_value(middle_path);
-  for (q = end_paths; q != NULL; q = List_next(q)) {
-    diagonal_path = (List_T) List_head(q);
-    diagonal = (Univdiag_T) List_head(diagonal_path);
-    if (diagonal->univdiagonal < prev_diagonal->univdiagonal) {
-      debug13(printf("END, MINUS\n"));
-      stage2pairs = (List_T) NULL;
-      for (p = diagonal_path; p != NULL; p = List_next(p)) {
-	diagonal = (Univdiag_T) List_head(p);
-	debug13(printf("Diagonal %d..%d at %u [%u]\n",
-		       diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset));
-	querypos = querylength - 1 - diagonal->queryend;
-	genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
-	while (querypos <= querylength - 1 - diagonal->querystart) {
-	  c = queryuc_ptr[querypos];
-	  stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
-				      /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
-				      /*dynprogindex*/0);
-	  debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
-	  querypos++;
-	  genomepos++;
-	}
-	debug13(printf("\n"));
-      }
-      all_stage2_ends = List_push(all_stage2_ends,(void *) List_reverse(stage2pairs));
-    }
-  }
-
-
-#ifdef DEBUG13
-  printf("MIDDLE DIAGONALS, MINUS\n");
-  for (p = middle_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    printf("Diagonal %d..%d at %u [%u]\n",
-	   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset);
-  }
-#endif
-
-  /* F.  Make stage2pairs */
-  stage2pairs = (List_T) NULL;
-  middle_path = List_reverse(middle_path); /* For minus */
-  for (p = middle_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    querypos = querylength - 1 - diagonal->queryend;
-    assert(chrhigh > diagonal->univdiagonal + diagonal->queryend);
-    genomepos = chrhigh - (diagonal->univdiagonal + diagonal->queryend);
-    while (querypos <= querylength - 1 - diagonal->querystart) {
-      c = queryuc_ptr[querypos];
-      stage2pairs = Pairpool_push(stage2pairs,pairpool,querypos,genomepos,
-				  /*cdna*/c,MATCH_COMP,/*genome*/c,/*genomealt*/c,
-				  /*dynprogindex*/0);
-      debug13(printf("Pushing %c | %c at %d,%d\n",queryuc_ptr[querypos],queryuc_ptr[querypos],querypos,genomepos));
-      querypos++;
-      genomepos++;
-    }
-    debug13(printf("\n"));
-  }
-
-  /* NOTE(review): stage2pairs is dereferenced below without a NULL check; it is still NULL if the loop above pushed nothing */
-  knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + chroffset; /* head before reversing */
-  stage2pairs = List_reverse(stage2pairs);
-  knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + chroffset; /* head after reversing */
-
-
-  if ((pairarray = Stage3_compute(&pairs,&npairs,&goodness,&cdna_direction,&sensedir,
-				  &matches,&nmatches_posttrim,&max_match_length,
-				  &ambig_end_length_5,&ambig_end_length_3,
-				  &ambig_splicetype_5,&ambig_splicetype_3,
-				  &ambig_prob_5,&ambig_prob_3,
-				  &unknowns,&mismatches,&qopens,&qindels,&topens,&tindels,
-				  &ncanonical,&nsemicanonical,&nnoncanonical,&min_splice_prob,
-				  stage2pairs,all_stage2_starts,all_stage2_ends,
-#ifdef END_KNOWNSPLICING_SHORTCUT
-				  cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
-				  watsonp ? query_compress_fwd : query_compress_rev,
-#endif
-				  /*queryseq_ptr*/queryuc_ptr,queryuc_ptr,querylength,/*skiplength*/0,
-#ifdef EXTRACT_GENOMICSEG
-				  /*query_subseq_offset*/0,
-#else
-				  /*query_subseq_offset*/0,
-#endif
-				  chrnum,chroffset,chrhigh,
-				  knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
-				  /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
-				  /*sense_try*/0,/*sense_filter*/0,
-				  oligoindices_minor,diagpool,cellpool)) == NULL) {
-    /* Stage3_compute returned no pairarray: gmap is left unchanged */
-  } else {
-    nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
-				     pairarray,npairs);
-    start = add_bounded(chroffset + Pair_genomepos(&(pairarray[0])),
-			/*plusterm*/Pair_querypos(&(pairarray[0])),chrhigh);
-    end = subtract_bounded(chroffset + Pair_genomepos(&(pairarray[npairs-1])),
-			   /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray[npairs-1])),chroffset);
-    if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim,max_match_length,
-				  ambig_end_length_5,ambig_end_length_3,
-				  ambig_splicetype_5,ambig_splicetype_3,
-				  min_splice_prob,
-				  pairarray,npairs,nsegments,nintrons,nindelbreaks,
-				  /*left*/end,/*genomiclength*/start - end + 1,
-				  /*plusp*/false,genestrand,first_read_p,
-				  /*accession*/NULL,querylength,chrnum,chroffset,chrhigh,chrlength,
-				  cdna_direction,sensedir,/*sarrayp*/true)) == NULL) {
-      FREE_OUT(pairarray); /* no hit was built, so the pair array is released here */
-    } else {
-      gmap = List_push(gmap,(void *) hit);
-    }
-  }
-
-  List_free(&all_stage2_ends);
-  List_free(&all_stage2_starts);
-
-  return gmap;
-}
-#endif
-
-/* find_sense (compiled out by the surrounding #if 0).
-   Chooses a sense direction from two candidate solutions.  A
-   candidate is rejected when its endpoints are not strictly
-   increasing (the comparison starts from -1, so a first endpoint of
-   -1 or less is also rejected) or when any of its junctions is NULL.
-   Junction_prob is summed over junctions whose type is not
-   AMB_JUNCTION.
-
-   Returns false, leaving *sensedir unwritten, when both candidates
-   are rejected.  Otherwise returns true with *sensedir set to the
-   only accepted candidate's direction or, when both are accepted, to
-   the one with the larger probability sum (SENSE_NULL on a tie). */
-#if 0
-static bool
-find_sense (int *sensedir, List_T sense_junctions, List_T antisense_junctions,
-	    Intlist_T sense_endpoints, Intlist_T antisense_endpoints) {
-  bool sense_acceptable_p = true, antisense_acceptable_p = true;
-  double sense_prob = 0.0, antisense_prob = 0.0;
-  Junction_T sense_junction, antisense_junction;
-  List_T p;
-  Intlist_T a;
-  int last_endpoint;
-
-  last_endpoint = -1; /* sentinel below any endpoint that can pass the test */
-  for (a = sense_endpoints; a != NULL; a = Intlist_next(a)) {
-    if (Intlist_head(a) <= last_endpoint) {
-      sense_acceptable_p = false;
-    }
-    last_endpoint = Intlist_head(a);
-  }
-
-  last_endpoint = -1;
-  for (a = antisense_endpoints; a != NULL; a = Intlist_next(a)) {
-    if (Intlist_head(a) <= last_endpoint) {
-      antisense_acceptable_p = false;
-    }
-    last_endpoint = Intlist_head(a);
-  }
-
-  for (p = sense_junctions; p != NULL; p = List_next(p)) {
-    sense_junction = (Junction_T) List_head(p);
-    if (sense_junction == NULL) {
-      sense_acceptable_p = false;
-    } else if (Junction_type(sense_junction) == AMB_JUNCTION) {
-      /* Ignore */
-    } else {
-      sense_prob += Junction_prob(sense_junction);
-    }
-  }
-
-  for (p = antisense_junctions; p != NULL; p = List_next(p)) {
-    antisense_junction = (Junction_T) List_head(p);
-    if (antisense_junction == NULL) {
-      antisense_acceptable_p = false;
-    } else if (Junction_type(antisense_junction) == AMB_JUNCTION) {
-      /* Ignore */
-    } else {
-      antisense_prob += Junction_prob(antisense_junction);
-    }
-  }
-
-  /* Acceptability is decided first; probabilities only break the case where both candidates are acceptable */
-  if (sense_acceptable_p == false && antisense_acceptable_p == false) {
-    return false;
-  } else if (sense_acceptable_p == false) {
-    *sensedir = SENSE_ANTI;
-    return true;
-  } else if (antisense_acceptable_p == false) {
-    *sensedir = SENSE_FORWARD;
-    return true;
-  } else if (sense_prob > antisense_prob) {
-    *sensedir = SENSE_FORWARD;
-    return true;
-  } else if (antisense_prob > sense_prob) {
-    *sensedir = SENSE_ANTI;
-    return true;
-  } else {
-    *sensedir = SENSE_NULL;
-    return true;
-  }
-}
-#endif
-
-/* Reports whether a candidate path is usable.  The result is false
-   when ENDPOINTS is not strictly increasing (the comparison starts
-   from -1, so a first endpoint of -1 or less also fails) or when any
-   element of JUNCTIONS is NULL; otherwise it is true.  *intronp is
-   always written: true if at least one non-NULL junction has type
-   SPLICE_JUNCTION, false otherwise.  Both lists are scanned to the
-   end even after a failure has been found. */
-static bool
-endpoints_acceptable_p (bool *intronp, List_T junctions, Intlist_T endpoints) {
-  bool acceptable_p = true;
-  Junction_T junction;
-  List_T p;
-  Intlist_T a;
-  int last_endpoint;
-
-  last_endpoint = -1; /* sentinel below any endpoint that can pass the test */
-  for (a = endpoints; a != NULL; a = Intlist_next(a)) {
-    if (Intlist_head(a) <= last_endpoint) {
-      acceptable_p = false;
-    }
-    last_endpoint = Intlist_head(a);
-  }
-
-  *intronp = false;
-  for (p = junctions; p != NULL; p = List_next(p)) {
-    junction = (Junction_T) List_head(p);
-    if (junction == NULL) {
-      acceptable_p = false; /* a NULL junction fails the whole path */
-    } else if (Junction_type(junction) == SPLICE_JUNCTION) {
-      *intronp = true;
-    }
-  }
-
-  return acceptable_p;
-}
-
-
-/* incomplete_result_p (compiled out by the surrounding #if 0).
-   Returns true when the first diagonal of MIDDLE_PATH has
-   querystart greater than 8 or the last diagonal has queryend less
-   than querylength - 8; returns false otherwise.  The results of
-   List_head and List_last_value are dereferenced without a NULL
-   check -- presumably callers pass a non-empty list; verify before
-   re-enabling. */
-#if 0
-static bool
-incomplete_result_p (List_T middle_path, int querylength) {
-  Univdiag_T diagonal;
-  int querystart, queryend;
-
-  diagonal = (Univdiag_T) List_head(middle_path);
-  querystart = diagonal->querystart;
-
-  diagonal = (Univdiag_T) List_last_value(middle_path);
-  queryend = diagonal->queryend;
-
-  if (querystart > 8 || queryend < querylength - 8) {
-    return true;
-  } else {
-    return false;
-  }
-}
-#endif
-
-
-/* Always solves against plus strand of genome.  Just provide either
-   queryuc/query_compress_fwd (coords measured from beginning of
-   sequence) or queryrc/query_compress_rev (coords measured from end
-   of sequence).  All coordinates measured from low end.
-   Sense/antisense is with respect to the plus strand.  But to
-   interface with Stage3end_new_substring command, need to flip
-   coordinates for case where queryrc aligns to plus strand. */
-
-static List_T
-solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middle_path,
-
-		    Intlist_T right_endpoints_sense, Intlist_T right_endpoints_antisense,
-		    Intlist_T right_queryends_sense, Intlist_T right_queryends_antisense,
-		    Uintlist_T right_ambcoords_sense, Uintlist_T right_ambcoords_antisense,
-		    Intlist_T right_amb_knowni_sense, Intlist_T right_amb_knowni_antisense,
-		    Intlist_T right_amb_nmismatchesi_sense, Intlist_T right_amb_nmismatchesi_antisense,
-		    Intlist_T right_amb_nmismatchesj_sense, Intlist_T right_amb_nmismatchesj_antisense,
-		    Doublelist_T right_amb_probsi_sense, Doublelist_T right_amb_probsi_antisense,
-		    Doublelist_T right_amb_probsj_sense, Doublelist_T right_amb_probsj_antisense,
-
-		    Intlist_T left_endpoints_sense, Intlist_T left_endpoints_antisense,
-		    Intlist_T left_querystarts_sense, Intlist_T left_querystarts_antisense,
-		    Uintlist_T left_ambcoords_sense, Uintlist_T left_ambcoords_antisense,
-		    Intlist_T left_amb_knowni_sense, Intlist_T left_amb_knowni_antisense,
-		    Intlist_T left_amb_nmismatchesi_sense, Intlist_T left_amb_nmismatchesi_antisense,
-		    Intlist_T left_amb_nmismatchesj_sense, Intlist_T left_amb_nmismatchesj_antisense,
-		    Doublelist_T left_amb_probsi_sense, Doublelist_T left_amb_probsi_antisense,
-		    Doublelist_T left_amb_probsj_sense, Doublelist_T left_amb_probsj_antisense,
-
-		    Chrnum_T chrnum, Univcoord_T chroffset, Univcoord_T chrhigh,
-		    Chrpos_T chrlength, int querylength, Compress_T query_compress,
-		    bool plusp, int genestrand, int max_mismatches_allowed) {
-  List_T super_path;
-  Stage3end_T hit;
-  int sensedir, sense_sensedir, antisense_sensedir;
-
-  List_T p;
-  Univdiag_T diagonal, prev_diagonal, new_diagonal;
-  Chrpos_T splice_distance;
-  int querystart_for_merge, querystart, queryend;
-  int max_leftward;
-  int nmismatches, prev_nmismatches;
-  bool fillin_p;
-
-  int indel_pos;
-  int nindels;
-  Univcoord_T deletionpos;
-
-  int splice_pos;
-  double donor_prob, acceptor_prob;
-
-  bool sense_acceptable_p, antisense_acceptable_p, sense_intronp, antisense_intronp;
-  Univcoord_T left, prev_left;
-  Uintlist_T sense_lefts = NULL, antisense_lefts = NULL;
-  Intlist_T sense_nmismatches = NULL, antisense_nmismatches = NULL;
-  Intlist_T sense_endpoints = NULL, antisense_endpoints = NULL;
-  List_T sense_junctions = NULL, antisense_junctions = NULL;
-
-  int best_knowni_i, best_knowni_j, best_nmismatches_i, best_nmismatches_j;
-  double best_prob_i, best_prob_j;
-
-  Substring_T right_ambig_sense, right_ambig_antisense,
-    left_ambig_sense, left_ambig_antisense;
-  int segmenti_donor_nknown, segmentj_acceptor_nknown,
-    segmentj_antidonor_nknown, segmenti_antiacceptor_nknown;
-  int *segmenti_donor_knownpos, *segmentj_acceptor_knownpos, *segmentj_antidonor_knownpos, *segmenti_antiacceptor_knownpos,
-    *segmenti_donor_knowni, *segmentj_acceptor_knowni, *segmentj_antidonor_knowni, *segmenti_antiacceptor_knowni;
-  int j;
-
-#ifdef HAVE_ALLOCA
-  if (querylength <= MAX_STACK_READLENGTH) {
-    segmenti_donor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_acceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_antidonor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knownpos = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmenti_donor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_acceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmentj_antidonor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knowni = (int *) ALLOCA((querylength+1)*sizeof(int));
-  } else {
-    segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-    segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  }
-#else
-  segmenti_donor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_acceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_antidonor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmenti_antiacceptor_knownpos = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmenti_donor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_acceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmentj_antidonor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-  segmenti_antiacceptor_knowni = (int *) MALLOC((querylength+1)*sizeof(int));
-#endif
-
-
-#ifdef DEBUG13
-  printf("\n");
-  printf("Original diagonals:\n");
-  for (p = middle_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    printf("%d..%d at %u [%u]\n",diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,diagonal->univdiagonal - chroffset);
-  }
-  printf("\n");
-#endif
-
-  /*  Step 1:  Handle mismatches */
-  *completep = false;
-  super_path = (List_T) NULL;
-
-  p = middle_path;
-  prev_diagonal = (Univdiag_T) List_head(p);
-  querystart_for_merge = prev_diagonal->querystart;
-  prev_left = prev_diagonal->univdiagonal;
-  nmismatches = 0;
-  fillin_p = false;
-
-  for (p = List_next(p); p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    if ((left = diagonal->univdiagonal) == prev_left) {
-      /* Mismatch */
-      nmismatches += (diagonal->querystart - prev_diagonal->queryend - 1); /* This could be an overestimate */
-      debug13(printf("We have mismatch or mismatches between %d..%d and %d..%d.  Incrementing mismatches by %d => %d\n",
-		     prev_diagonal->querystart,prev_diagonal->queryend,diagonal->querystart,diagonal->queryend,
-		     (diagonal->querystart - prev_diagonal->queryend - 1),nmismatches));
-      if (diagonal->nmismatches_known_p == false) {
-	fillin_p = true;
-      }
-	     
-    } else {
-      /* Indel or splice */
-
-      /* Handle previous segment (for prev_left) */
-      new_diagonal = Univdiag_new(querystart_for_merge,prev_diagonal->queryend,prev_diagonal->univdiagonal);
-      if (fillin_p == true || prev_diagonal->nmismatches_known_p == false) {
-	new_diagonal->intscore = 100; /* Positive score allows for many mismatches in indel/splice routines */
-      } else {
-	new_diagonal->intscore = nmismatches;
-      }
-      super_path = List_push(super_path,(void *) new_diagonal);
-
-      prev_left = left;
-      querystart_for_merge = diagonal->querystart;
-      nmismatches = 0;
-      fillin_p = false;
-    }
-
-    prev_diagonal = diagonal;
-  }
-
-  new_diagonal = Univdiag_new(querystart_for_merge,prev_diagonal->queryend,prev_diagonal->univdiagonal);
-  if (fillin_p == true || prev_diagonal->nmismatches_known_p == false) {
-    new_diagonal->intscore = 100; /* Positive score allows for many mismatches in indel/splice routines */
-  } else {
-    new_diagonal->intscore = nmismatches;
-  }
-  super_path = List_push(super_path,(void *) new_diagonal);
-
-  super_path = List_reverse(super_path);
-
-#ifdef DEBUG13
-  printf("\n");
-  printf("Super diagonals on chrnum %d:\n",chrnum);
-  for (p = super_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    printf("%d..%d at %u [%u] with %d mismatches\n",
-	   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal,
-	   diagonal->univdiagonal - chroffset,diagonal->intscore);
-  }
-  printf("\n");
-#endif
-
-
-  /*  Step 2:  Handle indels and splices */
-
-  p = super_path;
-  prev_diagonal = (Univdiag_T) List_head(p);
-  prev_left = prev_diagonal->univdiagonal;
-
-  debug13(printf("Diagonal %d..%d for left %u [%u]\n",
-		 prev_diagonal->querystart,prev_diagonal->queryend,prev_left,prev_left - chroffset));
-
-  sense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
-  antisense_endpoints = Intlist_push(NULL,prev_diagonal->querystart);
-
-  /* Previously pushed prev_diagonal->intscore, but that is not
-     correct (unless there are no mismatches).  Pushing -1 indicates
-     that we need to compute the value */
-  if (prev_diagonal->intscore == 0) {
-    sense_nmismatches = Intlist_push(NULL,0);
-    antisense_nmismatches = Intlist_push(NULL,0);
-  } else {
-    sense_nmismatches = Intlist_push(NULL,-1);
-    antisense_nmismatches = Intlist_push(NULL,-1);
-  }
-
-  for (p = List_next(p); p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    left = diagonal->univdiagonal;
-    assert(left != prev_left); /* Because we already handled mismatches above */
-
-    debug13(printf("Diagonal %d..%d at leftpos %u [%u], diff %d\n",
-		   diagonal->querystart,diagonal->queryend,left,left - chroffset,left - prev_left));
-
-    if (left < prev_left) {
-      /* Insertion */
-      nindels = prev_left - left;
-#if 0
-      max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
-      debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
-      if (prev_diagonal->intscore > 0) {
-	max_mismatches_allowed += 1;
-      }
-      if (diagonal->intscore > 0) {
-	max_mismatches_allowed += 1;
-      }
-#endif
-      if ((indel_pos = Indel_resolve_middle_insertion(&best_nmismatches_i,&best_nmismatches_j,
-						      /*left*/prev_left,/*indels*/+nindels,query_compress,
-						      prev_diagonal->querystart,diagonal->queryend,querylength,
-						      max_mismatches_allowed,/*plusp:true*/true,genestrand)) < 0) {
-	sense_junctions = List_push(sense_junctions,NULL);
-	antisense_junctions = List_push(antisense_junctions,NULL);
-      } else {
-	sense_junctions = List_push(sense_junctions,Junction_new_insertion(nindels));
-	antisense_junctions = List_push(antisense_junctions,Junction_new_insertion(nindels));
-	diagonal->querystart += nindels; /* Needed for subsequent indel computation */
-      }
-
-      if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
-	/* Still need to compute */
-	sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
-      } else if (Intlist_head(sense_endpoints) != prev_diagonal->querystart) {
-	/* Endpoints not quite the same, so need to recompute */
-	Intlist_head_set(sense_nmismatches,-1);
-	sense_nmismatches = Intlist_push(sense_nmismatches,-1);
-      } else {
-	Intlist_head_set(sense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
-	sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
-      }
-
-      if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
-	/* Still need to compute */
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
-      } else if (Intlist_head(antisense_endpoints) != prev_diagonal->querystart) {
-	/* Endpoints not quite the same, so need to recompute */
-	Intlist_head_set(antisense_nmismatches,-1);
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
-      } else {
-	Intlist_head_set(antisense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
-      }
-
-      sense_lefts = Uintlist_push(sense_lefts,prev_left);
-      antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
-
-      sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
-      antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
-      debug13(printf("insertion pos in range %d..%d is %d with nmismatches %d+%d\n",
-		     prev_diagonal->querystart,diagonal->queryend,indel_pos,best_nmismatches_i,best_nmismatches_j));
-      
-    } else if (left <= prev_left + max_deletionlen) {
-      /* Deletion */
-      nindels = left - prev_left;
-#if 0
-      max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
-      debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
-      if (prev_diagonal->intscore > 0) {
-	max_mismatches_allowed += 1;
-      }
-      if (diagonal->intscore > 0) {
-	max_mismatches_allowed += 1;
-      }
-#endif
-      if ((indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
-						     /*left*/prev_left,/*indels*/-nindels,query_compress,
-						     prev_diagonal->querystart,diagonal->queryend,querylength,
-						     max_mismatches_allowed,/*plusp:true*/true,genestrand)) < 0) {
-	sense_junctions = List_push(sense_junctions,NULL);
-	antisense_junctions = List_push(antisense_junctions,NULL);
-      } else {
-	deletionpos = prev_left + indel_pos;
-	sense_junctions = List_push(sense_junctions,Junction_new_deletion(nindels,deletionpos));
-	antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
-      }
-
-      if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
-	/* Still need to compute */
-	sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
-      } else if (Intlist_head(sense_endpoints) != prev_diagonal->querystart) {
-	/* Endpoints not quite the same, so need to recompute */
-	Intlist_head_set(sense_nmismatches,-1);
-	sense_nmismatches = Intlist_push(sense_nmismatches,-1);
-      } else {
-	Intlist_head_set(sense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
-	sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
-      }
-
-      if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
-	/* Still need to compute */
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
-      } else if (Intlist_head(antisense_endpoints) != prev_diagonal->querystart) {
-	/* Endpoints not quite the same, so need to recompute */
-	Intlist_head_set(antisense_nmismatches,-1);
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
-      } else {
-	Intlist_head_set(antisense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
-      }
-
-      sense_lefts = Uintlist_push(sense_lefts,prev_left);
-      antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
-
-      sense_endpoints = Intlist_push(sense_endpoints,indel_pos);
-      antisense_endpoints = Intlist_push(antisense_endpoints,indel_pos);
-      debug13(printf("deletion pos in range %d..%d is %d with nmismatches %d+%d\n",
-		     prev_diagonal->querystart,diagonal->queryend,indel_pos,best_nmismatches_i,best_nmismatches_j));
-      
-    } else {
-      /* Splice */
-      segmenti_donor_nknown = segmenti_antiacceptor_nknown = 0;
-      if (nsplicesites > 0 &&
-	  Splicetrie_splicesite_p(prev_left,/*pos5*/1,/*pos3*/querylength) == true) {
-	j = binary_search(0,nsplicesites,splicesites,prev_left);
-	while (j < nsplicesites && splicesites[j] < prev_left + querylength) {
-	  if (splicetypes[j] == DONOR) {
-	    debug4s(printf("Setting known donor %d for segmenti at %u\n",j,splicesites[j]));
-	    segmenti_donor_knownpos[segmenti_donor_nknown] = splicesites[j] - prev_left;
-	    segmenti_donor_knowni[segmenti_donor_nknown++] = j;
-	  } else if (splicetypes[j] == ANTIACCEPTOR) {
-	    debug4s(printf("Setting known antiacceptor %d for segmenti at %u\n",j,splicesites[j]));
-	    segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = splicesites[j] - prev_left;
-	    segmenti_antiacceptor_knowni[segmenti_antiacceptor_nknown++] = j;
-	  }
-	  j++;
-	}
-      }
-      segmenti_donor_knownpos[segmenti_donor_nknown] = querylength + 100;
-      segmenti_antiacceptor_knownpos[segmenti_antiacceptor_nknown] = querylength + 100;
-	  
-      segmentj_acceptor_nknown = segmentj_antidonor_nknown = 0;
-      if (nsplicesites > 0 &&
-	  Splicetrie_splicesite_p(left,/*pos5*/1,/*pos3*/querylength) == true) {
-	j = binary_search(0,nsplicesites,splicesites,left);
-	while (j < nsplicesites && splicesites[j] < left + querylength) {
-	  if (splicetypes[j] == ACCEPTOR) {
-	    debug4s(printf("Setting known acceptor %d for segmentj at %u\n",j,splicesites[j]));
-	    segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = splicesites[j] - left;
-	    segmentj_acceptor_knowni[segmentj_acceptor_nknown++] = j;
-	  } else if (splicetypes[j] == ANTIDONOR) {
-	    debug4s(printf("Setting known antidonor %d for segmentj at %u\n",j,splicesites[j]));
-	    segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = splicesites[j] - left;
-	    segmentj_antidonor_knowni[segmentj_antidonor_nknown++] = j;
-	  }
-	  j++;
-	}
-      }
-      segmentj_acceptor_knownpos[segmentj_acceptor_nknown] = querylength + 100;
-      segmentj_antidonor_knownpos[segmentj_antidonor_nknown] = querylength + 100;
-      
-      splice_distance = left - prev_left;
-#if 0
-      max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
-      debug13(printf("max_mismatches %d = %d - %d - 1\n",max_mismatches_allowed,diagonal->querystart,prev_diagonal->queryend));
-      if (prev_diagonal->intscore > 0) {
-	max_mismatches_allowed += 1;
-      }
-      if (diagonal->intscore > 0) {
-	max_mismatches_allowed += 1;
-      }
-#endif
-
-      if ((splice_pos = Splice_resolve_sense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
-					     &best_prob_i,&best_prob_j,
-					     /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
-					     prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
-					     segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-					     segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-					     segmenti_donor_knowni,segmentj_acceptor_knowni,
-					     segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-					     segmenti_donor_nknown,segmentj_acceptor_nknown,
-					     segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-					     max_mismatches_allowed,plusp,genestrand)) < 0) {
-	sense_endpoints = Intlist_push(sense_endpoints,-1); /* Mark as invalid */
-	sense_junctions = List_push(sense_junctions,NULL);
-      } else if (plusp == true) {
-	sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
-	sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
-									/*donor_prob*/best_prob_i,/*acceptor_prob*/best_prob_j));
-      } else {
-	sense_endpoints = Intlist_push(sense_endpoints,splice_pos);
-	sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
-									/*donor_prob*/best_prob_j,/*acceptor_prob*/best_prob_i));
-      }
-      debug13(printf("sense splice_pos in range %d..%d is %d with mismatches %d+%d\n",
-		     prev_diagonal->querystart,diagonal->queryend,splice_pos,best_nmismatches_i,best_nmismatches_j));
-      if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
-	/* Still need to compute */
-	sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
-      } else if (Intlist_head(sense_endpoints) != prev_diagonal->querystart) {
-	/* Endpoints not quite the same, so need to recompute */
-	Intlist_head_set(sense_nmismatches,-1);
-	sense_nmismatches = Intlist_push(sense_nmismatches,-1);
-      } else {
-	Intlist_head_set(sense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
-	sense_nmismatches = Intlist_push(sense_nmismatches,best_nmismatches_j);
-      }
-      sense_lefts = Uintlist_push(sense_lefts,prev_left);
-
-      if ((splice_pos = Splice_resolve_antisense(&best_knowni_i,&best_knowni_j,&best_nmismatches_i,&best_nmismatches_j,
-						 &best_prob_i,&best_prob_j,
-						 /*segmenti_left*/prev_left,/*segmentj_left*/left,chroffset,chroffset,
-						 prev_diagonal->querystart,diagonal->queryend+1,querylength,query_compress,
-						 segmenti_donor_knownpos,segmentj_acceptor_knownpos,
-						 segmentj_antidonor_knownpos,segmenti_antiacceptor_knownpos,
-						 segmenti_donor_knowni,segmentj_acceptor_knowni,
-						 segmentj_antidonor_knowni,segmenti_antiacceptor_knowni,
-						 segmenti_donor_nknown,segmentj_acceptor_nknown,
-						 segmentj_antidonor_nknown,segmenti_antiacceptor_nknown,
-						 max_mismatches_allowed,plusp,genestrand)) < 0) {
-	antisense_endpoints = Intlist_push(antisense_endpoints,-1); /* Mark as invalid */
-	antisense_junctions = List_push(antisense_junctions,NULL);
-      } else if (plusp == true) {
-	antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
-	antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
-										/*donor_prob*/best_prob_j,/*acceptor_prob*/best_prob_i));
-      } else {
-	antisense_endpoints = Intlist_push(antisense_endpoints,splice_pos);
-	antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
-										/*donor_prob*/best_prob_i,/*acceptor_prob*/best_prob_j));
-      }
-      debug13(printf("antisense splice_pos in range %d..%d is %d with nmismatches %d+%d\n",
-		     prev_diagonal->querystart,diagonal->queryend,splice_pos,best_nmismatches_i,best_nmismatches_j));
-      if ((prev_nmismatches = Intlist_head(antisense_nmismatches)) < 0) {
-	/* Still need to compute */
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
-      } else if (Intlist_head(antisense_endpoints) != prev_diagonal->querystart) {
-	/* Endpoints not quite the same, so need to recompute */
-	Intlist_head_set(antisense_nmismatches,-1);
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
-      } else {
-	Intlist_head_set(antisense_nmismatches,best_nmismatches_i /*+ prev_nmismatches*/);
-	antisense_nmismatches = Intlist_push(antisense_nmismatches,best_nmismatches_j);
-      }
-      antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
-    }
-
-    /* Handle previous segment (for prev_left) */
-    prev_left = left;
-    prev_diagonal = diagonal;
-  }
-
-  /* Finish up lists */
-  sense_lefts = Uintlist_push(sense_lefts,prev_left);
-  antisense_lefts = Uintlist_push(antisense_lefts,prev_left);
-  sense_endpoints = Intlist_push(sense_endpoints,prev_diagonal->queryend + 1);
-  antisense_endpoints = Intlist_push(antisense_endpoints,prev_diagonal->queryend + 1);
-
-
-  debug13(printf("After step 2 (indels and splices)\n"));
-  debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
-  debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
-  debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
-  debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
-
-
-  /*  Step 3:  Handle ambiguous ends on right */
-  right_ambig_sense = (Substring_T) NULL;
-  if (circularp[chrnum] == true) {
-    /* Skip */
-
-  } else if (right_endpoints_sense == NULL) {
-    /* Skip */
-
-  } else if (Intlist_length(right_endpoints_sense) == 1) {
-    /* Only one splice on right */
-    debug13(printf("Only one sense splice on right, which should have %d and %d mismatches\n",
-		   Intlist_head(right_amb_nmismatchesi_sense),Intlist_head(right_amb_nmismatchesj_sense)));
-    splice_pos = Intlist_head(right_endpoints_sense);
-    queryend = Intlist_head(right_queryends_sense);
-    left = Uintlist_head(right_ambcoords_sense) - splice_pos;
-    splice_distance = left - prev_left;
-    if (plusp == true) {
-      donor_prob = Doublelist_head(right_amb_probsi_sense);
-      acceptor_prob = Doublelist_head(right_amb_probsj_sense);
-    } else {
-      acceptor_prob = Doublelist_head(right_amb_probsi_sense);
-      donor_prob = Doublelist_head(right_amb_probsj_sense);
-    }
-
-    if (Intlist_head(sense_endpoints) != splice_pos) {
-      Intlist_head_set(sense_nmismatches,-1);
-      sense_nmismatches = Intlist_push(sense_nmismatches,-1);
-      Intlist_head_set(sense_endpoints,splice_pos);
-    } else {
-      /* Only distal nmismatches is reliable */
-      /* Intlist_head_set(sense_nmismatches,Intlist_head(right_amb_nmismatchesi_sense)); */
-      sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(right_amb_nmismatchesj_sense));
-    }
-    sense_endpoints = Intlist_push(sense_endpoints,queryend);
-
-    sense_lefts = Uintlist_push(sense_lefts,left);
-    sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
-								    donor_prob,acceptor_prob));
-
-  } else if (Intlist_vary(right_endpoints_sense) == true) {
-    /* Skip */
-  } else {
-    /* Ambiguous substring on right */
-    debug13(printf("Ambiguous substring on right\n"));
-    splice_pos = Intlist_head(right_endpoints_sense);
-    queryend = Intlist_head(right_queryends_sense); /* Should all be the same */
-
-    if (Intlist_head(sense_endpoints) != splice_pos) {
-      Intlist_head_set(sense_nmismatches,-1);
-      Intlist_head_set(sense_endpoints,splice_pos);
-    }
-    /* sense_endpoints = Intlist_push(sense_endpoints,queryend); */
-
-    if (plusp == true) {
-      right_ambig_sense = Substring_new_ambig_A(/*querystart*/splice_pos,queryend,
-						/*splice_pos*/splice_pos,querylength,
-						chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-						right_ambcoords_sense,right_amb_knowni_sense,
-						right_amb_nmismatchesj_sense,right_amb_probsj_sense,
-						/*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
-						/*substring1p*/false);
-    } else {
-      right_ambig_sense = Substring_new_ambig_D(/*querystart*/querylength - queryend,querylength - splice_pos,
-						/*splice_pos*/querylength - splice_pos,querylength,
-						chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-						right_ambcoords_sense,right_amb_knowni_sense,
-						right_amb_nmismatchesj_sense,right_amb_probsj_sense,
-						/*amb_common_prob*/Doublelist_head(right_amb_probsi_sense),
-						/*substring1p*/true);
-    }
-  }
-
-  if (right_ambig_sense != NULL) {
-    /* Endpoints end before ambiguous substring */
-  } else if (Intlist_head(sense_endpoints) == querylength) {
-    /* Last substring already goes to the end */
-  } else {
-    Intlist_head_set(sense_endpoints,querylength);
-    Intlist_head_set(sense_nmismatches,-1); /* Recalculate */
-  }
-
-
-  right_ambig_antisense = (Substring_T) NULL;
-  if (circularp[chrnum] == true) {
-    /* Skip */
-
-  } else if (right_endpoints_antisense == NULL) {
-    /* Skip */
-
-  } else if (Intlist_length(right_endpoints_antisense) == 1) {
-    /* Only one splice on right */
-    debug13(printf("Only one antisense splice on right, which should have %d and %d mismatches\n",
-		   Intlist_head(right_amb_nmismatchesi_antisense),Intlist_head(right_amb_nmismatchesj_antisense)));
-    splice_pos = Intlist_head(right_endpoints_antisense);
-    queryend = Intlist_head(right_queryends_antisense);
-    left = Uintlist_head(right_ambcoords_antisense) - splice_pos;
-    splice_distance = left - prev_left;
-    if (plusp == true) {
-      acceptor_prob = Doublelist_head(right_amb_probsi_antisense);
-      donor_prob = Doublelist_head(right_amb_probsj_antisense);
-    } else {
-      donor_prob = Doublelist_head(right_amb_probsi_antisense);
-      acceptor_prob = Doublelist_head(right_amb_probsj_antisense);
-    }
-
-    if (Intlist_head(antisense_endpoints) != splice_pos) {
-      Intlist_head_set(antisense_nmismatches,-1);
-      antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
-      Intlist_head_set(antisense_endpoints,splice_pos);
-    } else {
-      /* Only distal nmismatches is reliable */
-      /* Intlist_head_set(antisense_nmismatches,Intlist_head(right_amb_nmismatchesi_antisense)); */
-      antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(right_amb_nmismatchesj_antisense));
-    }
-    antisense_endpoints = Intlist_push(antisense_endpoints,queryend);
-
-    antisense_lefts = Uintlist_push(antisense_lefts,left);
-    antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
-									    donor_prob,acceptor_prob));
-
-  } else if (Intlist_vary(right_endpoints_antisense) == true) {
-    /* Skip */
-  } else {
-    /* Ambiguous substring on right */
-    debug13(printf("Ambiguous substring on right\n"));
-    splice_pos = Intlist_head(right_endpoints_antisense);
-    queryend = Intlist_head(right_queryends_antisense); /* Should all be the same */
-
-    if (Intlist_head(antisense_endpoints) != splice_pos) {
-      Intlist_head_set(antisense_nmismatches,-1);
-      Intlist_head_set(antisense_endpoints,splice_pos);
-    }
-    /* antisense_endpoints = Intlist_push(antisense_endpoints,queryend); */
-
-    if (plusp == true) {
-      right_ambig_antisense = Substring_new_ambig_D(/*querystart*/splice_pos,queryend,
-						    /*splice_pos*/splice_pos,querylength,
-						    chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-						    right_ambcoords_antisense,right_amb_knowni_antisense,
-						    right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
-						    /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
-						    /*substring1p*/false);
-    } else {
-      right_ambig_antisense = Substring_new_ambig_A(/*querystart*/querylength - queryend,querylength - splice_pos,
-						    /*splice_pos*/querylength - splice_pos,querylength,
-						    chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-						    right_ambcoords_antisense,right_amb_knowni_antisense,
-						    right_amb_nmismatchesj_antisense,right_amb_probsj_antisense,
-						    /*amb_common_prob*/Doublelist_head(right_amb_probsi_antisense),
-						    /*substring1p*/true);
-    }
-  }
-
-  if (right_ambig_antisense != NULL) {
-    /* Endpoints end before ambiguous substring */
-  } else if (Intlist_head(antisense_endpoints) == querylength) {
-    /* Last substring already goes to the end */
-  } else {
-    Intlist_head_set(antisense_endpoints,querylength);
-    Intlist_head_set(antisense_nmismatches,-1); /* Recalculate */
-  }
-
-
-  debug13(printf("After step 3 (ambiguous ends on right)\n"));
-  debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
-  debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
-  debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
-  debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
-
-  /*  Step 4:  Reverse sense and antisense alignments */
-  sense_nmismatches = Intlist_reverse(sense_nmismatches);
-  antisense_nmismatches = Intlist_reverse(antisense_nmismatches);
-  sense_lefts = Uintlist_reverse(sense_lefts);
-  antisense_lefts = Uintlist_reverse(antisense_lefts);
-  sense_endpoints = Intlist_reverse(sense_endpoints);
-  antisense_endpoints = Intlist_reverse(antisense_endpoints);
-  sense_junctions = List_reverse(sense_junctions);
-  antisense_junctions = List_reverse(antisense_junctions);
-
-  debug13(printf("After step 4 (reverse alignments)\n"));
-  debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
-  debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
-  debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
-  debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
-
-
-  /*  Step 5:  Handle ambiguous ends on left */
-  left_ambig_sense = (Substring_T) NULL;
-  if (circularp[chrnum] == true) {
-    /* Skip */
-    debug13(printf("Sense: Chrnum %d is circular, so not computing mismatches\n",chrnum));
-
-  } else if (left_endpoints_sense == NULL) {
-    /* Skip, but extend leftward */
-    debug13(printf("Sense: Skip, but extend leftward\n"));
-    if (Intlist_head(sense_endpoints) > 0) {
-      querystart = Intlist_head(sense_endpoints);
-      if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
-							      /*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
-	Intlist_head_set(sense_endpoints,querystart - max_leftward);
-	Intlist_head_set(sense_nmismatches,-1);
-      } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
-								     /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
-	Intlist_head_set(sense_endpoints,querystart - max_leftward - 1);
-	Intlist_head_set(sense_nmismatches,-1);
-      } else {
-	/* Keep value as querystart */
-      }
-    }
-
-  } else if (Intlist_length(left_endpoints_sense) == 1) {
-    /* Only one splice on left */
-    debug13(printf("Only one sense splice on left, which should have %d and %d mismatches, plusp %d\n",
-		   Intlist_head(left_amb_nmismatchesi_sense),Intlist_head(left_amb_nmismatchesj_sense),plusp));
-    prev_left = Uintlist_head(sense_lefts);
-    splice_pos = Intlist_head(left_endpoints_sense);
-    querystart = Intlist_head(left_querystarts_sense);
-    left = Uintlist_head(left_ambcoords_sense) - splice_pos;
-    splice_distance = prev_left - left;
-    if (plusp == true) {
-      donor_prob = Doublelist_head(left_amb_probsi_sense);
-      acceptor_prob = Doublelist_head(left_amb_probsj_sense);
-    } else {
-      acceptor_prob = Doublelist_head(left_amb_probsi_sense);
-      donor_prob = Doublelist_head(left_amb_probsj_sense);
-    }
-
-    if (Intlist_head(sense_endpoints) != splice_pos) {
-      Intlist_head_set(sense_nmismatches,-1);
-      sense_nmismatches = Intlist_push(sense_nmismatches,-1);
-      Intlist_head_set(sense_endpoints,splice_pos);
-    } else {
-      /* Only distal nmismatches is reliable */
-      /* Intlist_head_set(sense_nmismatches,Intlist_head(left_amb_nmismatchesj_sense)); */
-      sense_nmismatches = Intlist_push(sense_nmismatches,Intlist_head(left_amb_nmismatchesi_sense));
-    }
-    sense_endpoints = Intlist_push(sense_endpoints,querystart);
-
-    sense_lefts = Uintlist_push(sense_lefts,left);
-    sense_junctions = List_push(sense_junctions,Junction_new_splice(splice_distance,SENSE_FORWARD,
-								    donor_prob,acceptor_prob));
-
-  } else if (Intlist_vary(left_endpoints_sense) == true) {
-    /* Skip, but extend leftward */
-    querystart = Intlist_head(sense_endpoints);
-    if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
-							    /*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
-      Intlist_head_set(sense_endpoints,querystart - max_leftward);
-      Intlist_head_set(sense_nmismatches,-1);
-    } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(sense_lefts),
-								   /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
-      Intlist_head_set(sense_endpoints,querystart - max_leftward - 1);
-      Intlist_head_set(sense_nmismatches,-1);
-    } else {
-      /* Keep value as querystart */
-    }
-
-  } else {
-    /* Ambiguous substring on left */
-    debug13(printf("Ambiguous substring on left\n"));
-    splice_pos = Intlist_head(left_endpoints_sense);
-    querystart = Intlist_head(left_querystarts_sense); /* Should all be the same */
-
-    if (Intlist_head(sense_endpoints) != splice_pos) {
-      Intlist_head_set(sense_nmismatches,-1);
-      Intlist_head_set(sense_endpoints,splice_pos);
-    }
-    /* sense_endpoints = Intlist_push(sense_endpoints,querystart); */
-
-    if (plusp == true) {
-      left_ambig_sense = Substring_new_ambig_D(querystart,/*queryend*/splice_pos,
-					       /*splice_pos*/splice_pos,querylength,
-					       chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-					       left_ambcoords_sense,left_amb_knowni_sense,
-					       left_amb_nmismatchesi_sense,left_amb_probsi_sense,
-					       /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
-					       /*substring1p*/true);
-    } else {
-      left_ambig_sense = Substring_new_ambig_A(querylength - splice_pos,/*queryend*/querylength - querystart,
-					       /*splice_pos*/querylength - splice_pos,querylength,
-					       chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-					       left_ambcoords_sense,left_amb_knowni_sense,
-					       left_amb_nmismatchesi_sense,left_amb_probsi_sense,
-					       /*amb_common_prob*/Doublelist_head(left_amb_probsj_sense),
-					       /*substring1p*/false);
-    }
-  }
-
-  if (left_ambig_sense != NULL) {
-    /* Endpoints begin after ambiguous substring */
-    debug13(printf("Sense: Endpoints begin after ambiguous substring\n"));
-  } else if (Intlist_head(sense_endpoints) == 0) {
-    /* First substring already goes to the beginning */
-    debug13(printf("Sense: First substring already goes to the beginning\n"));
-  } else {
-    Intlist_head_set(sense_endpoints,0);
-    Intlist_head_set(sense_nmismatches,-1); /* Recalculate */
-  }
-
-
-  left_ambig_antisense = (Substring_T) NULL;
-  if (circularp[chrnum] == true) {
-    /* Skip */
-    debug13(printf("Antisense: Chrnum %d is circular, so not computing mismatches\n",chrnum));
-
-  } else if (left_endpoints_antisense == NULL) {
-    /* Skip, but extend leftward */
-    debug13(printf("Antisense: Skip, but extend leftward\n"));
-    querystart = Intlist_head(antisense_endpoints);
-    if (Intlist_head(antisense_endpoints) > 0) {
-      if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
-							      /*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
-	Intlist_head_set(antisense_endpoints,querystart - max_leftward);
-	Intlist_head_set(antisense_nmismatches,-1);
-      } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
-								     /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
-	Intlist_head_set(antisense_endpoints,querystart - max_leftward - 1);
-	Intlist_head_set(antisense_nmismatches,-1);
-      } else {
-	/* Keep value as querystart */
-      }
-    }
-
-  } else if (Intlist_length(left_endpoints_antisense) == 1) {
-    /* Only one splice on left */
-    debug13(printf("Only one antisense splice on left, which should have %d and %d mismatches\n",
-		   Intlist_head(left_amb_nmismatchesi_antisense),Intlist_head(left_amb_nmismatchesj_antisense)));
-    prev_left = Uintlist_head(antisense_lefts);
-    splice_pos = Intlist_head(left_endpoints_antisense);
-    querystart = Intlist_head(left_querystarts_antisense);
-    left = Uintlist_head(left_ambcoords_antisense) - splice_pos;
-    splice_distance = prev_left - left;
-    if (plusp == true) {
-      acceptor_prob = Doublelist_head(left_amb_probsi_antisense);
-      donor_prob = Doublelist_head(left_amb_probsj_antisense);
-    } else {
-      donor_prob = Doublelist_head(left_amb_probsi_antisense);
-      acceptor_prob = Doublelist_head(left_amb_probsj_antisense);
-    }
-
-    if (Intlist_head(antisense_endpoints) != splice_pos) {
-      Intlist_head_set(antisense_nmismatches,-1);
-      antisense_nmismatches = Intlist_push(antisense_nmismatches,-1);
-      Intlist_head_set(antisense_endpoints,splice_pos);
-    } else {
-      /* Only distal nmismatches is reliable */
-      /* Intlist_head_set(antisense_nmismatches,Intlist_head(left_amb_nmismatchesj_antisense)); */
-      antisense_nmismatches = Intlist_push(antisense_nmismatches,Intlist_head(left_amb_nmismatchesi_antisense));
-    }
-    antisense_endpoints = Intlist_push(antisense_endpoints,querystart);
-
-    antisense_lefts = Uintlist_push(antisense_lefts,left);
-    antisense_junctions = List_push(antisense_junctions,Junction_new_splice(splice_distance,SENSE_ANTI,
-									    donor_prob,acceptor_prob));
-
-  } else if (Intlist_vary(left_endpoints_antisense) == true) {
-    /* Skip, but extend leftward */
-    querystart = Intlist_head(antisense_endpoints);
-    if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
-							    /*pos5*/0,/*pos3*/querystart,plusp,genestrand)) > 0) {
-      Intlist_head_set(antisense_endpoints,querystart - max_leftward);
-      Intlist_head_set(antisense_endpoints,-1);
-    } else if ((max_leftward = Genome_consecutive_matches_leftward(query_compress,/*left*/Uintlist_head(antisense_lefts),
-								   /*pos5*/0,/*pos3*/querystart-1,plusp,genestrand)) > 0) {
-      Intlist_head_set(antisense_endpoints,querystart - max_leftward - 1);
-      Intlist_head_set(antisense_endpoints,-1);
-    } else {
-      /* Keep value as querystart */
-    }
-
-  } else {
-    /* Ambiguous substring on left */
-    debug13(printf("Ambiguous substring on left\n"));
-    splice_pos = Intlist_head(left_endpoints_antisense);
-    querystart = Intlist_head(left_querystarts_antisense); /* Should all be the same */
-
-    if (Intlist_head(antisense_endpoints) != splice_pos) {
-      Intlist_head_set(antisense_nmismatches,-1);
-      Intlist_head_set(antisense_endpoints,splice_pos);
-    }
-    /* antisense_endpoints = Intlist_push(antisense_endpoints,querystart); */
-
-    if (plusp == true) {
-      left_ambig_antisense = Substring_new_ambig_A(querystart,/*queryend*/splice_pos,
-						   /*splice_pos*/splice_pos,querylength,
-						   chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-						   left_ambcoords_antisense,left_amb_knowni_antisense,
-						   left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
-						   /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
-						   /*substring1p*/true);
-    } else {
-      left_ambig_antisense = Substring_new_ambig_D(querylength - splice_pos,/*queryend*/querylength - querystart,
-						   /*splice_pos*/querylength - splice_pos,querylength,
-						   chrnum,chroffset,chrhigh,chrlength,plusp,genestrand,
-						   left_ambcoords_antisense,left_amb_knowni_antisense,
-						   left_amb_nmismatchesi_antisense,left_amb_probsi_antisense,
-						   /*amb_common_prob*/Doublelist_head(left_amb_probsj_antisense),
-						   /*substring1p*/false);
-    }
-  }
-
-  if (left_ambig_antisense != NULL) {
-    /* Endpoints begin after ambiguous substring */
-    debug13(printf("Antisense: Endpoints begin after ambiguous substring\n"));
-  } else if (Intlist_head(antisense_endpoints) == 0) {
-    /* First substring already goes to the beginning */
-    debug13(printf("Antisense: First substring already goes to the beginning\n"));
-  } else {
-    Intlist_head_set(antisense_endpoints,0);
-    Intlist_head_set(antisense_nmismatches,-1); /* Recalculate */
-  }
-
-
-  debug13(printf("After step 5 (ambiguous ends on left)\n"));
-  debug13(printf("sense (wrt plus): %s\n",Intlist_to_string(sense_endpoints)));
-  debug13(printf("antisense (wrt plus): %s\n",Intlist_to_string(antisense_endpoints)));
-  debug13(printf("sense nmismatches: %s\n",Intlist_to_string(sense_nmismatches)));
-  debug13(printf("antisense nmismatches: %s\n",Intlist_to_string(antisense_nmismatches)));
-
-#ifdef DEBUG13
-  printf("Sense junctions\n");
-  for (p = sense_junctions; p != NULL; p = List_next(p)) {
-    Junction_print(List_head(p));
-  }
-  printf("\n");
-  printf("Antisense junctions\n");
-  for (p = antisense_junctions; p != NULL; p = List_next(p)) {
-    Junction_print(List_head(p));
-  }
-  printf("\n");
-#endif
-
-
-  /* Need to rely on probability filtering in splice.c to get correct
-     results for sense and antisense */
-  sense_acceptable_p = endpoints_acceptable_p(&sense_intronp,sense_junctions,sense_endpoints);
-  antisense_acceptable_p = endpoints_acceptable_p(&antisense_intronp,antisense_junctions,
-						  antisense_endpoints);
-  if (sense_acceptable_p == true && antisense_acceptable_p == true) {
-    if (sense_intronp == true || right_ambig_sense != NULL || left_ambig_sense != NULL) {
-      sense_sensedir = SENSE_FORWARD;
-    } else {
-      sense_sensedir = SENSE_NULL;
-    }
-    if (antisense_intronp == true || right_ambig_antisense != NULL || left_ambig_antisense != NULL) {
-      antisense_sensedir = SENSE_ANTI;
-    } else {
-      antisense_sensedir = SENSE_NULL;
-    }
-
-    if (sense_sensedir == SENSE_NULL && antisense_sensedir == SENSE_NULL) {
-      /* Create just one hit */
-      if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
-					  sense_nmismatches,sense_junctions,querylength,query_compress,
-					  /*right_ambig*/NULL,/*left_ambig*/NULL,plusp,genestrand,/*sensedir*/SENSE_NULL,
-					  chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
-	Substring_free(&right_ambig_sense);
-	Substring_free(&left_ambig_sense);
-	/* Junction_gc(&sense_junctions); -- Done by Stage3end_new_substrings */
-	Substring_free(&right_ambig_antisense);
-	Substring_free(&left_ambig_antisense);
-      } else {
-	if (Stage3end_substrings_querystart(hit) < 8 &&
-	    Stage3end_substrings_queryend(hit) >= querylength - 8) {
-	  *completep = true;
-	}
-	hits = List_push(hits,(void *) hit);
-      }
-      Junction_gc(&antisense_junctions);
-
-    } else {
-      /* Create just both sense and antisense hits */
-      if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
-					  sense_nmismatches,sense_junctions,querylength,query_compress,
-					  right_ambig_sense,left_ambig_sense,plusp,genestrand,sense_sensedir,
-					  chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
-	Substring_free(&right_ambig_sense);
-	Substring_free(&left_ambig_sense);
-	/* Junction_gc(&sense_junctions); -- Done by Stage3end_new_substrings */
-      } else {
-	if (Stage3end_substrings_querystart(hit) < 8 &&
-	    Stage3end_substrings_queryend(hit) >= querylength - 8) {
-	  *completep = true;
-	}
-	hits = List_push(hits,(void *) hit);
-      }
-
-      if ((hit = Stage3end_new_substrings(&(*found_score),antisense_endpoints,antisense_lefts,
-					  antisense_nmismatches,antisense_junctions,querylength,query_compress,
-					  right_ambig_antisense,left_ambig_antisense,plusp,genestrand,antisense_sensedir,
-					  chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
-	Substring_free(&right_ambig_antisense);
-	Substring_free(&left_ambig_antisense);
-	/* Junction_gc(&antisense_junctions); -- Done by Stage3end_new_substrings */
-      } else {
-	if (Stage3end_substrings_querystart(hit) < 8 &&
-	    Stage3end_substrings_queryend(hit) >= querylength - 8) {
-	  *completep = true;
-	}
-	hits = List_push(hits,(void *) hit);
-      }
-    }
-    
-  } else if (sense_acceptable_p == true) {
-    if (sense_intronp == true || right_ambig_sense != NULL || left_ambig_sense != NULL) {
-      sensedir = SENSE_FORWARD;
-    } else {
-      sensedir = SENSE_NULL;
-    }
-    if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
-					sense_nmismatches,sense_junctions,querylength,query_compress,
-					right_ambig_sense,left_ambig_sense,plusp,genestrand,sensedir,
-					chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
-      Substring_free(&right_ambig_sense);
-      Substring_free(&left_ambig_sense);
-      /* Junction_gc(&sense_junctions); -- Done by Stage3end_new_substrings */
-    } else {
-      if (Stage3end_substrings_querystart(hit) < 8 &&
-	  Stage3end_substrings_queryend(hit) >= querylength - 8) {
-	*completep = true;
-      }
-      hits = List_push(hits,(void *) hit);
-    }
-
-    Substring_free(&right_ambig_antisense);
-    Substring_free(&left_ambig_antisense);
-    Junction_gc(&antisense_junctions);
-
-  } else if (antisense_acceptable_p == true) {
-    if (antisense_intronp == true || right_ambig_antisense != NULL || left_ambig_antisense != NULL) {
-      sensedir = SENSE_ANTI;
-    } else {
-      sensedir = SENSE_NULL;
-    }
-    if ((hit = Stage3end_new_substrings(&(*found_score),antisense_endpoints,antisense_lefts,
-					antisense_nmismatches,antisense_junctions,querylength,query_compress,
-					right_ambig_antisense,left_ambig_antisense,plusp,genestrand,sensedir,
-					chrnum,chroffset,chrhigh,chrlength,/*sarrayp*/true)) == NULL) {
-      Substring_free(&right_ambig_antisense);
-      Substring_free(&left_ambig_antisense);
-      /* Junction_gc(&antisense_junctions); -- Done by Stage3end_new_substrings */
-    } else {
-      if (Stage3end_substrings_querystart(hit) < 8 &&
-	  Stage3end_substrings_queryend(hit) >= querylength - 8) {
-	*completep = true;
-      }
-      hits = List_push(hits,(void *) hit);
-    }
-
-    Substring_free(&right_ambig_sense);
-    Substring_free(&left_ambig_sense);
-    Junction_gc(&sense_junctions);
-
-  } else {
-    /* Neither set of junctions/endpoints works */
-    Substring_free(&right_ambig_sense);
-    Substring_free(&left_ambig_sense);
-    Substring_free(&right_ambig_antisense);
-    Substring_free(&left_ambig_antisense);
-
-    Junction_gc(&sense_junctions);
-    Junction_gc(&antisense_junctions);
-  }
-
-
-  Intlist_free(&sense_nmismatches);
-  Intlist_free(&antisense_nmismatches);
-  Uintlist_free(&sense_lefts);
-  Uintlist_free(&antisense_lefts);
-  Intlist_free(&sense_endpoints);
-  Intlist_free(&antisense_endpoints);
-  
-  for (p = super_path; p != NULL; p = List_next(p)) {
-    diagonal = (Univdiag_T) List_head(p);
-    Univdiag_free(&diagonal);
-  }
-  List_free(&super_path);
-
-#ifdef HAVE_ALLOCA
-  if (querylength <= MAX_STACK_READLENGTH) {
-    FREEA(segmenti_donor_knownpos);
-    FREEA(segmentj_acceptor_knownpos);
-    FREEA(segmentj_antidonor_knownpos);
-    FREEA(segmenti_antiacceptor_knownpos);
-    FREEA(segmenti_donor_knowni);
-    FREEA(segmentj_acceptor_knowni);
-    FREEA(segmentj_antidonor_knowni);
-    FREEA(segmenti_antiacceptor_knowni);
-  } else {
-    FREE(segmenti_donor_knownpos);
-    FREE(segmentj_acceptor_knownpos);
-    FREE(segmentj_antidonor_knownpos);
-    FREE(segmenti_antiacceptor_knownpos);
-    FREE(segmenti_donor_knowni);
-    FREE(segmentj_acceptor_knowni);
-    FREE(segmentj_antidonor_knowni);
-    FREE(segmenti_antiacceptor_knowni);
-  }
-#else
-  FREE(segmenti_donor_knownpos);
-  FREE(segmentj_acceptor_knownpos);
-  FREE(segmentj_antidonor_knownpos);
-  FREE(segmenti_antiacceptor_knownpos);
-  FREE(segmenti_donor_knowni);
-  FREE(segmentj_acceptor_knowni);
-  FREE(segmentj_antidonor_knowni);
-  FREE(segmenti_antiacceptor_knowni);
-#endif
-
-  return hits;
-}
-
-
-
-
-List_T
-Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int querylength,
-		      Compress_T query_compress_fwd, Compress_T query_compress_rev, 
-		      int nmisses_allowed, int genestrand) {
-  List_T hits = NULL;
-  List_T plus_set, minus_set, p;
-  List_T rightward_set_plus = NULL, leftward_set_plus = NULL, rightward_set_minus = NULL, leftward_set_minus = NULL;
-  Elt_T best_plus_elt, best_minus_elt, elt, *plus_elt_array, *minus_elt_array;
-  UINT4 best_plus_nmatches, best_minus_nmatches, nmatches;
-  Sarrayptr_T initptr, finalptr;
-  bool successp, completep;
-  int plus_querypos, minus_querypos;
-  int i;
-  Chrnum_T chrnum;
-  Univcoord_T chroffset, chrhigh, left;
-  Chrpos_T chrlength;
-  T plus_sarray, minus_sarray;
-  char *plus_conversion, *minus_conversion;
-
-  int nseeds_plus, nseeds_minus;
-  int *scores_plus = NULL, *scores_minus = NULL;
-  int niter, best_plus_i, best_minus_i, nplus, nminus;
-  List_T *middle_path_plus = NULL, *right_paths_plus = NULL, *left_paths_plus = NULL,
-    *middle_path_minus = NULL, *right_paths_minus = NULL, *left_paths_minus = NULL;
-  Univdiag_T *middle_diagonals_plus = NULL, *middle_diagonals_minus = NULL;
-  List_T *best_right_diagonals_plus = NULL, *best_left_diagonals_plus = NULL,
-    *all_right_diagonals_plus = NULL, *all_left_diagonals_plus = NULL,
-    *fillin_diagonals_plus = NULL, *fillin_diagonals_minus = NULL,
-    *best_right_diagonals_minus = NULL, *best_left_diagonals_minus = NULL,
-    *all_right_diagonals_minus = NULL, *all_left_diagonals_minus = NULL;
-
-  Intlist_T right_endpoints_sense, right_endpoints_antisense,
-    left_endpoints_sense, left_endpoints_antisense;
-  Intlist_T right_queryends_sense, right_queryends_antisense,
-    left_querystarts_sense, left_querystarts_antisense;
-  Uintlist_T right_ambcoords_sense, right_ambcoords_antisense,
-    left_ambcoords_sense, left_ambcoords_antisense;
-  Intlist_T right_amb_knowni_sense, right_amb_knowni_antisense,
-    left_amb_knowni_sense, left_amb_knowni_antisense;
-  Intlist_T right_amb_nmismatchesi_sense, right_amb_nmismatchesi_antisense,
-    right_amb_nmismatchesj_sense, right_amb_nmismatchesj_antisense,
-    left_amb_nmismatchesi_sense, left_amb_nmismatchesi_antisense,
-    left_amb_nmismatchesj_sense, left_amb_nmismatchesj_antisense;
-  Doublelist_T right_amb_probsi_sense, right_amb_probsi_antisense,
-    right_amb_probsj_sense, right_amb_probsj_antisense,
-    left_amb_probsi_sense, left_amb_probsi_antisense,
-    left_amb_probsj_sense, left_amb_probsj_antisense;
-
-  List_T diagonal_path;
-  Univdiag_T diagonal;
-#if 0
-  bool *coveredp;
-  Chrpos_T **mappings;
-  int *npositions;
-  Oligoindex_T oligoindex;
-  int indexsize;
-  int best_score;
-#endif
-
-
-  debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d, genestrand %d\n",
-	       querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
-  if (nmisses_allowed < 0) {
-    nmisses_allowed = 0;
-#if 0
-  } else {
-    /* It is possible that this makes GSNAP too slow */
-    nmisses_allowed = querylength;
-#endif
-  }
-  *found_score = querylength;
-
-  if (genestrand == +2) {
-    plus_conversion = conversion_rev;
-    minus_conversion = conversion_fwd;
-    plus_sarray = sarray_rev;
-    minus_sarray = sarray_fwd;
-  } else {
-    plus_conversion = conversion_fwd;
-    minus_conversion = conversion_rev;
-    plus_sarray = sarray_fwd;
-    minus_sarray = sarray_rev;
-  }
-
-
-  /* I.  Race from plus and minus start to end */
-  plus_set = minus_set = (List_T) NULL;
-  best_plus_nmatches = best_minus_nmatches = 0;
-  best_plus_elt = best_minus_elt = (Elt_T) NULL;
-  plus_querypos = 0;
-  minus_querypos = 0;
-  niter = 0;
-  while (niter < nmisses_allowed && plus_querypos < querylength && minus_querypos < querylength) {
-    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[plus_querypos]),
-		  querylength - plus_querypos,/*queryoffset*/plus_querypos,
-		  query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,plus_conversion);
-    elt = Elt_new(plus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false);
-    if (nmatches > best_plus_nmatches && elt->nptr <= MAX_HITS_FOR_BEST_ELT) {
-      best_plus_elt = elt;
-      best_plus_nmatches = nmatches;
-      best_plus_i = niter;
-    }
-    plus_set = List_push(plus_set,elt);
-    plus_querypos += nmatches;
-    plus_querypos += 1;		/* To skip the presumed mismatch */
-
-    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[minus_querypos]),
-		  querylength - minus_querypos,/*queryoffset*/minus_querypos,
-		  query_compress_rev,minus_sarray,/*plusp*/false,genestrand,minus_conversion);
-    elt = Elt_new(minus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false);
-    if (nmatches > best_minus_nmatches && elt->nptr < MAX_HITS_FOR_BEST_ELT) {
-      best_minus_elt = elt;
-      best_minus_nmatches = nmatches;
-      best_minus_i = niter;
-    }
-    minus_set = List_push(minus_set,elt);
-    minus_querypos += nmatches;
-    minus_querypos += 1;		/* To skip the presumed mismatch */
-
-    niter++;
-  }
-
-#ifdef DEBUG
-  printf("niter %d vs %d allowed, plus 0..%d, minus 0..%d\n",niter,nmisses_allowed,plus_querypos,minus_querypos);
-  if (best_plus_elt != NULL) {
-    printf("best plus %d..%d (SA %u+%d)\n",
-	   best_plus_elt->querystart,best_plus_elt->queryend,best_plus_elt->initptr,best_plus_elt->finalptr - best_plus_elt->initptr);
-  }
-  if (best_minus_elt != NULL) {
-    printf("best minus %d..%d (SA %u+%d)\n",
-	 best_minus_elt->querystart,best_minus_elt->queryend,best_minus_elt->initptr,best_minus_elt->finalptr - best_minus_elt->initptr);
-  }
-  printf("plus set (positions not yet filled):\n");
-  for (p = plus_set; p != NULL; p = List_next(p)) {
-    Elt_dump((Elt_T) List_head(p));
-  }
-  printf("\n");
-  printf("minus set (positions not yet filled):\n");
-  for (p = minus_set; p != NULL; p = List_next(p)) {
-    Elt_dump((Elt_T) List_head(p));
-  }
-#endif
-
-  if (plus_querypos < querylength) {
-    debug(printf("Plus: could not find large pieces\n"));
-    nseeds_plus = 0;
-
-  } else if (best_plus_elt == NULL) {
-    debug(printf("Plus: No best elt\n"));
-    nseeds_plus = 0;
-
-  } else {
-    Elt_fill_positions_all(best_plus_elt,plus_sarray);
-    if (best_plus_elt->npositions == 0) {
-      /* Could happen if there are too many positions */
-      debug(printf("Plus: Best elt has no positions\n"));
-      nseeds_plus = 0;
-
-    } else {
-      plus_set = List_reverse(plus_set);
-      plus_elt_array = (Elt_T *) List_to_array_n(&nplus,plus_set);
-
-#ifdef DEBUG
-      printf("LEFT\n");
-      for (i = 0; i < best_plus_i; i++) {
-	Elt_dump(plus_elt_array[i]);
-      }
-      printf("MIDDLE\n");
-      Elt_dump(plus_elt_array[best_plus_i]);
-      printf("RIGHT\n");
-      for (i = best_plus_i + 1; i < nplus; i++) {
-	Elt_dump(plus_elt_array[i]);
-      }
-#endif
-
-      nseeds_plus = best_plus_elt->npositions;
-      scores_plus = (int *) MALLOC(nseeds_plus*sizeof(int));
-      /* Assigned only if score is high */
-      middle_path_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
-      right_paths_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
-      left_paths_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
-
-      middle_diagonals_plus = (Univdiag_T *) MALLOC(nseeds_plus*sizeof(Univdiag_T));
-      best_right_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
-      best_left_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
-      all_right_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
-      all_left_diagonals_plus = (List_T *) MALLOC(nseeds_plus*sizeof(List_T));
-      fillin_diagonals_plus = (List_T *) CALLOC(nseeds_plus,sizeof(List_T));
-
-      chrnum = 1;
-      Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
-      for (i = 0; i < nseeds_plus; i++) {
-	left = best_plus_elt->positions[i];
-	if (left > chrhigh) {
-	  chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
-	  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-	  /* *chrhigh += 1U; */
-	}
-	/* May not want to solve for best_right_diagonals and best_left_diagonals.  Use oligoindex instead. */
-	scores_plus[i] = get_diagonals(&(middle_diagonals_plus[i]),
-				       &(best_right_diagonals_plus[i]),&(best_left_diagonals_plus[i]),
-				       &(all_right_diagonals_plus[i]),&(all_left_diagonals_plus[i]),
-				       plus_sarray,/*queryptr*/queryuc_ptr,querylength,query_compress_fwd,
-				       chroffset,chrhigh,/*goal*/left,plus_elt_array,
-				       best_plus_i,nplus,/*plusp*/true,genestrand,plus_conversion);
-	debug(printf("Got plus score %d\n",scores_plus[i]));
-      }
-
-      FREE(plus_elt_array);
-    }
-  }
-
-  if (minus_querypos < querylength) {
-    debug(printf("Minus: Could not find large pieces\n"));
-    nseeds_minus = 0;
-    
-  } else if (best_minus_elt == NULL) {
-    debug(printf("Minus: No best elt\n"));
-    nseeds_minus = 0;
-
-  } else {
-    Elt_fill_positions_all(best_minus_elt,minus_sarray);
-    if (best_minus_elt->npositions == 0) {
-      /* Could happen if there are too many positions */
-      debug(printf("Minus: Best elt has no positions\n"));
-      nseeds_minus = 0;
-
-    } else {
-      minus_set = List_reverse(minus_set);
-      minus_elt_array = (Elt_T *) List_to_array_n(&nminus,minus_set);
-
-#ifdef DEBUG
-      printf("LEFT\n");
-      for (i = 0; i < best_minus_i; i++) {
-	Elt_dump(minus_elt_array[i]);
-      }
-      printf("MIDDLE\n");
-      Elt_dump(minus_elt_array[best_minus_i]);
-      printf("RIGHT\n");
-      for (i = best_minus_i + 1; i < nminus; i++) {
-	Elt_dump(minus_elt_array[i]);
-      }
-#endif
-
-      nseeds_minus = best_minus_elt->npositions;
-      scores_minus = (int *) MALLOC(nseeds_minus*sizeof(int));
-      /* Assigned only if score is high */
-      middle_path_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
-      right_paths_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
-      left_paths_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
-
-      middle_diagonals_minus = (Univdiag_T *) MALLOC(nseeds_minus*sizeof(Univdiag_T));
-      best_right_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
-      best_left_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
-      all_right_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
-      all_left_diagonals_minus = (List_T *) MALLOC(nseeds_minus*sizeof(List_T));
-      fillin_diagonals_minus = (List_T *) CALLOC(nseeds_minus,sizeof(List_T));
-
-      chrnum = 1;
-      Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
-      for (i = 0; i < nseeds_minus; i++) {
-	left = best_minus_elt->positions[i];
-	if (left > chrhigh) {
-	  chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
-	  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-	  /* *chrhigh += 1U; */
-	}
-	/* May not want to solve for best_right_diagonals and best_left_diagonals.  Use oligoindex instead. */
-	scores_minus[i] = get_diagonals(&(middle_diagonals_minus[i]),
-					&(best_right_diagonals_minus[i]),&(best_left_diagonals_minus[i]),
-					&(all_right_diagonals_minus[i]),&(all_left_diagonals_minus[i]),
-					minus_sarray,/*queryptr*/queryrc,querylength,query_compress_rev,
-					chroffset,chrhigh,/*goal*/left,minus_elt_array,
-					best_minus_i,nminus,/*plusp*/false,genestrand,minus_conversion);
-	debug(printf("Got minus score %d\n",scores_minus[i]));
-      }
-
-      FREE(minus_elt_array);
-    }
-  }
-
-#if 0
-  /* Because we don't always left-extend, we cannot trust best_score */
-  best_score = 0;
-  for (i = 0; i < nseeds_plus; i++) {
-    if (scores_plus[i] > best_score) {
-      best_score = scores_plus[i];
-    }
-  }
-  for (i = 0; i < nseeds_minus; i++) {
-    if (scores_minus[i] > best_score) {
-      best_score = scores_minus[i];
-    }
-  }
-#endif
-
-  debug(printf("Have %d nseeds_plus and %d nseeds_minus\n",nseeds_plus,nseeds_minus));
-
-#if 0
-  coveredp = (bool *) CALLOCA(querylength,sizeof(bool));
-  mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
-  npositions = (int *) CALLOCA(querylength,sizeof(int));
-  oligoindex = Oligoindex_array_elt(oligoindices_minor,/*source*/0);
-  indexsize = Oligoindex_indexsize(oligoindex);
-#endif
-
-  /* *sarray_gmap = (List_T) NULL; */
-
-  chrnum = 1;
-  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
-  for (i = 0; i < nseeds_plus; i++) {
-    if (1 /*|| scores_plus[i] > best_score - 20*/) {
-      diagonal = middle_diagonals_plus[i];
-      left = diagonal->univdiagonal;
-      if (left > chrhigh) {
-	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
-	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-	/* *chrhigh += 1U; */
-      }
-      middle_path_plus[i] = find_best_path(&(right_paths_plus[i]),&right_endpoints_sense,&right_endpoints_antisense,
-					   &right_queryends_sense,&right_queryends_antisense,
-					   &right_ambcoords_sense,&right_ambcoords_antisense,
-					   &right_amb_knowni_sense,&right_amb_knowni_antisense,
-					   &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense,
-					   &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense,
-					   &right_amb_probsi_sense,&right_amb_probsi_antisense,
-					   &right_amb_probsj_sense,&right_amb_probsj_antisense,
-					   &(left_paths_plus[i]),&left_endpoints_sense,&left_endpoints_antisense,
-					   &left_querystarts_sense,&left_querystarts_antisense,
-					   &left_ambcoords_sense,&left_ambcoords_antisense,
-					   &left_amb_knowni_sense,&left_amb_knowni_antisense,
-					   &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense,
-					   &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense,
-					   &left_amb_probsi_sense,&left_amb_probsi_antisense,
-					   &left_amb_probsj_sense,&left_amb_probsj_antisense,
-					   &(fillin_diagonals_plus[i]),diagonal,best_right_diagonals_plus[i],best_left_diagonals_plus[i],
-					   querylength,query_compress_fwd,chroffset,
-					   /*plusp*/true,genestrand,/*nmismatches_allowed*/nmisses_allowed);
-
-      hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_plus[i],
-				right_endpoints_sense,right_endpoints_antisense,
-				right_queryends_sense,right_queryends_antisense,
-				right_ambcoords_sense,right_ambcoords_antisense,
-				right_amb_knowni_sense,right_amb_knowni_antisense,
-				right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense,
-				right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense,
-				right_amb_probsi_sense,right_amb_probsi_antisense,
-				right_amb_probsj_sense,right_amb_probsj_antisense,
-
-				left_endpoints_sense,left_endpoints_antisense,
-				left_querystarts_sense,left_querystarts_antisense,
-				left_ambcoords_sense,left_ambcoords_antisense,
-				left_amb_knowni_sense,left_amb_knowni_antisense,
-				left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense,
-				left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense,
-				left_amb_probsi_sense,left_amb_probsi_antisense,
-				left_amb_probsj_sense,left_amb_probsj_antisense,
-
-				chrnum,chroffset,chrhigh,chrlength,
-				querylength,query_compress_fwd,/*plusp*/true,genestrand,
-				/*max_mismatches_allowed*/nmisses_allowed);
-
-#if 0
-      if (0 && completep == false) {
-	*sarray_gmap = run_gmap_plus(*sarray_gmap,middle_path_plus[i],/*start_paths*/left_paths_plus[i],/*end_paths*/right_paths_plus[i],
-				     chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
-				     genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
-				     oligoindices_minor,diagpool,cellpool);
-      }
-#endif
-
-      Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense);
-      Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense);
-      Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense);
-      Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense);
-      Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense);
-      Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense);
-      Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense);
-      Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense);
-
-      Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense);
-      Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense);
-      Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense);
-      Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense);
-      Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense);
-      Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense);
-      Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense);
-      Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense);
-    }
-  }
-
-  chrnum = 1;
-  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
-  for (i = 0; i < nseeds_minus; i++) {
-    if (1 /*|| scores_minus[i] > best_score - 20*/) {
-      diagonal = middle_diagonals_minus[i];
-      left = diagonal->univdiagonal;
-      if (left > chrhigh) {
-	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
-	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-	/* *chrhigh += 1U; */
-      }
-      middle_path_minus[i] = find_best_path(&(right_paths_minus[i]),&right_endpoints_sense,&right_endpoints_antisense,
-					    &right_queryends_sense,&right_queryends_antisense,
-					    &right_ambcoords_sense,&right_ambcoords_antisense,
-					    &right_amb_knowni_sense,&right_amb_knowni_antisense,
-					    &right_amb_nmismatchesi_sense,&right_amb_nmismatchesi_antisense,
-					    &right_amb_nmismatchesj_sense,&right_amb_nmismatchesj_antisense,
-					    &right_amb_probsi_sense,&right_amb_probsi_antisense,
-					    &right_amb_probsj_sense,&right_amb_probsj_antisense,
-					    &(left_paths_minus[i]),&left_endpoints_sense,&left_endpoints_antisense,
-					    &left_querystarts_sense,&left_querystarts_antisense,
-					    &left_ambcoords_sense,&left_ambcoords_antisense,
-					    &left_amb_knowni_sense,&left_amb_knowni_antisense,
-					    &left_amb_nmismatchesi_sense,&left_amb_nmismatchesi_antisense,
-					    &left_amb_nmismatchesj_sense,&left_amb_nmismatchesj_antisense,
-					    &left_amb_probsi_sense,&left_amb_probsi_antisense,
-					    &left_amb_probsj_sense,&left_amb_probsj_antisense,
-					    &(fillin_diagonals_minus[i]),diagonal,best_right_diagonals_minus[i],best_left_diagonals_minus[i],
-					    querylength,query_compress_rev,chroffset,
-					    /*plusp*/false,genestrand,/*nmismatches_allowed*/nmisses_allowed);
-      
-      hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_minus[i],
-				right_endpoints_sense,right_endpoints_antisense,
-				right_queryends_sense,right_queryends_antisense,
-				right_ambcoords_sense,right_ambcoords_antisense,
-				right_amb_knowni_sense,right_amb_knowni_antisense,
-				right_amb_nmismatchesi_sense,right_amb_nmismatchesi_antisense,
-				right_amb_nmismatchesj_sense,right_amb_nmismatchesj_antisense,
-				right_amb_probsi_sense,right_amb_probsi_antisense,
-				right_amb_probsj_sense,right_amb_probsj_antisense,
-
-				left_endpoints_sense,left_endpoints_antisense,
-				left_querystarts_sense,left_querystarts_antisense,
-				left_ambcoords_sense,left_ambcoords_antisense,
-				left_amb_knowni_sense,left_amb_knowni_antisense,
-				left_amb_nmismatchesi_sense,left_amb_nmismatchesi_antisense,
-				left_amb_nmismatchesj_sense,left_amb_nmismatchesj_antisense,
-				left_amb_probsi_sense,left_amb_probsi_antisense,
-				left_amb_probsj_sense,left_amb_probsj_antisense,
-				
-				chrnum,chroffset,chrhigh,chrlength,
-				querylength,query_compress_rev,/*plusp*/false,genestrand,
-				/*max_mismatches_allowed*/nmisses_allowed);
-
-#if 0
-      if (0 && completep == false) {
-	*sarray_gmap = run_gmap_minus(*sarray_gmap,middle_path_minus[i],/*start_paths*/right_paths_minus[i],/*end_paths*/left_paths_minus[i],
-				      chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
-				      genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
-				      oligoindices_minor,diagpool,cellpool);
-      }
-#endif
-
-      Intlist_free(&right_endpoints_sense); Intlist_free(&right_endpoints_antisense);
-      Intlist_free(&right_queryends_sense); Intlist_free(&right_queryends_antisense);
-      Uintlist_free(&right_ambcoords_sense); Uintlist_free(&right_ambcoords_antisense);
-      Intlist_free(&right_amb_knowni_sense); Intlist_free(&right_amb_knowni_antisense);
-      Intlist_free(&right_amb_nmismatchesi_sense); Intlist_free(&right_amb_nmismatchesi_antisense);
-      Intlist_free(&right_amb_nmismatchesj_sense); Intlist_free(&right_amb_nmismatchesj_antisense);
-      Doublelist_free(&right_amb_probsi_sense); Doublelist_free(&right_amb_probsi_antisense);
-      Doublelist_free(&right_amb_probsj_sense); Doublelist_free(&right_amb_probsj_antisense);
-
-      Intlist_free(&left_endpoints_sense); Intlist_free(&left_endpoints_antisense);
-      Intlist_free(&left_querystarts_sense); Intlist_free(&left_querystarts_antisense);
-      Uintlist_free(&left_ambcoords_sense); Uintlist_free(&left_ambcoords_antisense);
-      Intlist_free(&left_amb_knowni_sense); Intlist_free(&left_amb_knowni_antisense);
-      Intlist_free(&left_amb_nmismatchesi_sense); Intlist_free(&left_amb_nmismatchesi_antisense);
-      Intlist_free(&left_amb_nmismatchesj_sense); Intlist_free(&left_amb_nmismatchesj_antisense);
-      Doublelist_free(&left_amb_probsi_sense); Doublelist_free(&left_amb_probsi_antisense);
-      Doublelist_free(&left_amb_probsj_sense); Doublelist_free(&left_amb_probsj_antisense);
-
-    }
-  }
-
-
-#if 0
-  /* Salvage using gmap */
-  chrnum = 1;
-  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
-  for (i = 0; i < nseeds_plus; i++) {
-    if (incomplete_result_p(middle_path_plus[i],querylength) == true) {
-      left = best_plus_elt->positions[i];
-      if (left > chrhigh) {
-	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
-	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-	/* chrhigh += 1U; */
-      }
-      *sarray_gmap = run_gmap_plus(*sarray_gmap,middle_path_plus[i],/*start_paths*/left_paths_plus[i],/*end_paths*/right_paths_plus[i],
-				   chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
-				   genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
-				   oligoindices_minor,diagpool,cellpool);
-    }
-  }
-
-  chrnum = 1;
-  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
-  for (i = 0; i < nseeds_minus; i++) {
-    if (incomplete_result_p(middle_path_minus[i],querylength) == true) {
-      left = best_minus_elt->positions[i];
-      if (left > chrhigh) {
-	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
-	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
-	/* chrhigh += 1U; */
-      }
-      *sarray_gmap = run_gmap_minus(*sarray_gmap,middle_path_minus[i],/*start_paths*/right_paths_minus[i],/*end_paths*/left_paths_minus[i],
-				    chrnum,chroffset,chrhigh,chrlength,queryuc_ptr,querylength,
-				    genestrand,first_read_p,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
-				    oligoindices_minor,diagpool,cellpool);
-    }
-  }
-#endif
-
-
-  if (nseeds_minus > 0) {
-    FREE(scores_minus);
-    for (i = 0; i < nseeds_minus; i++) {
-      for (p = right_paths_minus[i]; p != NULL; p = List_next(p)) {
-	diagonal_path = (List_T) List_head(p);
-	List_free(&diagonal_path);
-      }
-      for (p = left_paths_minus[i]; p != NULL; p = List_next(p)) {
-	diagonal_path = (List_T) List_head(p);
-	List_free(&diagonal_path);
-      }
-      List_free(&(middle_path_minus[i]));
-      List_free(&(left_paths_minus[i]));
-      List_free(&(right_paths_minus[i]));
-	     
-
-      Univdiag_free(&(middle_diagonals_minus[i]));
-      List_free(&(best_right_diagonals_minus[i]));
-      List_free(&(best_left_diagonals_minus[i]));
-      Univdiag_gc(&(all_right_diagonals_minus[i]));
-      Univdiag_gc(&(all_left_diagonals_minus[i]));
-      Univdiag_gc(&(fillin_diagonals_minus[i]));
-    }
-    FREE(middle_diagonals_minus);
-    FREE(best_right_diagonals_minus);
-    FREE(best_left_diagonals_minus);
-    FREE(all_right_diagonals_minus);
-    FREE(all_left_diagonals_minus);
-    FREE(fillin_diagonals_minus);
-
-    FREE(middle_path_minus);
-    FREE(right_paths_minus);
-    FREE(left_paths_minus);
-  }
-
-  if (nseeds_plus > 0) {
-    FREE(scores_plus);
-    for (i = 0; i < nseeds_plus; i++) {
-      for (p = right_paths_plus[i]; p != NULL; p = List_next(p)) {
-	diagonal_path = (List_T) List_head(p);
-	List_free(&diagonal_path);
-      }
-      for (p = left_paths_plus[i]; p != NULL; p = List_next(p)) {
-	diagonal_path = (List_T) List_head(p);
-	List_free(&diagonal_path);
-      }
-      List_free(&(middle_path_plus[i]));
-      List_free(&(left_paths_plus[i]));
-      List_free(&(right_paths_plus[i]));
-
-      Univdiag_free(&(middle_diagonals_plus[i]));
-      List_free(&(best_right_diagonals_plus[i]));
-      List_free(&(best_left_diagonals_plus[i]));
-      Univdiag_gc(&(all_right_diagonals_plus[i]));
-      Univdiag_gc(&(all_left_diagonals_plus[i]));
-      Univdiag_gc(&(fillin_diagonals_plus[i]));
-    }
-    FREE(middle_diagonals_plus);
-    FREE(best_right_diagonals_plus);
-    FREE(best_left_diagonals_plus);
-    FREE(all_right_diagonals_plus);
-    FREE(all_left_diagonals_plus);
-    FREE(fillin_diagonals_plus);
-
-    FREE(middle_path_plus);
-    FREE(right_paths_plus);
-    FREE(left_paths_plus);
-  }
-
-  List_free(&leftward_set_minus);
-  List_free(&rightward_set_minus);
-  List_free(&leftward_set_plus);
-  List_free(&rightward_set_plus);
-
-  for (p = plus_set; p != NULL; p = p->rest) {
-    elt = (Elt_T) p->first;
-    Elt_free(&elt);
-  }
-  List_free(&plus_set);
-
-  for (p = minus_set; p != NULL; p = p->rest) {
-    elt = (Elt_T) p->first;
-    Elt_free(&elt);
-  }
-  List_free(&minus_set);
-
-  debug(printf("Found %d hits\n",List_length(hits)));
-
-  return hits;
-}
 
diff --git a/src/sarray-read.h b/src/sarray-read.h
index b799ee0..953e7d3 100644
--- a/src/sarray-read.h
+++ b/src/sarray-read.h
@@ -1,14 +1,10 @@
-/* $Id: sarray-read.h 184464 2016-02-18 00:09:13Z twu $ */
+/* $Id: sarray-read.h 207324 2017-06-14 19:41:18Z twu $ */
 #ifndef SARRAY_READ_INCLUDED
 #define SARRAY_READ_INCLUDED
 #include "access.h"
 #include "bool.h"
 #include "mode.h"
-#include "genome.h"
 #include "compress.h"
-#include "genomicpos.h"
-#include "splicetrie.h"
-#include "iit-read-univ.h"
 
 
 #define T Sarray_T
@@ -18,30 +14,33 @@ typedef struct T *T;
 Univcoord_T
 Sarray_size (Sarray_T this);
 
-extern void
-Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
-	      Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
-	      Chrpos_T shortsplicedist_in, int splicing_penalty_in,
-	      int max_deletionlength, int max_end_deletions,
-	      int max_middle_insertions_in, int max_end_insertions,
-	      Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
-	      Chrpos_T *splicedists_in, int nsplicesites_in);
-
 #if 0
 extern void
 Sarray_shmem_remove (char *dir, char *fileroot, char *snps_root, Mode_T mode, bool fwdp);
 #endif
 
+extern Univcoord_T *
+Sarray_array (T this);
+
+extern Univcoord_T
+Sarray_position (T sarray, Sarrayptr_T i);
+
 extern T
 Sarray_new (char *dir, char *fileroot, Access_mode_T sarray_access, Access_mode_T lcp_access,
 	    Access_mode_T guideexc_access, Access_mode_T indexij_access, bool sharedp, Mode_T mode, bool fwdp);
 extern void
 Sarray_free (T *old);
 
-extern List_T
-Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int querylength,
-		      Compress_T query_compress_fwd, Compress_T query_compress_rev,
-		      int nmisses_allowed, int genestrand);
+extern void
+Sarray_read (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
+	     UINT4 *nmatches, char *query, UINT4 querylength, int queryoffset,
+	     Compress_T query_compress, T sarray, bool plusp, int genestrand,
+	     char conversion[]);
+
+extern Univcoord_T *
+Sarray_lookup (int *nhits, T sarray, char *query, UINT4 querylength, int queryoffset,
+	       Compress_T query_compress, bool plusp, int genestrand,
+	       char conversion[]);
 
 #undef T
 #endif
diff --git a/src/sarray-read.c b/src/sarray-search.c
similarity index 82%
copy from src/sarray-read.c
copy to src/sarray-search.c
index c69089f..a614cf4 100644
--- a/src/sarray-read.c
+++ b/src/sarray-search.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
+static char rcsid[] = "$Id: sarray-search.c 209125 2017-08-15 19:33:55Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -6,7 +6,7 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #define memcpy(d,s,n) bcopy((s),(d),(n))
 #endif
 
-#include "sarray-read.h"
+#include "sarray-search.h"
 
 #ifdef WORDS_BIGENDIAN
 #define CONVERT(x) Bigendian_convert_uint(x)
@@ -31,11 +31,9 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #include "genome128_hr.h"
 #include "splice.h"
 #include "indel.h"
+#include "intron.h"
+#include "maxent_hr.h"
 #include "stage3hr.h"
-#include "bytecoding.h"
-#include "bitpack64-read.h"
-#include "bitpack64-readtwo.h"
-#include "bitpack64-access.h"
 
 #include "comp.h"
 #include "diagdef.h"
@@ -47,7 +45,6 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #include "stage3hr.h"
 #include "sedgesort.h"
 
-
 #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2)
 #else
 #include <emmintrin.h>
@@ -60,6 +57,11 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #else
 #include <immintrin.h>
 #endif
+#if defined(WORDS_BIGENDIAN) || !defined(HAVE_AVX512)
+#else
+#include <immintrin.h>
+#endif
+
 
 #if !defined(HAVE_SSE4_2)
 /* Skip popcnt */
@@ -70,9 +72,14 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #endif
 
 
+
+/* Sedgesort giving errors on Intel compiler */
+#define USE_QSORT 1
+
 #define MIN_ENDLENGTH 12
 #define MIN_INTRONLEN 9
 
+/* Some limit is needed to prevent GSNAP from running very slowly */
 #define MAX_HITS_FOR_BEST_ELT 1000
 
 /* A value of 10000 misses various splices, although they are caught by GSNAP algorithm */
@@ -87,6 +94,7 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 
 /* #define USE_SEPARATE_BUCKETS 1 */
 
+
 /* Results of each suffix array search */
 #ifdef DEBUG
 #define debug(x) x
@@ -94,50 +102,6 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #define debug(x)
 #endif
 
-#define MAX_DEBUG1_HITS 100
-
-/* Details of suffix array search */
-#ifdef DEBUG1
-#define debug1(x) x
-#else
-#define debug1(x)
-#endif
-
-/* Search through saindex */
-#ifdef DEBUG1A
-#define debug1a(x) x
-#else
-#define debug1a(x)
-#endif
-
-/* get_child */
-#ifdef DEBUG2
-#define debug2(x) x
-#else
-#define debug2(x)
-#endif
-
-/* Compressed suffix array */
-#ifdef DEBUG3
-#define debug3(x) x
-#else
-#define debug3(x)
-#endif
-
-/* Compressed suffix array: comparison with sarray */
-#ifdef DEBUG3A
-#define debug3a(x) x
-#else
-#define debug3a(x)
-#endif
-
-/* Compressed suffix array: comparison with csa phi */
-#ifdef DEBUG3B
-#define debug3b(x) x
-#else
-#define debug3b(x)
-#endif
-
 /* known splicing */
 #ifdef DEBUG4S
 #define debug4s(x) x
@@ -202,13 +166,8 @@ static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $";
 #define debug14(x)
 #endif
 
-/* Compare separate buckets with a single one */
-#ifdef DEBUG15
-#define debug15(x) x
-#else
-#define debug15(x)
-#endif
 
+#define T Sarray_T
 
 #ifdef DEBUG7B
 static void
@@ -229,111 +188,50 @@ print_vector_uint (__m128i x) {
   return;
 }
 
-#ifdef HAVE_AVX2
+#ifdef HAVE_AVX512
 static void
-print_vector_hex_256 (__m256i x) {
+print_vector_hex_512 (__m512i x) {
   UINT4 *s = (UINT4 *) &x;
 
   /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
-  printf("%08X %08X %08X %08X %08X %08X %08X %08X\n",s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
+  printf("%08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n",
+	 s[15],s[14],s[13],s[12],s[11],s[10],s[9],s[8],s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
   return;
 }
 
 static void
-print_vector_uint_256 (__m256i x) {
+print_vector_uint_512 (__m512i x) {
   UINT4 *s = (UINT4 *) &x;
 
   /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
-  printf("%u %u %u %u %u %u %u %u\n",s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
+  printf("%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n",
+	 s[15],s[14],s[13],s[12],s[11],s[10],s[9],s[8],s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
   return;
 }
 #endif
-#endif
 
 
+#ifdef HAVE_AVX2
+static void
+print_vector_hex_256 (__m256i x) {
+  UINT4 *s = (UINT4 *) &x;
 
-#define T Sarray_T
-struct T {
-  Univcoord_T n;
-  Univcoord_T n_plus_one;
-
-  /* Old format */
-  int array_shmid;
-  key_t array_key;
-  Univcoord_T *array;
-
-  int lcpchilddc_shmid;
-  key_t lcpchilddc_key;
-  unsigned char *lcpchilddc;
-
-  int lcp_guide_shmid;
-  key_t lcp_guide_key;
-  int lcp_exceptions_shmid;
-  key_t lcp_exceptions_key;
-  UINT4 *lcp_guide;
-  UINT4 *lcp_exceptions;
-  int n_lcp_exceptions;		/* Won't be necessary if we change lcpchilddc to use guide array */
-  /* int lcp_guide_interval; -- Always use 1024 */
-  
-  int child_guide_shmid;
-  key_t child_guide_key;
-  int child_exceptions_shmid;
-  key_t child_exceptions_key;
-  UINT4 *child_guide;
-  UINT4 *child_exceptions;
-  /* int n_child_exceptions; */
-  int child_guide_interval; /* Always use 1024 */
+  /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
+  printf("%08X %08X %08X %08X %08X %08X %08X %08X\n",s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
+  return;
+}
 
-#if 0
-  Sarrayptr_T initindexi[4];	/* For A, C, G, T */
-  Sarrayptr_T initindexj[4];	/* For A, C, G, T */
-#endif
+static void
+print_vector_uint_256 (__m256i x) {
+  UINT4 *s = (UINT4 *) &x;
 
-  int indexsize;
-  UINT4 indexspace;		/* 4^indexsize.  Used by sarray_search to detect when we have a poly-T oligo shorter than indexsize */
-#ifdef DEBUG15
-  UINT4 *indexi_ptrs, *indexi_comp, *indexj_ptrs, *indexj_comp; /* bucket array: oligomer lookup into suffix array */
-  UINT4 *indexij_ptrs, *indexij_comp;
-#elif defined(USE_SEPARATE_BUCKETS)
-  UINT4 *indexi_ptrs, *indexi_comp, *indexj_ptrs, *indexj_comp; /* bucket array: oligomer lookup into suffix array */
-#else
-  int indexij_ptrs_shmid;
-  key_t indexij_ptrs_key;
-  int indexij_comp_shmid;
-  key_t indexij_comp_key;
-  UINT4 *indexij_ptrs, *indexij_comp;
-#endif
-
-  Access_T array_access; int array_fd; size_t array_len;
-
-#ifdef DEBUG15
-  int indexi_ptrs_fd; size_t indexi_ptrs_len; int indexi_comp_fd; size_t indexi_comp_len;
-  int indexj_ptrs_fd; size_t indexj_ptrs_len; int indexj_comp_fd; size_t indexj_comp_len;
-  int indexij_ptrs_fd; size_t indexij_ptrs_len; int indexij_comp_fd; size_t indexij_comp_len;
-#elif defined(USE_SEPARATE_BUCKETS)
-  int indexi_ptrs_fd; size_t indexi_ptrs_len; int indexi_comp_fd; size_t indexi_comp_len;
-  int indexj_ptrs_fd; size_t indexj_ptrs_len; int indexj_comp_fd; size_t indexj_comp_len;
-#else
-  Access_T indexij_ptrs_access; int indexij_ptrs_fd; size_t indexij_ptrs_len;
-  Access_T indexij_comp_access; int indexij_comp_fd; size_t indexij_comp_len;
+  /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */
+  printf("%u %u %u %u %u %u %u %u\n",s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]);
+  return;
+}
+#endif
 #endif
 
-  Access_T lcpchilddc_access; int lcpchilddc_fd; size_t lcpchilddc_len;
-
-  Access_T lcp_guide_access; int lcp_guide_fd; size_t lcp_guide_len;
-  Access_T lcp_exceptions_access; int lcp_exceptions_fd; size_t lcp_exceptions_len;
-
-  Access_T child_guide_access; int child_guide_fd; size_t child_guide_len;
-  Access_T child_exceptions_access; int child_exceptions_fd; size_t child_exceptions_len;
-
-};
-
-
-/* For benchmarking */
-Univcoord_T
-Sarray_size (Sarray_T this) {
-  return this->n_plus_one;
-}
 
 
 static Sarray_T sarray_fwd;
@@ -350,6 +248,7 @@ static int splicing_penalty;
 
 static Chrpos_T overall_max_distance;
 static Chrpos_T shortsplicedist;
+static Chrpos_T min_intronlength;
 static Chrpos_T max_deletionlen;
 static Chrpos_T max_insertionlen_default;
 static int max_end_deletions;
@@ -362,6 +261,7 @@ static Chrpos_T *splicedists;
 static int nsplicesites;
 
 
+
 #if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
 static __m256i epi32_convert_256;	/* For converting unsigned ints to signed ints */
 #endif
@@ -375,83 +275,14 @@ static __m128i shuffle_mask16[16];
 #endif
 
 
-#if 0
-/* Simplified from sarray_search_simple in sarray-write.c */
-static void
-sarray_search_char (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, char desired_char,
-		    UINT4 *SA, UINT4 n, char *chartable) {
-  Sarrayptr_T low, high, mid;
-  Univcoord_T pos;
-  char c;
-
-  low = 1;
-  high = n + 1;
-
-  while (low < high) {
-#if 0
-    /* Compute mid for unsigned ints.  Want floor((low+high)/2). */
-    mid = low/2 + high/2;
-    if (low % 2 == 1 && high % 2 == 1) {
-      mid += 1;
-    }
-#else
-    mid = low + ((high - low) / 2);
-#endif
-#ifdef WORDS_BIGENDIAN
-    pos = Bigendian_convert_uint(SA[mid]);
-#else
-    pos = SA[mid];
-#endif
-    c = Genome_get_char_lex(genome,pos,n,chartable);
-    if (desired_char > c) {
-      low = mid + 1;
-    } else {
-      high = mid;
-    }
-  }
-
-  *initptr = low;
-
-  low--;
-  high = n;
-  while (low < high) {
-#if 1
-    /* Compute mid for unsigned ints.  Want ceil((low+high)/2). */
-    mid = low/2 + high/2;
-    if (low % 2 == 1 || high % 2 == 1) {
-      mid += 1;
-    }
-#else
-    /* This does not work for ceiling */
-    mid = low + ((high - low) / 2);
-#endif
-#ifdef WORDS_BIGENDIAN
-    pos = Bigendian_convert_uint(SA[mid]);
-#else
-    pos = SA[mid];
-#endif
-    c = Genome_get_char_lex(genome,pos,n,chartable);
-    if (desired_char >= c) {
-      low = mid;
-    } else {
-      high = mid - 1;
-    }
-  }
-
-  *finalptr = high;
-  return;
-}
-#endif
-
-
 void
-Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
-	      Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
-	      Chrpos_T shortsplicedist_in, int splicing_penalty_in,
-	      int max_deletionlength, int max_end_deletions_in,
-	      int max_middle_insertions_in, int max_end_insertions,
-	      Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
-	      Chrpos_T *splicedists_in, int nsplicesites_in) {
+Sarray_search_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
+		     Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
+		     Chrpos_T shortsplicedist_in, int splicing_penalty_in,
+		     int min_intronlength_in, int max_deletionlength, int max_end_deletions_in,
+		     int max_middle_insertions_in, int max_end_insertions,
+		     Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
+		     Chrpos_T *splicedists_in, int nsplicesites_in) {
   int i;
 
   sarray_fwd = sarray_fwd_in;
@@ -481,6 +312,7 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
   shortsplicedist = shortsplicedist_in;
   splicing_penalty = splicing_penalty_in;
 
+  min_intronlength = min_intronlength_in;
   max_deletionlen = max_deletionlength;
   max_end_deletions = max_end_deletions_in;
   max_middle_insertions_default = max_middle_insertions_in;
@@ -506,1405 +338,48 @@ Sarray_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
   sarray_search_char(&(sarray->initindexi[1]),&(sarray->initindexj[1]),/*desired_char*/'C',sarray->array,sarray->n);
   sarray_search_char(&(sarray->initindexi[2]),&(sarray->initindexj[2]),/*desired_char*/'G',sarray->array,sarray->n);
   sarray_search_char(&(sarray->initindexi[3]),&(sarray->initindexj[3]),/*desired_char*/'T',sarray->array,sarray->n);
-#endif
-
-#if 0
-  printf("A => %u %u\n",sarray->initindexi[0],sarray->initindexj[0]);
-  printf("C => %u %u\n",sarray->initindexi[1],sarray->initindexj[1]);
-  printf("G => %u %u\n",sarray->initindexi[2],sarray->initindexj[2]);
-  printf("T => %u %u\n",sarray->initindexi[3],sarray->initindexj[3]);
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-  epi32_convert = _mm_set1_epi32(2147483648); /* 2^31 */
-#endif
-
-#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
-  epi32_convert_256 = _mm256_set1_epi32(2147483648); /* 2^31 */
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN) && defined(USE_SHUFFLE_MASK)
-  /* Used by fill_positions_filtered_first */
-  shuffle_mask16[0] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1);
-  shuffle_mask16[1] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  3, 2, 1, 0);
-  shuffle_mask16[2] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  7, 6, 5, 4);
-  shuffle_mask16[3] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1,  7, 6, 5, 4,  3, 2, 1, 0);
-  shuffle_mask16[4] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8);
-  shuffle_mask16[5] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8,  3, 2, 1, 0);
-  shuffle_mask16[6] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8,  7, 6, 5, 4);
-  shuffle_mask16[7] =  _mm_set_epi8(-1,-1,-1,-1, 11,10, 9, 8,  7, 6, 5, 4,  3, 2, 1, 0);
-  shuffle_mask16[8] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12);
-  shuffle_mask16[9] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12,  3, 2, 1, 0);
-  shuffle_mask16[10] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12,  7, 6, 5, 4);
-  shuffle_mask16[11] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12,  7, 6, 5, 4,  3, 2, 1, 0);
-  shuffle_mask16[12] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8);
-  shuffle_mask16[13] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8,  3, 2, 1, 0);
-  shuffle_mask16[14] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8,  7, 6, 5, 4);
-  shuffle_mask16[15] = _mm_set_epi8(15,14,13,12, 11,10, 9, 8,  7, 6, 5, 4,  3, 2, 1, 0);
-#endif
-  
-  return;
-}
-
-
-static int
-log4 (int result) {
-  int exponent = 0;
-
-  while (result > 1) {
-    result /= 4;
-    exponent++;
-  }
-
-  return exponent;
-}
-
-static UINT4
-power (int base, int exponent) {
-  UINT4 result = 1;
-  int i;
-
-  for (i = 0; i < exponent; i++) {
-    result *= base;
-  }
-
-  return result;
-}
-
-
-#if 0
-void
-Sarray_shmem_remove (char *dir, char *fileroot, char *snps_root, Mode_T mode, bool fwdp) {
-  char *mode_prefix;
-  char *sarrayfile;
-  char *lcpchilddcfile;
-  char *lcp_guidefile, *lcp_exceptionsfile;
-  char *child_guidefile, *child_exceptionsfile;
-  char *indexij_ptrsfile, *indexij_compfile;
-
-  if (mode == STANDARD) {
-    mode_prefix = ".";
-  } else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
-    if (fwdp == true) {
-      mode_prefix = ".metct.";
-    } else {
-      mode_prefix = ".metga.";
-    }
-  } else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
-    if (fwdp == true) {
-      mode_prefix = ".a2iag.";
-    } else {
-      mode_prefix = ".a2itc.";
-    }
-  } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
-    if (fwdp == true) {
-      mode_prefix = ".a2itc.";
-    } else {
-      mode_prefix = ".a2iag.";
-    }
-  }
-
-  sarrayfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sarray")+1,sizeof(char));
-  sprintf(sarrayfile,"%s/%s%ssarray",dir,fileroot,mode_prefix);
-
-  lcpchilddcfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpchilddc")+1,sizeof(char));
-  sprintf(lcpchilddcfile,"%s/%s%ssalcpchilddc",dir,fileroot,mode_prefix);
-
-  lcp_guidefile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpguide1024")+1,sizeof(char));
-  sprintf(lcp_guidefile,"%s/%s%ssalcpguide1024",dir,fileroot,mode_prefix);
-  lcp_exceptionsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpexc")+1,sizeof(char));
-  sprintf(lcp_exceptionsfile,"%s/%s%ssalcpexc",dir,fileroot,mode_prefix);
-
-  child_guidefile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildguide1024")+1,sizeof(char));
-  sprintf(child_guidefile,"%s/%s%ssachildguide1024",dir,fileroot,mode_prefix);
-  child_exceptionsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildexc")+1,sizeof(char));
-  sprintf(child_exceptionsfile,"%s/%s%ssachildexc",dir,fileroot,mode_prefix);
-
-  indexij_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex64meta")+1,sizeof(char));
-  sprintf(indexij_ptrsfile,"%s/%s%ssaindex64meta",dir,fileroot,mode_prefix);
-  indexij_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex64strm")+1,sizeof(char));
-  sprintf(indexij_compfile,"%s/%s%ssaindex64strm",dir,fileroot,mode_prefix);
-
-  Access_shmem_remove(indexij_ptrsfile);
-  Access_shmem_remove(indexij_compfile);
-
-  Access_shmem_remove(sarrayfile);
-  Access_shmem_remove(lcpchilddcfile);
-  Access_shmem_remove(lcp_guidefile);
-  Access_shmem_remove(lcp_exceptionsfile);
-
-  Access_shmem_remove(child_guidefile);
-  Access_shmem_remove(child_exceptionsfile);
-
-  FREE(child_exceptionsfile);
-  FREE(child_guidefile);
-
-  FREE(lcp_exceptionsfile);
-  FREE(lcp_guidefile);
-
-  FREE(lcpchilddcfile);
-
-  FREE(sarrayfile);
-
-  return;
-}
-#endif
-
-
-#ifdef USE_CSA
-
-static Univcoord_T
-csa_lookup (T sarray, Sarrayptr_T i) {
-  Univcoord_T nhops = 0, expected_sa_i;
-  Sarrayptr_T expected_i;
-  __m128i converted, cmp;
-  int matchbits;
-
-  debug3(printf("Entered csa_lookup for %u:",i));
-#ifdef DEBUG3A
-  expected_sa_i = sarray->array[i];
-#endif
-
-  if (
-#ifdef DEBUG3A
-      0 && 
-#endif
-      sarray->array != NULL) {
-    debug3(printf("Returning %u\n",sarray->array[i]));
-    return sarray->array[i];
-  } else {
-    while ((i % sarray->sa_sampling) != 0) {
-      debug3(printf(",%u",i));
-#ifdef DEBUG3B
-      expected_i = sarray->csa[i];
-#endif
-
-#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
-      converted = _mm_sub_epi32(_mm_set1_epi32(i),epi32_convert);
-      cmp = _mm_cmpgt_epi32(converted,sarray->indices0); /* To use cmpgt, sarray->indices0 is shifted down by 1 */
-      matchbits = _mm_movemask_ps(_mm_castsi128_ps(cmp));
-      /* assert(matchbits == 0 || matchbits == 1 || matchbits == 3 || matchbits == 7 || matchbits == 15); */
-      debug3(printf("(%d)",matchbits));
-      i = Bitpack64_read_one(i - sarray->index0[matchbits],sarray->csa0ptrs[matchbits],sarray->csa0comp[matchbits]);
-#else
-      if (i >= sarray->indexX) {
-	assert(matchbits == 15);
-	printf("X");
-	i = Bitpack64_read_one(i-sarray->indexX,sarray->csaXptrs,sarray->csaXcomp);
-      } else if (i >= sarray->indexT) {
-	assert(matchbits == 7);
-	printf("T");
-	i = Bitpack64_read_one(i-sarray->indexT,sarray->csaTptrs,sarray->csaTcomp);
-      } else if (i >= sarray->indexG) {
-	assert(matchbits == 3);
-	printf("G");
-	i = Bitpack64_read_one(i-sarray->indexG,sarray->csaGptrs,sarray->csaGcomp);
-      } else if (i >= sarray->indexC) {
-	assert(matchbits == 1);
-	printf("C");
-	i = Bitpack64_read_one(i-sarray->indexC,sarray->csaCptrs,sarray->csaCcomp);
-      } else {
-	assert(matchbits == 0);
-	printf("A");
-	i = Bitpack64_read_one(i-sarray->indexA,sarray->csaAptrs,sarray->csaAcomp);
-      }
-#endif
-
-      debug3b(assert(i == expected_i));
-      nhops += 1;
-    }
-
-    debug3(printf("\n"));
-    debug3(printf("Returning %u = %u - nhops %u\n",
-		   sarray->array_samples[i/sarray->sa_sampling] - nhops,
-		   sarray->array_samples[i/sarray->sa_sampling],nhops));
-    
-    debug3a(assert(sarray->array_samples[i/sarray->sa_sampling] - nhops == expected_sa_i));
-
-    return sarray->array_samples[i/sarray->sa_sampling] - nhops;
-  }
-}
-
-#elif defined(WORDS_BIGENDIAN)
-
-#define csa_lookup(sarray,i) Bigendian_convert_uint(sarray->array[i])
-
-#else
-
-#define csa_lookup(sarray,i) sarray->array[i]
-
-#endif
-
-
-T
-Sarray_new (char *dir, char *fileroot, Access_mode_T sarray_access, Access_mode_T lcp_access,
-	    Access_mode_T guideexc_access, Access_mode_T indexij_access, bool sharedp, Mode_T mode, bool fwdp) {
-  T new;
-  char *comma1;
-  double seconds;
-  int npages;
-
-  bool old_format_p;
-  char *sarrayfile;		/* Old format */
-
-  char *lcpchilddcfile;
-  char *lcp_guidefile, *lcp_exceptionsfile;
-  char *child_guidefile, *child_exceptionsfile;
-#ifdef DEBUG15
-  char *indexi_ptrsfile, *indexi_compfile;
-  char *indexj_ptrsfile, *indexj_compfile;
-  char *indexij_ptrsfile, *indexij_compfile;
-#elif defined(USE_SEPARATE_BUCKETS)
-  char *indexi_ptrsfile, *indexi_compfile;
-  char *indexj_ptrsfile, *indexj_compfile;
-#else
-  char *indexij_ptrsfile, *indexij_compfile;
-#endif
-
-  char *mode_prefix;
-
-  if (mode == STANDARD) {
-    mode_prefix = ".";
-  } else if (mode == CMET_STRANDED || mode == CMET_NONSTRANDED) {
-    if (fwdp == true) {
-      mode_prefix = ".metct.";
-    } else {
-      mode_prefix = ".metga.";
-    }
-  } else if (mode == ATOI_STRANDED || mode == ATOI_NONSTRANDED) {
-    if (fwdp == true) {
-      mode_prefix = ".a2iag.";
-    } else {
-      mode_prefix = ".a2itc.";
-    }
-  } else if (mode == TTOC_STRANDED || mode == TTOC_NONSTRANDED) {
-    if (fwdp == true) {
-      mode_prefix = ".a2itc.";
-    } else {
-      mode_prefix = ".a2iag.";
-    }
-  }
-
-  /* Old format */
-  sarrayfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sarray")+1,sizeof(char));
-  sprintf(sarrayfile,"%s/%s%ssarray",dir,fileroot,mode_prefix);
-
-  lcpchilddcfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpchilddc")+1,sizeof(char));
-  sprintf(lcpchilddcfile,"%s/%s%ssalcpchilddc",dir,fileroot,mode_prefix);
-
-  lcp_guidefile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpguide1024")+1,sizeof(char));
-  sprintf(lcp_guidefile,"%s/%s%ssalcpguide1024",dir,fileroot,mode_prefix);
-  lcp_exceptionsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("salcpexc")+1,sizeof(char));
-  sprintf(lcp_exceptionsfile,"%s/%s%ssalcpexc",dir,fileroot,mode_prefix);
-
-  child_guidefile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildguide1024")+1,sizeof(char));
-  sprintf(child_guidefile,"%s/%s%ssachildguide1024",dir,fileroot,mode_prefix);
-  child_exceptionsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("sachildexc")+1,sizeof(char));
-  sprintf(child_exceptionsfile,"%s/%s%ssachildexc",dir,fileroot,mode_prefix);
-
-#ifdef DEBUG15
-  indexi_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexi64meta")+1,sizeof(char));
-  sprintf(indexi_ptrsfile,"%s/%s%ssaindexi64meta",dir,fileroot,mode_prefix);
-  indexi_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexi64strm")+1,sizeof(char));
-  sprintf(indexi_compfile,"%s/%s%ssaindexi64strm",dir,fileroot,mode_prefix);
-  indexj_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexj64meta")+1,sizeof(char));
-  sprintf(indexj_ptrsfile,"%s/%s%ssaindexj64meta",dir,fileroot,mode_prefix);
-  indexj_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexj64strm")+1,sizeof(char));
-  sprintf(indexj_compfile,"%s/%s%ssaindexj64strm",dir,fileroot,mode_prefix);
-  indexij_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindex64meta")+1,sizeof(char));
-  sprintf(indexij_ptrsfile,"%s/%s%ssaindex64meta",dir,fileroot,mode_prefix);
-  indexij_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindex64strm")+1,sizeof(char));
-  sprintf(indexij_compfile,"%s/%s%ssaindex64strm",dir,fileroot,mode_prefix);
-#elif defined(USE_SEPARATE_BUCKETS)
-  indexi_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexi64meta")+1,sizeof(char));
-  sprintf(indexi_ptrsfile,"%s/%s%ssaindexi64meta",dir,fileroot,mode_prefix);
-  indexi_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexi64strm")+1,sizeof(char));
-  sprintf(indexi_compfile,"%s/%s%ssaindexi64strm",dir,fileroot,mode_prefix);
-  indexj_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexj64meta")+1,sizeof(char));
-  sprintf(indexj_ptrsfile,"%s/%s%ssaindexj64meta",dir,fileroot,mode_prefix);
-  indexj_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(".saindexj64strm")+1,sizeof(char));
-  sprintf(indexj_compfile,"%s/%s%ssaindexj64strm",dir,fileroot,mode_prefix);
-#else
-  indexij_ptrsfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex64meta")+1,sizeof(char));
-  sprintf(indexij_ptrsfile,"%s/%s%ssaindex64meta",dir,fileroot,mode_prefix);
-  indexij_compfile = (char *) CALLOC(strlen(dir)+strlen("/")+strlen(fileroot)+strlen(mode_prefix)+strlen("saindex64strm")+1,sizeof(char));
-  sprintf(indexij_compfile,"%s/%s%ssaindex64strm",dir,fileroot,mode_prefix);
-#endif
-
-  if (Access_file_exists_p(sarrayfile) == false) {
-    fprintf(stderr,"No suffix array for genome\n");
-    new = (T) NULL;
-
-  } else if (Access_file_exists_p(lcpchilddcfile) == false) {
-    fprintf(stderr,"Enhanced suffix array file %s does not exist.  The genome was built using an obsolete version\n",
-	    lcpchilddcfile);
-    new = (T) NULL;
-    exit(9);
-
-  } else {
-    new = (T) MALLOC_KEEP(sizeof(*new));
-    old_format_p = true;
-
-    if (sarray_access == USE_MMAP_PRELOAD) {
-      if (old_format_p == true) {
-	fprintf(stderr,"Pre-loading suffix array...");
-	new->array = (UINT4 *) Access_mmap_and_preload(&new->array_fd,&new->array_len,&npages,&seconds,sarrayfile,
-						       sizeof(UINT4));
-	new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
-	new->n = new->n_plus_one - 1;
-
-	comma1 = Genomicpos_commafmt(new->array_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      }
-      new->array_access = MMAPPED;
-
-    } else if (sarray_access == USE_MMAP_ONLY) {
-      if (old_format_p == true) {
-	new->array = (UINT4 *) Access_mmap(&new->array_fd,&new->array_len,sarrayfile,/*randomp*/true);
-	new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
-	new->n = new->n_plus_one - 1;
-      }
-      new->array_access = MMAPPED;
-
-    } else if (sarray_access == USE_ALLOCATE) {
-      if (old_format_p == true) {
-	fprintf(stderr,"Allocating memory for suffix array...");
-	if (sharedp == true) {
-	  new->array = (UINT4 *) Access_allocate_shared(&new->array_access,&new->array_shmid,&new->array_key,
-							&new->array_fd,&new->array_len,&seconds,sarrayfile,sizeof(UINT4));
-	} else {
-	  new->array = (UINT4 *) Access_allocate_private(&new->array_access,&new->array_len,&seconds,sarrayfile,sizeof(UINT4));
-	}
-	new->n_plus_one = new->array_len/sizeof(UINT4); /* Should be genomiclength + 1*/
-	new->n = new->n_plus_one - 1;
-	comma1 = Genomicpos_commafmt(new->array_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      }
-    }
-
-#ifdef DEBUG15
-    /* 8 is for two DIFFERENTIAL_METAINFO_SIZE words */
-    new->indexi_ptrs = (UINT4 *) Access_allocate_private(&new->indexi_ptrs_len,&seconds,indexi_ptrsfile,sizeof(UINT4));
-    new->indexi_comp = (UINT4 *) Access_allocate_private(&new->indexi_comp_len,&seconds,indexi_compfile,sizeof(UINT4));
-    new->indexj_ptrs = (UINT4 *) Access_allocate_private(&new->indexj_ptrs_len,&seconds,indexj_ptrsfile,sizeof(UINT4));
-    new->indexj_comp = (UINT4 *) Access_allocate_private(&new->indexj_comp_len,&seconds,indexj_compfile,sizeof(UINT4));
-    new->indexij_ptrs = (UINT4 *) Access_allocate_private(&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4));
-    new->indexij_comp = (UINT4 *) Access_allocate_private(&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4));
-    new->indexsize = 3 + log4(((new->indexij_ptrs_len - 8)/sizeof(UINT4)/2)/ /*DIFFERENTIAL_METAINFO_SIZE*/2);
-#elif defined(USE_SEPARATE_BUCKETS)
-    /* 8 is for two DIFFERENTIAL_METAINFO_SIZE words */
-    new->indexi_ptrs = (UINT4 *) Access_allocate_private(&new->indexi_ptrs_access,&new->indexi_ptrs_len,&seconds,indexi_ptrsfile,sizeof(UINT4));
-    new->indexi_comp = (UINT4 *) Access_allocate_private(&new->indexi_comp_access,&new->indexi_comp_len,&seconds,indexi_compfile,sizeof(UINT4));
-    new->indexj_ptrs = (UINT4 *) Access_allocate_private(&new->indexj_ptrs_access,&new->indexj_ptrs_len,&seconds,indexj_ptrsfile,sizeof(UINT4));
-    new->indexj_comp = (UINT4 *) Access_allocate_private(&new->indexj_comp_access,&new->indexj_comp_len,&seconds,indexj_compfile,sizeof(UINT4));
-    new->indexsize = 3 + log4(((new->indexi_ptrs_len - 8)/sizeof(UINT4))/ /*DIFFERENTIAL_METAINFO_SIZE*/2);
-#else
-    /* 8 is for two DIFFERENTIAL_METAINFO_SIZE words */
-    if (indexij_access == USE_MMAP_PRELOAD) {
-      fprintf(stderr,"Pre-loading indexij ptrs...");
-      new->indexij_ptrs = (UINT4 *) Access_mmap_and_preload(&new->indexij_ptrs_fd,&new->indexij_ptrs_len,&npages,&seconds,indexij_ptrsfile,
-							    sizeof(UINT4));
-      comma1 = Genomicpos_commafmt(new->indexij_ptrs_len);
-      fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-      FREE(comma1);
-
-      fprintf(stderr,"Pre-loading indexij comp...");
-      new->indexij_comp = (UINT4 *) Access_mmap_and_preload(&new->indexij_comp_fd,&new->indexij_comp_len,&npages,&seconds,indexij_compfile,
-							    sizeof(UINT4));
-      comma1 = Genomicpos_commafmt(new->indexij_comp_len);
-      fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-      FREE(comma1);
-
-      new->indexij_ptrs_access = MMAPPED;
-      new->indexij_comp_access = MMAPPED;
-
-    } else if (indexij_access == USE_MMAP_ONLY) {
-      new->indexij_ptrs = (UINT4 *) Access_mmap(&new->indexij_ptrs_fd,&new->indexij_ptrs_len,indexij_ptrsfile,/*randomp*/true);
-      new->indexij_comp = (UINT4 *) Access_mmap(&new->indexij_comp_fd,&new->indexij_comp_len,indexij_compfile,/*randomp*/true);
-
-      new->indexij_ptrs_access = MMAPPED;
-      new->indexij_comp_access = MMAPPED;
-
-    } else if (indexij_access == USE_ALLOCATE) {
-      if (sharedp == true) {
-	fprintf(stderr,"Allocating memory for indexij ptrs...");
-	new->indexij_ptrs = (UINT4 *) Access_allocate_shared(&new->indexij_ptrs_access,&new->indexij_ptrs_shmid,&new->indexij_ptrs_key,
-							     &new->indexij_ptrs_fd,&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->indexij_ptrs_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-
-	fprintf(stderr,"Allocating memory for indexij comp...");
-	new->indexij_comp = (UINT4 *) Access_allocate_shared(&new->indexij_comp_access,&new->indexij_comp_shmid,&new->indexij_comp_key,
-							     &new->indexij_comp_fd,&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->indexij_comp_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      } else {
-	fprintf(stderr,"Allocating memory for indexij ptrs...");
-	new->indexij_ptrs = (UINT4 *) Access_allocate_private(&new->indexij_ptrs_access,&new->indexij_ptrs_len,&seconds,indexij_ptrsfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->indexij_ptrs_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-
-	fprintf(stderr,"Allocating memory for indexij comp...");
-	new->indexij_comp = (UINT4 *) Access_allocate_private(&new->indexij_comp_access,&new->indexij_comp_len,&seconds,indexij_compfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->indexij_comp_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      }
-
-    }
-
-    new->indexsize = 3 + log4(((new->indexij_ptrs_len - 8)/sizeof(UINT4)/2)/ /*DIFFERENTIAL_METAINFO_SIZE*/2);
-#endif
-    new->indexspace = power(4,new->indexsize);
-
-    if (lcp_access == USE_MMAP_PRELOAD) {
-      fprintf(stderr,"Pre-loading LCP/child/DC arrays...");
-      new->lcpchilddc = (unsigned char *) Access_mmap_and_preload(&new->lcpchilddc_fd,&new->lcpchilddc_len,&npages,&seconds,
-								  lcpchilddcfile,sizeof(unsigned char));
-      new->lcpchilddc_access = MMAPPED;
-      comma1 = Genomicpos_commafmt(new->lcpchilddc_len);
-      fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-      FREE(comma1);
-    } else if (lcp_access == USE_MMAP_ONLY) {
-      new->lcpchilddc = (unsigned char *) Access_mmap(&new->lcpchilddc_fd,&new->lcpchilddc_len,lcpchilddcfile,/*randomp*/true);
-      new->lcpchilddc_access = MMAPPED;
-    } else if (lcp_access == USE_ALLOCATE) {
-      fprintf(stderr,"Allocating memory for lcpchildc...");
-      if (sharedp == true) {
-	new->lcpchilddc = (unsigned char *) Access_allocate_shared(&new->lcpchilddc_access,&new->lcpchilddc_shmid,&new->lcpchilddc_key,
-								   &new->lcpchilddc_fd,&new->lcpchilddc_len,&seconds,lcpchilddcfile,sizeof(unsigned char));
-      } else {
-	new->lcpchilddc = (unsigned char *) Access_allocate_private(&new->lcpchilddc_access,&new->lcpchilddc_len,&seconds,lcpchilddcfile,sizeof(unsigned char));
-      }
-      comma1 = Genomicpos_commafmt(new->lcpchilddc_len);
-      fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-      FREE(comma1);
-    }
-
-    if (guideexc_access == USE_MMAP_PRELOAD) {
-      fprintf(stderr,"Pre-loading guide/exceptions...");
-      new->lcp_guide = (UINT4 *) Access_mmap_and_preload(&new->lcp_guide_fd,&new->lcp_guide_len,&npages,&seconds,
-							 lcp_guidefile,sizeof(UINT4));
-      new->lcp_exceptions = (UINT4 *) Access_mmap_and_preload(&new->lcp_exceptions_fd,&new->lcp_exceptions_len,&npages,&seconds,
-							 lcp_exceptionsfile,sizeof(UINT4));
-      new->child_guide = (UINT4 *) Access_mmap_and_preload(&new->child_guide_fd,&new->child_guide_len,&npages,&seconds,
-							 child_guidefile,sizeof(UINT4));
-      new->child_exceptions = (UINT4 *) Access_mmap_and_preload(&new->child_exceptions_fd,&new->child_exceptions_len,&npages,&seconds,
-							 child_exceptionsfile,sizeof(UINT4));
-      new->lcp_guide_access = MMAPPED;
-      new->lcp_exceptions_access = MMAPPED;
-      new->child_guide_access = MMAPPED;
-      new->child_exceptions_access = MMAPPED;
-      fprintf(stderr,"done\n");
-
-    } else if (guideexc_access == USE_MMAP_ONLY) {
-      new->lcp_guide = (UINT4 *) Access_mmap(&new->lcp_guide_fd,&new->lcp_guide_len,
-					     lcp_guidefile,/*randomp*/true);
-      new->lcp_exceptions = (UINT4 *) Access_mmap(&new->lcp_exceptions_fd,&new->lcp_exceptions_len,
-						  lcp_exceptionsfile,/*randomp*/true);
-      new->child_guide = (UINT4 *) Access_mmap(&new->child_guide_fd,&new->child_guide_len,
-					       child_guidefile,/*randomp*/true);
-      new->child_exceptions = (UINT4 *) Access_mmap(&new->child_exceptions_fd,&new->child_exceptions_len,
-							 child_exceptionsfile,/*randomp*/true);
-      new->lcp_guide_access = MMAPPED;
-      new->lcp_exceptions_access = MMAPPED;
-      new->child_guide_access = MMAPPED;
-      new->child_exceptions_access = MMAPPED;
-
-    } else if (guideexc_access == USE_ALLOCATE) {
-      fprintf(stderr,"Allocating memory for lcp guide...");
-      if (sharedp == true) {
-	new->lcp_guide = (UINT4 *) Access_allocate_shared(&new->lcp_guide_access,&new->lcp_guide_shmid,&new->lcp_guide_key,
-							  &new->lcp_guide_fd,&new->lcp_guide_len,&seconds,lcp_guidefile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->lcp_guide_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      
-	fprintf(stderr,"Allocating memory for lcp exceptions...");
-	new->lcp_exceptions = (UINT4 *) Access_allocate_shared(&new->lcp_exceptions_access,&new->lcp_exceptions_shmid,&new->lcp_exceptions_key,
-							       &new->lcp_exceptions_fd,&new->lcp_exceptions_len,&seconds,lcp_exceptionsfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->lcp_exceptions_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-	
-	fprintf(stderr,"Allocating memory for child guide...");
-	new->child_guide = (UINT4 *) Access_allocate_shared(&new->child_guide_access,&new->child_guide_shmid,&new->child_guide_key,
-							    &new->child_guide_fd,&new->child_guide_len,&seconds,child_guidefile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->child_guide_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-
-	fprintf(stderr,"Allocating memory for child exceptions...");
-	new->child_exceptions = (UINT4 *) Access_allocate_shared(&new->child_exceptions_access,&new->child_exceptions_shmid,&new->child_exceptions_key,
-								 &new->child_exceptions_fd,&new->child_exceptions_len,&seconds,child_exceptionsfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->child_exceptions_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      
-      } else {
-	new->lcp_guide = (UINT4 *) Access_allocate_private(&new->lcp_guide_access,&new->lcp_guide_len,&seconds,lcp_guidefile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->lcp_guide_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      
-	fprintf(stderr,"Allocating memory for lcp exceptions...");
-	new->lcp_exceptions = (UINT4 *) Access_allocate_private(&new->lcp_exceptions_access,&new->lcp_exceptions_len,&seconds,
-								lcp_exceptionsfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->lcp_exceptions_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-	
-	fprintf(stderr,"Allocating memory for child guide...");
-	new->child_guide = (UINT4 *) Access_allocate_private(&new->child_guide_access,&new->child_guide_len,&seconds,child_guidefile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->child_guide_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-
-	fprintf(stderr,"Allocating memory for child exceptions...");
-	new->child_exceptions = (UINT4 *) Access_allocate_private(&new->child_exceptions_access,&new->child_exceptions_len,&seconds,
-								  child_exceptionsfile,sizeof(UINT4));
-	comma1 = Genomicpos_commafmt(new->child_exceptions_len);
-	fprintf(stderr,"done (%s bytes, %.2f sec)\n",comma1,seconds);
-	FREE(comma1);
-      }
-    }
-
-    new->n_lcp_exceptions = new->lcp_exceptions_len/(sizeof(UINT4) + sizeof(UINT4));
-    new->child_guide_interval = 1024;
-  }
-
-
-  FREE(child_exceptionsfile);
-  FREE(child_guidefile);
-
-  FREE(lcp_exceptionsfile);
-  FREE(lcp_guidefile);
-
-  FREE(lcpchilddcfile);
-
-#ifdef DEBUG15
-  FREE(indexi_compfile);
-  FREE(indexi_ptrsfile);
-  FREE(indexj_compfile);
-  FREE(indexj_ptrsfile);
-  FREE(indexij_compfile);
-  FREE(indexij_ptrsfile);
-#elif defined(USE_SEPARATE_BUCKETS)
-  FREE(indexi_compfile);
-  FREE(indexi_ptrsfile);
-  FREE(indexj_compfile);
-  FREE(indexj_ptrsfile);
-#else
-  FREE(indexij_compfile);
-  FREE(indexij_ptrsfile);
-#endif
-
-  FREE(sarrayfile);
-
-  return new;
-}
-
-
-void
-Sarray_free (T *old) {
-  if (*old) {
-#ifdef DEBUG15
-    FREE((*old)->indexi_ptrs);
-    FREE((*old)->indexi_comp);
-    FREE((*old)->indexj_ptrs);
-    FREE((*old)->indexj_comp);
-    FREE((*old)->indexij_ptrs);
-    FREE((*old)->indexij_comp);
-#elif defined(USE_SEPARATE_BUCKETS)
-    FREE((*old)->indexi_ptrs);
-    FREE((*old)->indexi_comp);
-    FREE((*old)->indexj_ptrs);
-    FREE((*old)->indexj_comp);
-#else
-    if ((*old)->indexij_ptrs_access == MMAPPED) {
-      munmap((void *) (*old)->indexij_ptrs,(*old)->indexij_ptrs_len);
-      close((*old)->indexij_ptrs_fd);
-    } else if ((*old)->indexij_ptrs_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->indexij_ptrs);
-    } else if ((*old)->indexij_ptrs_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->indexij_ptrs,(*old)->indexij_ptrs_shmid,(*old)->indexij_ptrs_key);
-    }
-    if ((*old)->indexij_comp_access == MMAPPED) {
-      munmap((void *) (*old)->indexij_comp,(*old)->indexij_comp_len);
-      close((*old)->indexij_comp_fd);
-    } else if ((*old)->indexij_comp_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->indexij_comp);
-    } else if ((*old)->indexij_comp_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->indexij_comp,(*old)->indexij_comp_shmid,(*old)->indexij_comp_key);
-    }
-#endif
-
-    if ((*old)->lcp_guide_access == MMAPPED) {
-      munmap((void *) (*old)->lcp_guide,(*old)->lcp_guide_len);
-      close((*old)->lcp_guide_fd);
-    } else if ((*old)->lcp_guide_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->lcp_guide);
-    } else if ((*old)->lcp_guide_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->lcp_guide,(*old)->lcp_guide_shmid,(*old)->lcp_guide_key);
-    }
-
-    if ((*old)->lcp_exceptions_access == MMAPPED) {
-      munmap((void *) (*old)->lcp_exceptions,(*old)->lcp_exceptions_len);
-      close((*old)->lcp_exceptions_fd);
-    } else if ((*old)->lcp_exceptions_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->lcp_exceptions);
-    } else if ((*old)->lcp_exceptions_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->lcp_exceptions,(*old)->lcp_exceptions_shmid,(*old)->lcp_exceptions_key);
-    }
-
-    if ((*old)->child_guide_access == MMAPPED) {
-      munmap((void *) (*old)->child_guide,(*old)->child_guide_len);
-      close((*old)->child_guide_fd);
-    } else if ((*old)->child_guide_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->child_guide);
-    } else if ((*old)->child_guide_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->child_guide,(*old)->child_guide_shmid,(*old)->child_guide_key);
-    }
-
-    if ((*old)->child_exceptions_access == MMAPPED) {
-      munmap((void *) (*old)->child_exceptions,(*old)->child_exceptions_len);
-      close((*old)->child_exceptions_fd);
-    } else if ((*old)->child_exceptions_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->child_exceptions);
-    } else if ((*old)->child_exceptions_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->child_exceptions,(*old)->child_exceptions_shmid,(*old)->child_exceptions_key);
-    }
-
-    if ((*old)->lcpchilddc_access == MMAPPED) {
-      munmap((void *) (*old)->lcpchilddc,(*old)->lcpchilddc_len);
-      close((*old)->lcpchilddc_fd);
-    } else if ((*old)->lcpchilddc_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->lcpchilddc);
-    } else if ((*old)->lcpchilddc_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->lcpchilddc,(*old)->lcpchilddc_shmid,(*old)->lcpchilddc_key);
-    }
-
-    if ((*old)->array_access == MMAPPED) {
-      munmap((void *) (*old)->array,(*old)->array_len);
-      close((*old)->array_fd);
-    } else if ((*old)->array_access == ALLOCATED_PRIVATE) {
-      FREE((*old)->array);
-    } else if ((*old)->array_access == ALLOCATED_SHARED) {
-      Access_deallocate((*old)->array,(*old)->array_shmid,(*old)->array_key);
-    }
-
-    FREE_KEEP(*old);
-  }
-
-  return;
-}
-
-
-
-#if 0
-/* Old search method.  O(m*(log n)), where m is the querylength and n
-   is the size of the suffix array searched */
-static Sarrayptr_T
-sarray_search_init (char *query, int querylength, int queryoffset, Compress_T query_compress, bool plusp,
-		    Sarrayptr_T low, Sarrayptr_T high, Univcoord_T nmatches_low, Univcoord_T nmatches_high) {
-  Sarrayptr_T mid;
-  Univcoord_T pos;
-  Univcoord_T nmatches_mid, fasti;
-  char c;
-  UINT4 sa_low, sa_mid;
-  UINT4 lcp_low, lcp_mid;
-
-  assert(querylength > 0);
-
-  debug1(printf("sarray_search_init on querylength %d with low %u, high %u\n",querylength,low,high));
-  while (low + 1 < high) {
-#if 0
-    /* Compute mid for unsigned ints */
-    mid = low/2 + high/2;
-    if (low % 2 == 1 && high % 2 == 1) {
-      mid += 1;
-    }
-#else
-    mid = low + ((high - low) / 2);
-#endif
-
-    debug1(printf("low %u, high %u => mid %u\n",low,high,mid));
-    nmatches_mid =  (nmatches_low < nmatches_high) ? nmatches_low : nmatches_high;
-
-#ifdef WORDS_BIGENDIAN
-    fasti = nmatches_mid +
-      (Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/Bigendian_convert_uint(sarray->array[mid])-queryoffset,
-							 /*pos5*/queryoffset+nmatches_mid,
-							 /*pos3*/queryoffset+querylength,plusp,genestrand);
-    pos = Bigendian_convert_uint(sarray->array[mid]) + fasti;
-#else
-    fasti = nmatches_mid +
-      (Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[mid]-queryoffset,
-							 /*pos5*/queryoffset+nmatches_mid,
-							 /*pos3*/queryoffset+querylength,plusp,genestrand);
-    pos = sarray->array[mid] + fasti;
-#endif
-    c = Genome_get_char_lex(genome,pos,sarray->n,chartable);
-
-    if (fasti == (Univcoord_T) querylength || c > query[fasti]) {
-      high = mid;
-      /* nmatches_high = (sarray->lcp[mid] < nmatches_mid) ? sarray->lcp[mid] : nmatches_mid; */
-#ifdef WORDS_BIGENDIAN
-      sa_mid = Bigendian_convert_uint(sarray->array[mid]);
-#else
-      sa_mid = sarray->array[mid];
-#endif
-      lcp_mid = Bitpack64_read_one(sa_mid,sarray->plcp_ptrs,sarray->plcp_comp) - sa_mid;
-#ifdef USE_LCP
-      if (lcp_mid != sarray->lcp[mid]) {
-	fprintf(stderr,"LCP compression error at %u\n",mid);
-      }
-#endif
-      nmatches_high = (lcp_mid < nmatches_mid) ? lcp_mid : nmatches_mid;
-    } else {
-      low = mid;
-      /* nmatches_low = (sarray->lcp[low] < nmatches_mid) ? sarray->lcp[low] : nmatches_mid; */
-#ifdef WORDS_BIGENDIAN
-      sa_low = Bigendian_convert_uint(sarray->array[low]);
-#else
-      sa_low = sarray->array[low];
-#endif
-      lcp_low = Bitpack64_read_one(sa_low,sarray->plcp_ptrs,sarray->plcp_comp) - sa_low;
-#ifdef USE_LCP
-      if (lcp_low != sarray->lcp[low]) {
-	fprintf(stderr,"LCP compression error at %u\n",mid);
-      }
-#endif
-      nmatches_low = (lcp_low < nmatches_mid) ? lcp_low : nmatches_mid;
-    }
-
-    debug1(printf("sarray_search_init with low %u, high %u\n",low,high));
-  }
-
-  debug1(printf("sarray_search_init ended.  Returning low %u+1\n\n",low));
-  return low + 1;
-}
-#endif
-
-
-#if 0
-/* Old search method.  O(m*(log n)), where m is the querylength and n
-   is the size of the suffix array searched */
-static Sarrayptr_T
-sarray_search_final (char *query, int querylength, int queryoffset, Compress_T query_compress, bool plusp,
-		     Sarrayptr_T low, Sarrayptr_T high, Univcoord_T nmatches_low, Univcoord_T nmatches_high) {
-  Sarrayptr_T mid;
-  Univcoord_T pos;
-  Univcoord_T nmatches_mid, fasti;
-  UINT4 sa_low, sa_mid;
-  UINT4 lcp_low, lcp_mid;
-  char c;
-
-  assert(querylength > 0);
-
-  debug1(printf("sarray_search_final on querylength %d with low %u, high %u\n",querylength,low,high));
-  while (low + 1 < high) {
-#if 0
-    /* Compute mid for unsigned ints */
-    mid = low/2 + high/2;
-    if (low % 2 == 1 && high % 2 == 1) {
-      mid += 1;
-    }
-#else
-    mid = low + ((high - low) / 2);
-#endif
-    debug1(printf("low %u, high %u => mid %u\n",low,high,mid));
-    nmatches_mid =  (nmatches_low < nmatches_high) ? nmatches_low : nmatches_high;
-
-#ifdef WORDS_BIGENDIAN
-    fasti = nmatches_mid +
-      (Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/Bigendian_convert_uint(sarray->array[mid])-queryoffset,
-							 /*pos5*/queryoffset+nmatches_mid,
-							 /*pos3*/queryoffset+querylength,plusp,genestrand);
-    pos = Bigendian_convert_uint(sarray->array[mid]) + fasti;
-#else
-    fasti = nmatches_mid +
-      (Univcoord_T) Genome_consecutive_matches_rightward(query_compress,/*left*/sarray->array[mid]-queryoffset,
-							 /*pos5*/queryoffset+nmatches_mid,
-							 /*pos3*/queryoffset+querylength,plusp,genestrand);
-    pos = sarray->array[mid] + fasti;
-#endif
-    c = Genome_get_char_lex(genome,pos,sarray->n,chartable);
-
-    if (fasti == (Univcoord_T) querylength || c < query[fasti]) {
-      low = mid;
-      /* nmatches_low = (sarray->lcp[low] < nmatches_mid) ? sarray->lcp[low] : nmatches_mid; */
-#ifdef WORDS_BIGENDIAN
-      sa_low = Bigendian_convert_uint(sarray->array[low]);
-#else
-      sa_low = sarray->array[low];
-#endif
-      lcp_low = Bitpack64_read_one(sa_low,sarray->plcp_ptrs,sarray->plcp_comp) - sa_low;
-#ifdef USE_LCP
-      if (lcp_low != sarray->lcp[low]) {
-	fprintf(stderr,"LCP compression error at %u\n",mid);
-      }
-#endif
-      nmatches_low = (lcp_low < nmatches_mid) ? lcp_low : nmatches_mid;
-    } else {
-      high = mid;
-      /* nmatches_high = (sarray->lcp[mid] < nmatches_mid) ? sarray->lcp[mid] : nmatches_mid; */
-#ifdef WORDS_BIGENDIAN
-      sa_mid = Bigendian_convert_uint(sarray->array[mid]);
-#else
-      sa_mid = sarray->array[mid];
-#endif
-      lcp_mid = Bitpack64_read_one(sa_mid,sarray->plcp_ptrs,sarray->plcp_comp) - sa_mid;
-#ifdef USE_LCP
-      if (lcp_mid != sarray->lcp[mid]) {
-	fprintf(stderr,"LCP compression error at %u\n",mid);
-      }
-#endif
-      nmatches_high = (lcp_mid < nmatches_mid) ? lcp_mid : nmatches_mid;
-    }
-
-    debug1(printf("sarray_search_final with low %u, high %u\n",low,high));
-  }
-
-  debug1(printf("sarray_search_final ended.  Returning high %u-1\n\n",high-1));
-  return high - 1;
-}
-#endif
-
-
-int
-nt_querylength (char *query, int querylength) {
-  int i;
-  char c;
-
-  i = 0;
-  while (i < querylength && ((c = query[i]) == 'A' || c == 'C' || c == 'G' || c == 'T')) {
-    i++;
-  }
-
-  return i;
-}
-
-
-Oligospace_T
-nt_oligo (char *query, int indexsize) {
-  Oligospace_T oligo = 0U;
-  int i;
-
-  for (i = 0; i < indexsize; i++) {
-    oligo *= 4;
-    
-    switch (query[i]) {
-    case 'A': break;
-    case 'C': oligo += 1; break;
-    case 'G': oligo += 2; break;
-    case 'T': oligo += 3; break;
-    default:
-      fprintf(stderr,"Saw N in nt_oligo\n");
-      abort();
-    }
-  }
-
-  return oligo;
-}
-
-Oligospace_T
-nt_oligo_truncate (char *query, int truncsize, int indexsize, int subst_value) {
-  Oligospace_T oligo = 0U;
-  int i;
-
-  for (i = 0; i < truncsize; i++) {
-    oligo *= 4;
-    
-    switch (query[i]) {
-    case 'A': break;
-    case 'C': oligo += 1; break;
-    case 'G': oligo += 2; break;
-    case 'T': oligo += 3; break;
-    default:
-      fprintf(stderr,"Saw N in nt_oligo\n");
-      abort();
-    }
-  }
-
-  for ( ; i < indexsize; i++) {
-    oligo *= 4;
-    oligo += subst_value;
-  }
-
-  return oligo;
-}
-
-
-
-/* For child[index+1].up, just calling child[index] */
-#define decode_up(index,child_bytes,child_guide,child_exceptions,child_guide_interval) index - Bytecoding_read_wguide(index,child_bytes,child_guide,child_exceptions,child_guide_interval)
-#define decode_down(index,child_bytes,child_guide,child_exceptions,child_guide_interval) Bytecoding_read_wguide(index,child_bytes,child_guide,child_exceptions,child_guide_interval) + index + 1
-#define decode_next(index,child_bytes,child_guide,child_exceptions,child_guide_interval) Bytecoding_read_wguide(index,child_bytes,child_guide,child_exceptions,child_guide_interval) + index + 1
-
-#if 0
-/*                                      0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F */
-static char discrim_char_before[16] = {'?','$','$','$','$','$','A','A','A','A','C','C','C','G','G','T'};
-static char discrim_char_after[16]  = {'?','A','C','G','T','X','C','G','T','X','G','T','X','T','X','X'};
-#endif
-
-static bool
-get_child_given_first (Sarrayptr_T *l, Sarrayptr_T *r, Sarrayptr_T i, Sarrayptr_T j, char desired_char,
-		       T sarray, unsigned char *lcpchilddc, UINT4 lcp_whole, UINT4 nextl) {
-  char c1, c2;
-  UINT4 child_next;
-
-  debug2(printf("Getting children for l-interval from %u to %u, char %c\n",i,j,desired_char));
-
-#if 0
-  /* First child already given */
-  debug1(printf("lcp-interval %u..%u\n",i,j));
-  up = decode_up(j,sarray->child_bytes,sarray->child_guide,sarray->child_exceptions,sarray->child_guide_interval);
-  if (i < up && up <= j) {
-    nextl = up;
-    debug2(printf("nextl is up: %u\n",nextl));
-  } else {
-    nextl = decode_down(i,sarray->child_bytes,sarray->child_guide,sarray->child_exceptions,sarray->child_guide_interval); /* down */
-    debug2(printf("nextl is down: %u\n",nextl));
-  }
-#endif
-
-  /* Test first child: Use discrim_chars, rather than looking up S[SA[i] + lcp_whole] */
-  c2 = Bytecoding_lcpchilddc_dc(&c1,nextl,lcpchilddc);
-  debug2(printf("First child: %u to %u, discrim chars %c and %c\n",i,nextl-1,c1,c2));
-
-  if (desired_char < c1) {
-    debug2(printf("1.  Returning false, because desired %c < c1 %c\n",desired_char,c1));
-    return false;
-  } else if (desired_char == c1) {
-    *l = i;
-    *r = nextl - 1;
-    debug2(printf("Returning true\n\n"));
-    return true;
-  } else if (desired_char < c2) {
-    debug2(printf("1.  Returning false, because desired %c < c2 %c\n",desired_char,c2));
-    return false;
-  } else {
-    /* Advance to middle children or final child */
-    debug2(printf("1.  Advancing\n"));
-  }
-
-  /* Test for child[i] being down: lcp[child[i]] > lcp[i] */
-  /* Test for child[i] being next_lindex: lcp[child[i]] == lcp[i] */
-  /* Test middle children */
-  while (nextl < j && Bytecoding_lcpchilddc_lcp_next(&child_next,nextl,/*bytes*/lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-						     sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions) == lcp_whole) {
-    /* Already tested for desired_char < c2 */
-    if (desired_char == c2) {
-      *l = nextl;
-#if 0
-      *r = Bytecoding_lcpchilddc_child_next(nextl,lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-					    sarray->child_guide_interval) - 1; /* child[nextl] - 1 */
-#else
-      *r = child_next - 1;
-#endif
-      debug2(printf("Child: %u to %u, c2 %c\n",nextl,*r,c2));
-      debug2(printf("Returning true\n\n"));
-      return true;
-    } else {
-      debug2(printf("Child: %u",nextl));
-#if 0
-      nextl = Bytecoding_lcpchilddc_child_next(nextl,lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-					       sarray->child_guide_interval); /* child[nextl] */
-#else
-      nextl = child_next;
-#endif
-      c2 = Bytecoding_lcpchilddc_dc(&c1,nextl,lcpchilddc);
-      debug2(printf(" to %u, discrim chars %c and %c\n",nextl-1,c1,c2));
-
-      if (desired_char < c2) {
-	debug2(printf("M.  Returning false, because desired %c < c2 %c\n",desired_char,c2));
-	return false;
-      } else {
-	debug2(printf("M.  Advancing\n"));
-      }
-    }
-  }
-
-  /* Test last child */
-  /* Already tested for desired_char < c2 */
-  debug2(printf("Final child: %u to %u, c2 %c\n",nextl,j,c2));
-  if (desired_char == c2) {
-    *l = nextl;
-    *r = j;
-    debug2(printf("Returning true\n\n"));
-    return true;
-  } else {
-    debug2(printf("3.  Returning false, because desired %c != c2 %c\n",desired_char,c2));
-    return false;
-  }
-}
-
-
-static UINT4
-find_longest_match (UINT4 nmatches, Sarrayptr_T *initptr, Sarrayptr_T *finalptr,
-		    Sarrayptr_T i, Sarrayptr_T j, char *query, UINT4 querylength,
-		    int queryoffset, Compress_T query_compress, T sarray, bool plusp,
-		    int genestrand, char conversion[]) {
-  UINT4 lcp_whole, nextl, up;
-  UINT4 minlength;
-  UINT4 l, r;
-  Univcoord_T SA_i;
-
-  while (nmatches < querylength) {
-    if (i == j) {
-      /* Singleton interval */
-      debug1(printf("Singleton interval %u..%u\n",i,j));
-      SA_i = csa_lookup(sarray,i);
-      nmatches +=
-	Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
-					     /*pos5*/queryoffset+nmatches,/*pos3*/queryoffset+querylength,
-					     plusp,genestrand);
-      *initptr = i;
-      *finalptr = j;
-      return nmatches;
-
-    } else {
-      /* First child */
-      debug1(printf("lcp-interval %u..%u\n",i,j));
-      up = Bytecoding_lcpchilddc_child_up(j,sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-					  sarray->child_guide_interval);
-      if (i < up && up <= j) {
-	nextl = up;
-	debug2(printf("nextl is up: %u\n",nextl));
-      } else {
-	nextl = Bytecoding_lcpchilddc_child_next(i,sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-						 sarray->child_guide_interval); /* really down */
-	debug2(printf("nextl is down: %u\n",nextl));
-      }
-
-      lcp_whole = Bytecoding_lcpchilddc_lcp(nextl,sarray->lcpchilddc,sarray->lcp_exceptions,
-					    sarray->n_lcp_exceptions); /* lcp(i,j) */
-      debug1(printf("lcp_whole for %u..%u is %d, compared with nmatches %d\n",i,j,lcp_whole,nmatches));
-
-      if (lcp_whole > nmatches) {
-	/* Check only up to minlength, so we validate the entire interval */
-	minlength = (lcp_whole < querylength) ? lcp_whole : querylength;
-	debug1(printf("Looking up genome for query from %d .. %d - 1\n",nmatches,minlength));
-	SA_i = csa_lookup(sarray,i);
-	nmatches +=
-	  Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
-					       /*pos5*/queryoffset+nmatches,/*pos3*/queryoffset+minlength,
-					       plusp,genestrand);
-	if (nmatches < minlength) {
-	  *initptr = i;
-	  *finalptr = j;
-	  return nmatches;
-
-	} else if (nmatches >= querylength) {
-	  debug1(printf("nmatches is now %d >= querylength %d => success\n",nmatches,querylength));
-	  *initptr = i;
-	  *finalptr = j;
-	  return nmatches;
-	}
-      }
-	
-      debug1(printf("nmatches is now %d => desired_char is %c => %c\n",
-		    nmatches,query[nmatches],conversion[query[nmatches]]));
-      if (get_child_given_first(&l,&r,i,j,/*desired_char*/conversion[(int) query[nmatches]],
-				sarray,sarray->lcpchilddc,lcp_whole,nextl) == false) {
-	*initptr = i;
-	*finalptr = j;
-	return nmatches;
-      } else {
-	nmatches += 1;
-	i = l;
-	j = r;
-      }
-    }
-  }
-
-  *initptr = i;
-  *finalptr = j;
-  return nmatches;
-}
-
-
-
-/* Searches using LCP and child arrays.  Should be O(m * |Sigma|),
-   where m wis the querylength and |Sigma| is the size of the alphabet
-   (4 for DNA) */
-/* query is a substring of the original, starting with queryoffset */
-static void
-sarray_search (Sarrayptr_T *initptr, Sarrayptr_T *finalptr, bool *successp,
-	       UINT4 *nmatches, char *query, UINT4 querylength, int queryoffset,
-	       Compress_T query_compress, T sarray, bool plusp, int genestrand,
-	       char conversion[]) {
-  int effective_querylength;	/* length to first N */
-  Oligospace_T oligo;
-  UINT4 l, r;
-
-#ifdef DEBUG1
-  Univcoord_T SA_i, hit, child_next;
-  int k = 0;
-  UINT4 recount, lcp_prev, lcp_next, lcp_i, max_lcp;
-  char Buffer[1000+1], c1, c2;
-  bool failp;
-#endif
-
-  debug1(printf("sarray_search on %.*s, querylength %d, plusp %d\n",querylength,query,querylength,plusp));
-
-  /* Find initial lcp-interval */
-  effective_querylength = nt_querylength(query,querylength);
-
-  *nmatches = 0;
-  if (effective_querylength == 0) {
-    *initptr = *finalptr = 0;
-    *successp = false;
-    return;
-
-  } else if (effective_querylength < sarray->indexsize) {
-    debug1(printf("string %.*s with effective querylength %d is shorter than indexsize",
-		  querylength,query,effective_querylength));
-    l = 1;
-    r = sarray->n;
-
-  } else {
-    oligo = nt_oligo(query,sarray->indexsize);
-#ifdef DEBUG15
-    if ((l = Bitpack64_read_two(&r,oligo*2,sarray->indexij_ptrs,sarray->indexij_comp)) !=
-	Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp)) {
-      abort();
-    } else if (r - 1 != Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp)) {
-      printf("For oligo %u, separate buckets give %u and %u, while single bucket gives %u and %u\n",
-	     oligo,
-	     Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp),
-	     Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp),
-	     l,r);
-      abort();
-    }
-    r--;			/* Because interleaved writes r+1 to maintain monotonicity */
-#elif defined(USE_SEPARATE_BUCKETS)
-    l = Bitpack64_read_one(oligo,sarray->indexi_ptrs,sarray->indexi_comp);
-    r = Bitpack64_read_one(oligo,sarray->indexj_ptrs,sarray->indexj_comp);
-#else
-    l = Bitpack64_read_two(&r,oligo*2,sarray->indexij_ptrs,sarray->indexij_comp);
-    r--;			/* Because interleaved writes r+1 to maintain monotonicity */
-#endif
-    debug1(printf("string %.*s is equal/longer than indexsize %d => oligo %u => interval %u..%u",
-		  querylength,query,sarray->indexsize,oligo,l,r));
-    if (l <= r) {
-      debug1(printf(" (good)\n"));
-      *nmatches = sarray->indexsize;
-      /* i = l; */
-      /* j = r; */
-    } else {
-      /* The entire lcp-interval [1,sarray->n] should also work without initindex */
-      l = 1;
-      r = sarray->n;
-      debug1(printf(" (bad) => entire lcp-interval: %u..%u\n",l,r));
-    }
-  }
-
-  if (l > r) {
-    /* Did not find a match using saindex or one letter */
-    *initptr = l;
-    *finalptr = r;
-  } else {
-    *nmatches = find_longest_match(*nmatches,&(*initptr),&(*finalptr),/*i*/l,/*j*/r,
-				   query,querylength,queryoffset,query_compress,sarray,
-				   plusp,genestrand,conversion);
-  }
-
-  /* Search through suffix tree */
-  debug1(printf("initptr gets %u, finalptr gets %u\n",*initptr,*finalptr));
-
-  if (*nmatches < querylength) {
-    *successp = false;
-    debug1(printf("%s fail at %d: got %d hits with %d matches:\n",
-		 plusp ? "plus" : "minus",queryoffset,(*finalptr - *initptr + 1),*nmatches));
-  } else {
-    *successp = true;
-    debug1(printf("%s success at %d: got %d hits with %d matches:\n",
-		 plusp ? "plus" : "minus",queryoffset,(*finalptr - *initptr + 1),*nmatches));
-  }
-
-#ifdef DEBUG1
-  failp = false;
-
-  /* Before */
-  if (*nmatches > 0 && *initptr > 0U) {
-    SA_i = csa_lookup(sarray,(*initptr)-1);
-    recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
-						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
-						   plusp,genestrand);
-    printf("%d\t%u\t%u\t",recount,(*initptr)-1,SA_i/*+ 1U*/);
-    c2 = Bytecoding_lcpchilddc_dc(&c1,(*initptr)-1,sarray->lcpchilddc);
-    printf("%c%c\t",c1,c2);
-    lcp_i = Bytecoding_lcpchilddc_lcp((*initptr)-1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
-    printf("%u\t",lcp_i);
-    lcp_next = Bytecoding_lcpchilddc_lcp((*initptr),/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
-    printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*initptr)-1,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-						 sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
-    if (genestrand == +2) {
-      if (plusp) {
-	Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
-      }
-    } else {
-      if (plusp) {
-	Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
-      }
-    }
-    printf("%s\n",Buffer);
-    if (recount >= *nmatches) {
-      printf("querylength is %d\n",querylength);
-      printf("false negative: recount %d at %u before init does equal expected nmatches %d\n",
-	     recount,SA_i,*nmatches);
-      failp = true;
-    }
-  }
-  printf("\n");
-
-
-  /* Hits */
-  lcp_prev = lcp_i;
-  for (k = 0; k < (int) (*finalptr - *initptr + 1) && k < MAX_DEBUG1_HITS; k++) {
-    SA_i = csa_lookup(sarray,(*initptr)+k);
-    recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
-						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
-						   plusp,genestrand);
-    printf("%d\t%u\t%u\t",recount,(*initptr)+k,SA_i/*+ 1U*/);
-    c2 = Bytecoding_lcpchilddc_dc(&c1,(*initptr)+k,sarray->lcpchilddc);
-    printf("%c%c\t",c1,c2);
-    lcp_i = Bytecoding_lcpchilddc_lcp((*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
-    lcp_next = Bytecoding_lcpchilddc_lcp((*initptr)+k+1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
-    printf("%u\t",lcp_i);
-    printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-						 sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
-    max_lcp = lcp_i;
-    if (lcp_prev > max_lcp) {
-      max_lcp = lcp_prev;
-    }
-    if (lcp_next > max_lcp) {
-      max_lcp = lcp_next;
-    }
-    if (max_lcp > 1000) {
-      max_lcp = 1000;
-    }
-
-    if (genestrand == +2) {
-      if (plusp) {
-	Genome_fill_buffer_convert_rev(SA_i,max_lcp+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_fwd(SA_i,max_lcp+1,Buffer);
-      }
-    } else {
-      if (plusp) {
-	Genome_fill_buffer_convert_fwd(SA_i,max_lcp+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_rev(SA_i,max_lcp+1,Buffer);
-      }
-    }
-    printf("%s\n",Buffer);
-    if (recount != *nmatches) {
-      printf("querylength is %d\n",querylength);
-      printf("false positive: recount %d at %u does not equal expected nmatches %d\n",
-	     recount,csa_lookup(sarray,(*initptr)),*nmatches);
-      failp = true;
-    }
-
-    lcp_prev = lcp_i;
-  }
-
-  if (k < (int) (*finalptr - *initptr + 1)) {
-    /* Overflow */
-    printf("...\n");
-    k = (int) (*finalptr - *initptr);
-    hit = csa_lookup(sarray,(*initptr)+k);
-    recount = Genome_consecutive_matches_rightward(query_compress,/*left*/hit-queryoffset,
-						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
-						   plusp,genestrand);
-    printf("%d\t%u\t%u\t",recount,(*initptr)+k,hit /*+ 1U*/);
-    c2 = Bytecoding_lcpchilddc_dc(&c1,(*initptr)+k,sarray->lcpchilddc);
-    printf("%c%c\t",c1,c2);
-    lcp_i = Bytecoding_lcpchilddc_lcp((*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
-    lcp_next = Bytecoding_lcpchilddc_lcp((*initptr)+k+1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions);
-    printf("%u\t",lcp_i);
-    printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*initptr)+k,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-						 sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
-    if (genestrand == +2) {
-      if (plusp) {
-	Genome_fill_buffer_convert_rev(hit,recount+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_fwd(hit,recount+1,Buffer);
-      }
-    } else {
-      if (plusp) {
-	Genome_fill_buffer_convert_fwd(hit,recount+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_rev(hit,recount+1,Buffer);
-      }
-    }
-    printf("%s\n",Buffer);
-    if (recount != *nmatches) {
-      printf("querylength is %d\n",querylength);
-      printf("false positive: recount %d at %u does not equal expected nmatches %d\n",
-	     recount,csa_lookup(sarray,*initptr),*nmatches);
-      failp = true;
-    }
-    /* hits[k] = sarray->array[(*initptr)++]; */
-  }
+#endif
 
+#if 0
+  printf("A => %u %u\n",sarray->initindexi[0],sarray->initindexj[0]);
+  printf("C => %u %u\n",sarray->initindexi[1],sarray->initindexj[1]);
+  printf("G => %u %u\n",sarray->initindexi[2],sarray->initindexj[2]);
+  printf("T => %u %u\n",sarray->initindexi[3],sarray->initindexj[3]);
+#endif
 
-  /* After */
-  if (*nmatches > 0 && (SA_i = csa_lookup(sarray,(*finalptr)+1)) > 0U) {
-    printf("\n");
-    recount = Genome_consecutive_matches_rightward(query_compress,/*left*/SA_i-queryoffset,
-						   /*pos5*/queryoffset,/*pos3*/queryoffset+querylength,
-						   plusp,genestrand);
-    printf("%d\t%u\t%u\t",recount,(*finalptr)+1,SA_i/*+ 1U*/);
-    c2 = Bytecoding_lcpchilddc_dc(&c1,(*finalptr)+1,sarray->lcpchilddc);
-    printf("%c%c\t",c1,c2);
-    printf("%u\t",Bytecoding_lcpchilddc_lcp((*finalptr)+1,/*bytes*/sarray->lcpchilddc,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
-    printf("%u\t",Bytecoding_lcpchilddc_lcp_next(&child_next,(*finalptr)+1,/*bytes*/sarray->lcpchilddc,sarray->child_guide,sarray->child_exceptions,
-						 sarray->child_guide_interval,sarray->lcp_exceptions,sarray->n_lcp_exceptions));
-    if (genestrand == +2) {
-      if (plusp) {
-	Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
-      }
-    } else {
-      if (plusp) {
-	Genome_fill_buffer_convert_fwd(SA_i,recount+1,Buffer);
-      } else {
-	Genome_fill_buffer_convert_rev(SA_i,recount+1,Buffer);
-      }
-    }
-    printf("%s\n",Buffer);
-    if (recount >= *nmatches) {
-      printf("querylength is %d\n",querylength);
-      printf("false negative: recount %d at %u after (*finalptr) does equal expected nmatches %d\n",
-	     recount,SA_i,*nmatches);
-      failp = true;
-    }
-  }
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
+  epi32_convert = _mm_set1_epi32(2147483648); /* 2^31 */
+#endif
 
-  if (failp == true) {
-    /* Can happen because $ ranks below 0 */
-    /* Can also happen with CMET or ATOI, since genome128_hr procedures find genome-to-query mismatches */
-    /* abort(); */
-  }
+#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
+  epi32_convert_256 = _mm256_set1_epi32(2147483648); /* 2^31 */
 #endif
 
+#if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN) && defined(USE_SHUFFLE_MASK)
+  /* Used by fill_positions_filtered_first */
+  shuffle_mask16[0] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1);
+  shuffle_mask16[1] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  3, 2, 1, 0);
+  shuffle_mask16[2] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,  7, 6, 5, 4);
+  shuffle_mask16[3] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1,  7, 6, 5, 4,  3, 2, 1, 0);
+  shuffle_mask16[4] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8);
+  shuffle_mask16[5] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8,  3, 2, 1, 0);
+  shuffle_mask16[6] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 11,10, 9, 8,  7, 6, 5, 4);
+  shuffle_mask16[7] =  _mm_set_epi8(-1,-1,-1,-1, 11,10, 9, 8,  7, 6, 5, 4,  3, 2, 1, 0);
+  shuffle_mask16[8] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12);
+  shuffle_mask16[9] =  _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12,  3, 2, 1, 0);
+  shuffle_mask16[10] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12,  7, 6, 5, 4);
+  shuffle_mask16[11] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12,  7, 6, 5, 4,  3, 2, 1, 0);
+  shuffle_mask16[12] = _mm_set_epi8(-1,-1,-1,-1, -1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8);
+  shuffle_mask16[13] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8,  3, 2, 1, 0);
+  shuffle_mask16[14] = _mm_set_epi8(-1,-1,-1,-1, 15,14,13,12, 11,10, 9, 8,  7, 6, 5, 4);
+  shuffle_mask16[15] = _mm_set_epi8(15,14,13,12, 11,10, 9, 8,  7, 6, 5, 4,  3, 2, 1, 0);
+#endif
+  
   return;
 }
 
 
+
 /* For fill_positions_all: ELT_VIRGIN -> ELT_FILLED */
 /* For fill_positions_filtered: ELT_VIRGIN -(1st call)-> ELT_UNSORTED -(2nd call)-> ELT_SORTED */
 typedef enum {ELT_VIRGIN, ELT_FILLED, ELT_UNSORTED, ELT_SORTED} Elt_status_T;
@@ -2143,25 +618,38 @@ Elt_fill_positions_all (Elt_T this, T sarray) {
     if (this->nmatches == 0 || this->npositions > EXCESS_SARRAY_HITS) {
       this->positions_allocated = this->positions = (Univcoord_T *) NULL;
       this->npositions_allocated = this->npositions = 0;
+
     } else {
 #ifdef USE_QSORT
-      this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
+      if (this->npositions == 0) {
+	this->positions_allocated = this->positions = (Univcoord_T *) NULL;
+	this->npositions = 0;
+      } else {
+	this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
+	i = 0;
+	ptr = this->initptr;
+	while (ptr <= this->finalptr) {
+	  if ((pos = Sarray_position(sarray,ptr++)) >= (Univcoord_T) this->querystart) {
+	    this->positions[i++] = pos - this->querystart;
+	  }
+	}
+	this->npositions = i;
+	qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+      }
+
 #else
       this->positions_allocated = this->positions = (Univcoord_T *) MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
-#endif
       i = 0;
       ptr = this->initptr;
       while (ptr <= this->finalptr) {
-	if ((pos = csa_lookup(sarray,ptr++)) >= (Univcoord_T) this->querystart) {
+	if ((pos = Sarray_position(sarray,ptr++)) >= (Univcoord_T) this->querystart) {
 	  this->positions[i++] = pos - this->querystart;
 	}
       }
       this->npositions = i;
-#ifdef USE_QSORT
-      qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
-#else
       Sedgesort_uint4(this->positions,this->npositions);
 #endif
+
     }
   }
 
@@ -2301,7 +789,182 @@ fill_positions_std (int *npositions, Univcoord_T low_adj, Univcoord_T high_adj,
 
 #ifdef HAVE_ALLOCA
 
-#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
+#if defined(HAVE_AVX512) && !defined(WORDS_BIGENDIAN)
+
+/* AVX512 version is much simpler because it generates a mask directly
+   and it has compare operations for epu32 */
+
+/* Keeps the suffix-array entries in this->initptr..this->finalptr whose value
+   lies in [low + this->querystart, high + this->querystart], subtracts
+   this->querystart from each, sorts them, and stores them in a newly
+   allocated this->positions (NULL when none qualify).  A positions_allocated
+   left over from a previous call is freed first.  Entries are tested 16 at a
+   time as 32-bit lanes; the last partial group is handled with a load mask.
+   NOTE(review): the lane width assumes Univcoord_T is 4 bytes -- confirm. */
+static void
+fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
+  Univcoord_T low_adj, high_adj;
+  Univcoord_T *array = Sarray_array(sarray);
+  Univcoord_T *array_end, *array_ptr;
+  Univcoord_T *positions_temp;
+  Univcoord_T *out;
+  __m512i adjusted;
+  __m512i low_bound, high_bound, values, adj;
+  __mmask16 mask;
+  long nremaining;
+  int nfound;
+#if defined(REQUIRE_ALIGNMENT)
+  Univcoord_T value0;
+  int n_prealign, k;
+#endif
+#ifdef DEBUG7
+  int i;
+#endif
+
+  debug(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+	       low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
+  debug7(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n",
+		low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches));
+
+  if (this->positions_allocated != NULL) {
+    /* Filled from a previous call */
+    FREE(this->positions_allocated);
+  }
+
+  if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) {
+    this->all_positions = (Univcoord_T *) NULL;
+
+  } else {
+    /* Function surrounded by HAVE_ALLOCA */
+#ifdef USE_QSORT
+    positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T));
+#else
+    positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1 + 1) * sizeof(Univcoord_T));
+#endif
+
+    low_adj = low + this->querystart;
+    high_adj = high + this->querystart;
+
+    /* Inclusive bounds, so that low_adj - 1 and high_adj + 1 (which wrap at
+       the ends of the unsigned range) are never formed */
+    low_bound = _mm512_set1_epi32(low_adj);
+    high_bound = _mm512_set1_epi32(high_adj);
+    adj = _mm512_set1_epi32(this->querystart);
+
+    this->npositions_allocated = this->npositions = 0;
+    array_ptr = &(array[this->initptr]);
+    array_end = &(array[this->finalptr]);
+
+#if defined(REQUIRE_ALIGNMENT)
+    /* Initial part: scalar test of the entries before the next 64-byte boundary */
+    n_prealign = ((64 - ((UINT8) array_ptr & 0x3F))/sizeof(Univcoord_T)) & 0xF;
+    debug7(printf("Initial ptr is at location %p.  Need %d to get to 512-bit boundary\n",(void *) array_ptr,n_prealign));
+
+    debug7(printf("Initial part:\n"));
+    if (n_prealign > this->finalptr - this->initptr + 1) {
+      n_prealign = this->finalptr - this->initptr + 1;
+    }
+    for (k = 0; k < n_prealign; k++) {
+      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",*array_ptr,low_adj,high_adj));
+      if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) {
+        *out++ = value0 - this->querystart;
+      }
+    }
+#endif	/* REQUIRE_ALIGNMENT */
+
+    /* Full groups: loop only while 16 entries remain, so that no load reads
+       past finalptr and no extra entry reaches positions_temp */
+    nremaining = (long) (array_end - array_ptr) + 1;
+    while (nremaining >= 16) {
+#if defined(REQUIRE_ALIGNMENT)
+      /* Use stream_load to avoid polluting the cache with suffix array entries */
+      values = _mm512_stream_load_si512((__m512i *) array_ptr);
+#else
+      /* It looks like loadu is just as fast as load */
+      values = _mm512_loadu_si512((__m512i *) array_ptr);
+#endif
+      debug7b(print_vector_uint_512(values));
+
+      mask = _mm512_cmpge_epu32_mask(values,low_bound) & _mm512_cmple_epu32_mask(values,high_bound);
+
+      /* Example: 0xCCCC (16 bits) */
+      debug7b(printf("%08X\n",mask));
+
+      /* Subtract querystart in all lanes; only the lanes set in mask are stored */
+      adjusted = _mm512_sub_epi32(values,adj);
+      _mm512_mask_compressstoreu_epi32((void *) out,mask,adjusted);
+
+#ifdef HAVE_POPCNT
+      nfound = _popcnt32(mask);
+#elif defined HAVE_MM_POPCNT
+      nfound = _mm_popcnt_u32(mask);
+#else
+      nfound = __builtin_popcount(mask);
+#endif
+      debug7b(printf("mask: %08X (%d ones)\n",mask,nfound));
+      out += nfound;
+
+      array_ptr += 16;
+      nremaining -= 16;
+    }
+
+    /* Partial group at end: 0..15 entries */
+    debug7(printf("\nFinal part:\n"));
+    if (nremaining > 0) {
+      mask = (__mmask16) ((1U << nremaining) - 1U);
+      /* Zero-masked load, so unselected lanes do not depend on an unset register */
+      values = _mm512_maskz_loadu_epi32(mask,(void *) array_ptr);
+      mask &= _mm512_cmpge_epu32_mask(values,low_bound) & _mm512_cmple_epu32_mask(values,high_bound);
+
+      adjusted = _mm512_sub_epi32(values,adj);
+      _mm512_mask_compressstoreu_epi32((void *) out,mask,adjusted);
+
+#ifdef HAVE_POPCNT
+      nfound = _popcnt32(mask);
+#elif defined HAVE_MM_POPCNT
+      nfound = _mm_popcnt_u32(mask);
+#else
+      nfound = __builtin_popcount(mask);
+#endif
+      debug7b(printf("mask: %08X (%d ones)\n",mask,nfound));
+      out += nfound;
+    }
+
+    this->npositions_allocated = this->npositions = out - positions_temp;
+    debug7(printf("SIMD method found %d positions\n",this->npositions));
+
+    /* Copy the positions into heap from temp in stack */
+    if (this->npositions == 0) {
+      this->positions_allocated = this->positions = (Univcoord_T *) NULL;
+    } else {
+      debug7(printf("Sorting %d positions\n",this->npositions));
+#ifdef USE_QSORT
+      qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare);
+#else
+      Sedgesort_uint4(positions_temp,this->npositions);
+#endif
+
+#ifdef USE_QSORT
+      this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T));
+#else
+      this->positions_allocated = this->positions = MALLOC((this->npositions + 1) * sizeof(Univcoord_T));
+#endif
+      memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T));
+#ifdef DEBUG7
+      for (i = 0; i < this->npositions; i++) {
+        printf("%u\n",this->positions[i]);
+      }
+#endif
+    }
+
+    /* Function surrounded by HAVE_ALLOCA */
+    FREEA(positions_temp);
+  }
+
+  return;
+}
+
+#elif defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN)
 
 /* Using pext method, because _mm256_shuffle_epi32 doesn't work well
 because it works only within lanes, and MASTER_CONTROL does not extend
@@ -2310,7 +1006,7 @@ well to 256 bits */
 static void
 fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
   Univcoord_T low_adj, high_adj;
-  Univcoord_T *array = sarray->array, value0;
+  Univcoord_T *array = Sarray_array(sarray), value0;
   Sarrayptr_T *array_stop, *array_end, *array_ptr;
   Univcoord_T *positions_temp;
   Univcoord_T *out;
@@ -2395,7 +1091,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
     }
     array_end = &(array[this->finalptr]);
 
-    while (array_ptr < array_stop) {
+    while (array_ptr <= array_stop) {
 
 #if defined(REQUIRE_ALIGNMENT)
       /* Use stream_load to avoid polluting the cache with suffix array entries */
@@ -2565,7 +1261,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 static void
 fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
   Univcoord_T low_adj, high_adj;
-  Univcoord_T *array = sarray->array, value0;
+  Univcoord_T *array = Sarray_array(sarray), value0;
   Sarrayptr_T *array_stop, *array_end, *array_ptr;
   Univcoord_T *positions_temp;
   Univcoord_T *out;
@@ -2651,7 +1347,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 				   0x00, 0x80, 0xC0, 0xBC, 0x00, 0x00, 0x00, 0xC0);
 #endif
 
-    while (array_ptr < array_stop) {
+    while (array_ptr <= array_stop) {
 #if defined(REQUIRE_ALIGNMENT)
 
 #ifdef HAVE_SSE4_1      
@@ -2769,7 +1465,7 @@ static void
 fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) {
   Sarrayptr_T ptr;
   Univcoord_T low_adj, high_adj;
-  Univcoord_T *array = sarray->array;
+  Univcoord_T *array = Sarray_array(sarray);
   Univcoord_T value3, value2, value1, value0;
   Univcoord_T *positions_temp;
 #if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
@@ -2812,8 +1508,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
       /* Handle in normal manner */
       debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
       while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
+	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",Sarray_position(sarray,ptr),low_adj,high_adj));
+	if ((value0 = Sarray_position(sarray,ptr++)) < low_adj) {
 	  /* Skip */
 	} else if (value0 > high_adj) {
 	  /* Skip */
@@ -2916,8 +1612,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
       /* Final part */
       debug7(printf("\nFinal part:\n"));
       while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
+	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",Sarray_position(sarray,ptr),low_adj,high_adj));
+	if ((value0 = Sarray_position(sarray,ptr++)) < low_adj) {
 	  /* Skip */
 	} else if (value0 > high_adj) {
 	  /* Skip */
@@ -2931,8 +1627,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 #else
 
     while (ptr <= this->finalptr) {
-      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-      if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
+      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",Sarray_position(sarray,ptr),low_adj,high_adj));
+      if ((value0 = Sarray_position(sarray,ptr++)) < low_adj) {
 	/* Skip */
       } else if (value0 > high_adj) {
 	/* Skip */
@@ -2995,7 +1691,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
   int i;
   Univcoord_T low_adj, high_adj;
   Univcoord_T value3, value2, value1, value0;
-  Univcoord_T *array = sarray->array;
+  Univcoord_T *array = Sarray_array(sarray);
   Univcoord_T *more_positions;
 #if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
 #ifdef HAVE_64_BIT
@@ -3037,8 +1733,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
       /* Handle in normal manner */
       debug7(printf("Small batch, because %u + 3 <= %u\n",ptr,this->finalptr));
       while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
+	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",Sarray_position(sarray,ptr),low_adj,high_adj));
+	if ((value0 = Sarray_position(sarray,ptr++)) < low_adj) {
 	  /* Skip */
 	} else if (value0 > high_adj) {
 	  /* Skip */
@@ -3164,8 +1860,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
       /* Final part */
       debug7(printf("\nFinal part:\n"));
       while (ptr <= this->finalptr) {
-	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-	if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
+	debug7a(printf("Looking at value %u, relative to low %u and high %u\n",Sarray_position(sarray,ptr),low_adj,high_adj));
+	if ((value0 = Sarray_position(sarray,ptr++)) < low_adj) {
 	  /* Skip */
 	} else if (value0 > high_adj) {
 	  /* Skip */
@@ -3183,8 +1879,8 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 #else
 
     while (ptr <= this->finalptr) {
-      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",csa_lookup(sarray,ptr),low_adj,high_adj));
-      if ((value0 = csa_lookup(sarray,ptr++)) < low_adj) {
+      debug7a(printf("Looking at value %u, relative to low %u and high %u\n",Sarray_position(sarray,ptr),low_adj,high_adj));
+      if ((value0 = Sarray_position(sarray,ptr++)) < low_adj) {
 	/* Skip */
       } else if (value0 > high_adj) {
 	/* Skip */
@@ -3218,7 +1914,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 #if defined(HAVE_SSE2) && !defined(WORDS_BIGENDIAN)
       if (this->initptr + 4 < ptr) {
 	while (i < this->npositions) {
-	  if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
+	  if ((value0 = Sarray_position(sarray,--ptr)) < low_adj) {
 	    /* Skip */
 	  } else if (value0 > high_adj) {
 	    /* Skip */
@@ -3297,7 +1993,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 	  
 	/* Last part */
 	while (i < this->npositions) {
-	  if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
+	  if ((value0 = Sarray_position(sarray,--ptr)) < low_adj) {
 	    /* Skip */
 	  } else if (value0 > high_adj) {
 	    /* Skip */
@@ -3310,7 +2006,7 @@ fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_
 #else
 
       while (i < this->npositions) {
-	if ((value0 = csa_lookup(sarray,--ptr)) < low_adj) {
+	if ((value0 = Sarray_position(sarray,--ptr)) < low_adj) {
 	  /* Skip */
 	} else if (value0 > high_adj) {
 	  /* Skip */
@@ -3427,6 +2123,7 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
 #ifdef WORDS_BIGENDIAN
   int i;
 #endif
+  Univcoord_T *array = Sarray_array(sarray);
 
 
   if (this->nmatches == 0 || this->finalptr - this->initptr + 1 > EXCESS_SARRAY_HITS) {
@@ -3481,10 +2178,10 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
 #endif
 #ifdef WORDS_BIGENDIAN
       for (i = 0; i < this->n_all_positions; i++) {
-	this->all_positions[i] = Bigendian_convert_uint(sarray->array[this->initptr+i]);
+	this->all_positions[i] = Bigendian_convert_uint(array[this->initptr+i]);
       }
 #else
-      memcpy(this->all_positions,&(sarray->array[this->initptr]),this->n_all_positions*sizeof(Univcoord_T));
+      memcpy(this->all_positions,&(array[this->initptr]),this->n_all_positions*sizeof(Univcoord_T));
 #endif
 #ifdef USE_QSORT
       qsort(this->all_positions,this->n_all_positions,sizeof(Univcoord_T),Univcoord_compare);
@@ -3510,7 +2207,7 @@ Elt_fill_positions_filtered (Elt_T this, T sarray, Univcoord_T goal, Univcoord_T
 #ifdef DEBUG8
   positions_std = fill_positions_std(&npositions_std,/*low_adj*/low + this->querystart,
 				     /*high_adj*/high + this->querystart,
-				     this->initptr,this->finalptr,this->querystart,sarray->array);
+				     this->initptr,this->finalptr,this->querystart,array);
   positions_compare(this->positions_allocated,this->npositions_allocated,positions_std,npositions_std);
   FREE(positions_std);
 #endif
@@ -5314,9 +4011,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
 	    /* Create a new elt with new positions */
 	    querystart = ((Elt_T) elt_tree[j]->first)->querystart_leftward;
 	    /* queryend was computed above */
-	    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
-			  /*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
-			  query_compress,sarray,plusp,genestrand,conversion);
+	    Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
+			/*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
+			query_compress,sarray,plusp,genestrand,conversion);
 	    elt_tree[j] = List_pop(elt_tree[j],(void **) &elt);
 	    if (elt->temporaryp == true) {
 	      Elt_free(&elt);
@@ -5382,9 +4079,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
 	    /* Create a new elt with new positions */
 	    querystart = ((Elt_T) elt_tree[j]->first)->querystart_leftward;
 	    /* queryend was computed above */
-	    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
-			  /*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
-			  query_compress,sarray,plusp,genestrand,conversion);
+	    Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
+			/*querylength*/(queryend + 1) - querystart,/*queryoffset*/querystart,
+			query_compress,sarray,plusp,genestrand,conversion);
 	    elt_tree[j] = List_pop(elt_tree[j],(void **) &elt);
 	    if (elt->temporaryp == true) {
 	      Elt_free(&elt);
@@ -5446,9 +4143,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
       /* B.  Try subdividing elt using 16-mers every 8 */
       debug13(printf("B.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
       for (querypos = queryend - 16; querypos >= querystart; querypos -= 8) {
-	sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-		      /*querylength*/16,/*queryoffset*/querypos,
-		      query_compress,sarray,plusp,genestrand,conversion);
+	Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+		    /*querylength*/16,/*queryoffset*/querypos,
+		    query_compress,sarray,plusp,genestrand,conversion);
 	elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
 	elt_tree[i] = List_push(elt_tree[i],(void *) elt);
 	Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
@@ -5460,9 +4157,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
 	/* C.  Try subdividing elt using 16-mers every 1 */
 	debug13(printf("C.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
 	for (querypos = queryend - 16; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/16,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
+	  Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+		      /*querylength*/16,/*queryoffset*/querypos,
+		      query_compress,sarray,plusp,genestrand,conversion);
 	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
 	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
 	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
@@ -5475,9 +4172,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
 	/* D.  Try subdividing elt using 8-mers every 1 */
 	debug13(printf("D.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
 	for (querypos = queryend - 8; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/8,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
+	  Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+		      /*querylength*/8,/*queryoffset*/querypos,
+		      query_compress,sarray,plusp,genestrand,conversion);
 	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
 	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
 	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
@@ -5544,9 +4241,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
       /* B.  Try subdividing elt using 16-mers every 8 */
       debug13(printf("B.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
       for (querypos = queryend - 16; querypos >= querystart; querypos -= 8) {
-	sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
-		      /*querylength*/16,/*queryoffset*/querystart,
-		      query_compress,sarray,plusp,genestrand,conversion);
+	Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querystart]),
+		    /*querylength*/16,/*queryoffset*/querystart,
+		    query_compress,sarray,plusp,genestrand,conversion);
 	elt = Elt_new(querystart,nmatches,initptr,finalptr,/*temporaryp*/true);
 	elt_tree[i] = List_push(elt_tree[i],(void *) elt);
 	Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
@@ -5558,9 +4255,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
 	/* C.  Try subdividing elt using 16-mers every 1 */
 	debug13(printf("C.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
 	for (querypos = queryend - 16; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/16,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
+	  Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+		      /*querylength*/16,/*queryoffset*/querypos,
+		      query_compress,sarray,plusp,genestrand,conversion);
 	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
 	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
 	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
@@ -5573,9 +4270,9 @@ get_diagonals (Univdiag_T *middle_diagonal, List_T *best_right_diagonals, List_T
 	/* D.  Try subdividing elt using 8-mers every 1 */
 	debug13(printf("D.  Try to subdivide elt region at %d..%d\n",querystart,queryend));
 	for (querypos = queryend - 8; querypos >= querystart; querypos -= 1) {
-	  sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
-			/*querylength*/8,/*queryoffset*/querypos,
-			query_compress,sarray,plusp,genestrand,conversion);
+	  Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryptr[querypos]),
+		      /*querylength*/8,/*queryoffset*/querypos,
+		      query_compress,sarray,plusp,genestrand,conversion);
 	  elt = Elt_new(querypos,nmatches,initptr,finalptr,/*temporaryp*/true);
 	  elt_tree[i] = List_push(elt_tree[i],(void *) elt);
 	  Elt_fill_positions_filtered(elt,sarray,goal,low,high,query_compress,plusp,genestrand,/*multiplep*/false);
@@ -5988,7 +4685,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
 
 		Univdiag_T middle_diagonal, List_T best_right_diagonals, List_T best_left_diagonals,
 
-		int querylength, Compress_T query_compress, Univcoord_T chroffset,
+		int querylength, Compress_T query_compress, Univcoord_T chroffset, Chrpos_T chrlength,
 		bool plusp, int genestrand, int max_mismatches_allowed) {
   List_T middle_path;
   List_T p;
@@ -6060,7 +4757,7 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
 #endif
 
 
-  debug13(printf("***Entered find_best_path\n"));
+  debug13(printf("***Entered find_best_path with chrlength %u\n",chrlength));
 
 #ifdef SUBDIVIDE_ENDS
   mappings = (Chrpos_T **) MALLOCA(querylength * sizeof(Chrpos_T *));
@@ -6426,9 +5123,14 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
 
   /* A5. Process common diagonal from right */
   while (common_diagonal != NULL) {
-    middle_path = List_push(middle_path,(void *) common_diagonal);
-    debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
-		   common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal - chroffset));
+    if (middle_diagonal->univdiagonal < chroffset + chrlength && common_diagonal->univdiagonal > chroffset + chrlength) {
+      debug13(printf("Cannot handle common diagonal across circular origin: query %d..%d, diagonal %u\n",
+		     common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal - chroffset));
+    } else {
+      middle_path = List_push(middle_path,(void *) common_diagonal);
+      debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
+		     common_diagonal->querystart,common_diagonal->queryend,common_diagonal->univdiagonal - chroffset));
+    }
     common_diagonal = common_diagonal->prev;
   }
 
@@ -6739,9 +5441,14 @@ find_best_path (List_T *right_paths, Intlist_T *right_endpoints_sense, Intlist_T
   /* Pops off in reverse */
   for (p = diagonal_path; p != NULL; p = List_next(p)) {
     diagonal = (Univdiag_T) List_head(p);
-    debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
-		   diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset));
-    middle_path = List_push(middle_path,(void *) diagonal);
+    if (middle_diagonal->univdiagonal > chroffset + chrlength && common_diagonal->univdiagonal < chroffset + chrlength) {
+      debug13(printf("Cannot handle common diagonal across circular origin: query %d..%d, diagonal %u\n",
+		     diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset));
+    } else {
+      debug13(printf("Pushing common diagonal onto middle: query %d..%d, diagonal %u\n",
+		     diagonal->querystart,diagonal->queryend,diagonal->univdiagonal - chroffset));
+      middle_path = List_push(middle_path,(void *) diagonal);
+    }
   }
   List_free(&diagonal_path);
 
@@ -7369,6 +6076,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
 
   int splice_pos;
   double donor_prob, acceptor_prob;
+  int introntype;
 
   bool sense_acceptable_p, antisense_acceptable_p, sense_intronp, antisense_intronp;
   Univcoord_T left, prev_left;
@@ -7586,7 +6294,7 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
 		     prev_diagonal->querystart,diagonal->queryend,indel_pos,best_nmismatches_i,best_nmismatches_j));
       
     } else if (left <= prev_left + max_deletionlen) {
-      /* Deletion */
+      /* Deletion (or short intron) */
       nindels = left - prev_left;
 #if 0
       max_mismatches_allowed = (diagonal->querystart - prev_diagonal->queryend - 1);
@@ -7598,16 +6306,61 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
 	max_mismatches_allowed += 1;
       }
 #endif
-      if ((indel_pos = Indel_resolve_middle_deletion(&best_nmismatches_i,&best_nmismatches_j,
+      if ((indel_pos = Indel_resolve_middle_deletion(&introntype,&best_nmismatches_i,&best_nmismatches_j,
 						     /*left*/prev_left,/*indels*/-nindels,query_compress,
 						     prev_diagonal->querystart,diagonal->queryend,querylength,
-						     max_mismatches_allowed,/*plusp:true*/true,genestrand)) < 0) {
+						     max_mismatches_allowed,/*plusp:true*/true,genestrand,
+						     min_intronlength)) < 0) {
 	sense_junctions = List_push(sense_junctions,NULL);
 	antisense_junctions = List_push(antisense_junctions,NULL);
-      } else {
+      } else if (nindels < min_intronlength) {
+	/* Cannot be an intron, so must be a deletion */
 	deletionpos = prev_left + indel_pos;
 	sense_junctions = List_push(sense_junctions,Junction_new_deletion(nindels,deletionpos));
 	antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
+      } else if ((sensedir = Intron_canonical_sensedir(introntype)) == SENSE_NULL) {
+	/* No intron dinucleotides found, so must be a deletion */
+	deletionpos = prev_left + indel_pos;
+	sense_junctions = List_push(sense_junctions,Junction_new_deletion(nindels,deletionpos));
+	antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
+      } else {
+	deletionpos = prev_left + indel_pos;
+	if (plusp == true) {
+	  if (sensedir == SENSE_FORWARD) {
+	    donor_prob = Maxent_hr_donor_prob(deletionpos,chroffset);
+	    acceptor_prob = Maxent_hr_acceptor_prob(deletionpos+nindels,chroffset);
+	    sense_junctions = List_push(sense_junctions,Junction_new_splice(/*splice_distance*/nindels,SENSE_FORWARD,
+									    donor_prob,acceptor_prob));
+	    antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
+
+	  } else {
+	    donor_prob = Maxent_hr_antidonor_prob(deletionpos+nindels,chroffset);
+	    acceptor_prob = Maxent_hr_antiacceptor_prob(deletionpos,chroffset);
+	    antisense_junctions = List_push(antisense_junctions,Junction_new_splice(/*splice_distance*/nindels,SENSE_ANTI,
+										    donor_prob,acceptor_prob));
+	    sense_junctions = List_push(sense_junctions,Junction_new_deletion(nindels,deletionpos));
+
+	  }
+
+	} else {
+	  sensedir = (sensedir == SENSE_FORWARD) ? SENSE_ANTI : SENSE_FORWARD;
+	  if (sensedir == SENSE_FORWARD) {
+	    /* check */
+	    donor_prob = Maxent_hr_antidonor_prob(deletionpos+nindels,chroffset);
+	    acceptor_prob = Maxent_hr_antiacceptor_prob(deletionpos,chroffset);
+	    sense_junctions = List_push(sense_junctions,Junction_new_splice(/*splice_distance*/nindels,SENSE_FORWARD,
+									    donor_prob,acceptor_prob));
+	    antisense_junctions = List_push(antisense_junctions,Junction_new_deletion(nindels,deletionpos));
+
+	  } else {
+	    /* check */
+	    donor_prob = Maxent_hr_donor_prob(deletionpos,chroffset);
+	    acceptor_prob = Maxent_hr_acceptor_prob(deletionpos+nindels,chroffset);
+	    antisense_junctions = List_push(antisense_junctions,Junction_new_splice(/*splice_distance*/nindels,SENSE_ANTI,
+										    donor_prob,acceptor_prob));
+	    sense_junctions = List_push(sense_junctions,Junction_new_deletion(nindels,deletionpos));
+	  }
+	}
       }
 
       if ((prev_nmismatches = Intlist_head(sense_nmismatches)) < 0) {
@@ -8228,6 +6981,10 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
   sense_acceptable_p = endpoints_acceptable_p(&sense_intronp,sense_junctions,sense_endpoints);
   antisense_acceptable_p = endpoints_acceptable_p(&antisense_intronp,antisense_junctions,
 						  antisense_endpoints);
+  debug13(printf("sense_acceptable_p %d, antisense_acceptable_p %d\n",
+		 sense_acceptable_p,antisense_acceptable_p));
+  debug13(printf("sense_intronp %d, antisense_intronp %d\n",sense_intronp,antisense_intronp));
+
   if (sense_acceptable_p == true && antisense_acceptable_p == true) {
     if (sense_intronp == true || right_ambig_sense != NULL || left_ambig_sense != NULL) {
       sense_sensedir = SENSE_FORWARD;
@@ -8261,7 +7018,9 @@ solve_via_segments (int *found_score, bool *completep, List_T hits, List_T middl
       Junction_gc(&antisense_junctions);
 
     } else {
-      /* Create just both sense and antisense hits */
+      /* Create both sense and antisense hits */
+      debug13(printf("Creating both sense and antisense hits\n"));
+      
       if ((hit = Stage3end_new_substrings(&(*found_score),sense_endpoints,sense_lefts,
 					  sense_nmismatches,sense_junctions,querylength,query_compress,
 					  right_ambig_sense,left_ambig_sense,plusp,genestrand,sense_sensedir,
@@ -8465,8 +7224,8 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
 #endif
 
 
-  debug(printf("\nStarting Sarray_search_greedy with querylength %d and indexsize %d and nmisses_allowed %d, genestrand %d\n",
-	       querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
+  debug(printf("\nStarting Sarray_search_greedy with querylength %d and nmisses_allowed %d, genestrand %d\n",
+	       querylength,nmisses_allowed,genestrand));
   if (nmisses_allowed < 0) {
     nmisses_allowed = 0;
 #if 0
@@ -8497,10 +7256,10 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
   plus_querypos = 0;
   minus_querypos = 0;
   niter = 0;
-  while (niter < nmisses_allowed && plus_querypos < querylength && minus_querypos < querylength) {
-    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[plus_querypos]),
-		  querylength - plus_querypos,/*queryoffset*/plus_querypos,
-		  query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,plus_conversion);
+  while (niter <= nmisses_allowed && plus_querypos < querylength && minus_querypos < querylength) {
+    Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryuc_ptr[plus_querypos]),
+		querylength - plus_querypos,/*queryoffset*/plus_querypos,
+		query_compress_fwd,plus_sarray,/*plusp*/true,genestrand,plus_conversion);
     elt = Elt_new(plus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false);
     if (nmatches > best_plus_nmatches && elt->nptr <= MAX_HITS_FOR_BEST_ELT) {
       best_plus_elt = elt;
@@ -8511,11 +7270,11 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
     plus_querypos += nmatches;
     plus_querypos += 1;		/* To skip the presumed mismatch */
 
-    sarray_search(&initptr,&finalptr,&successp,&nmatches,&(queryrc[minus_querypos]),
-		  querylength - minus_querypos,/*queryoffset*/minus_querypos,
-		  query_compress_rev,minus_sarray,/*plusp*/false,genestrand,minus_conversion);
+    Sarray_read(&initptr,&finalptr,&successp,&nmatches,&(queryrc[minus_querypos]),
+		querylength - minus_querypos,/*queryoffset*/minus_querypos,
+		query_compress_rev,minus_sarray,/*plusp*/false,genestrand,minus_conversion);
     elt = Elt_new(minus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false);
-    if (nmatches > best_minus_nmatches && elt->nptr < MAX_HITS_FOR_BEST_ELT) {
+    if (nmatches > best_minus_nmatches && elt->nptr <= MAX_HITS_FOR_BEST_ELT) {
       best_minus_elt = elt;
       best_minus_nmatches = nmatches;
       best_minus_i = niter;
@@ -8598,7 +7357,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
       for (i = 0; i < nseeds_plus; i++) {
 	left = best_plus_elt->positions[i];
-	if (left > chrhigh) {
+	if (left >= chrhigh) {
 	  chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
 	  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
 	  /* *chrhigh += 1U; */
@@ -8667,7 +7426,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
       Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,/*chrnum*/1,circular_typeint);
       for (i = 0; i < nseeds_minus; i++) {
 	left = best_minus_elt->positions[i];
-	if (left > chrhigh) {
+	if (left >= chrhigh) {
 	  chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
 	  Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
 	  /* *chrhigh += 1U; */
@@ -8719,7 +7478,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
     if (1 /*|| scores_plus[i] > best_score - 20*/) {
       diagonal = middle_diagonals_plus[i];
       left = diagonal->univdiagonal;
-      if (left > chrhigh) {
+      if (left >= chrhigh) {
 	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
 	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
 	/* *chrhigh += 1U; */
@@ -8741,7 +7500,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
 					   &left_amb_probsi_sense,&left_amb_probsi_antisense,
 					   &left_amb_probsj_sense,&left_amb_probsj_antisense,
 					   &(fillin_diagonals_plus[i]),diagonal,best_right_diagonals_plus[i],best_left_diagonals_plus[i],
-					   querylength,query_compress_fwd,chroffset,
+					   querylength,query_compress_fwd,chroffset,chrlength,
 					   /*plusp*/true,genestrand,/*nmismatches_allowed*/nmisses_allowed);
 
       hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_plus[i],
@@ -8802,7 +7561,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
     if (1 /*|| scores_minus[i] > best_score - 20*/) {
       diagonal = middle_diagonals_minus[i];
       left = diagonal->univdiagonal;
-      if (left > chrhigh) {
+      if (left >= chrhigh) {
 	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
 	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
 	/* *chrhigh += 1U; */
@@ -8824,7 +7583,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
 					    &left_amb_probsi_sense,&left_amb_probsi_antisense,
 					    &left_amb_probsj_sense,&left_amb_probsj_antisense,
 					    &(fillin_diagonals_minus[i]),diagonal,best_right_diagonals_minus[i],best_left_diagonals_minus[i],
-					    querylength,query_compress_rev,chroffset,
+					    querylength,query_compress_rev,chroffset,chrlength,
 					    /*plusp*/false,genestrand,/*nmismatches_allowed*/nmisses_allowed);
       
       hits = solve_via_segments(&(*found_score),&completep,hits,middle_path_minus[i],
@@ -8888,7 +7647,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
   for (i = 0; i < nseeds_plus; i++) {
     if (incomplete_result_p(middle_path_plus[i],querylength) == true) {
       left = best_plus_elt->positions[i];
-      if (left > chrhigh) {
+      if (left >= chrhigh) {
 	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
 	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
 	/* chrhigh += 1U; */
@@ -8905,7 +7664,7 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
   for (i = 0; i < nseeds_minus; i++) {
     if (incomplete_result_p(middle_path_minus[i],querylength) == true) {
       left = best_minus_elt->positions[i];
-      if (left > chrhigh) {
+      if (left >= chrhigh) {
 	chrnum = Univ_IIT_get_one(chromosome_iit,left,left);
 	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
 	/* chrhigh += 1U; */
diff --git a/src/sarray-search.h b/src/sarray-search.h
new file mode 100644
index 0000000..f30030e
--- /dev/null
+++ b/src/sarray-search.h
@@ -0,0 +1,32 @@
+/* $Id: sarray-search.h 207324 2017-06-14 19:41:18Z twu $ */
+#ifndef SARRAY_SEARCH_INCLUDED
+#define SARRAY_SEARCH_INCLUDED
+#include "access.h"
+#include "bool.h"
+#include "mode.h"
+#include "genome.h"
+#include "compress.h"
+#include "genomicpos.h"
+#include "splicetrie.h"
+#include "iit-read-univ.h"
+#include "sarray-read.h"
+
+
+#define T Sarray_T
+
+extern void
+Sarray_search_setup (T sarray_fwd_in, T sarray_rev_in, Genome_T genome_in, Mode_T mode,
+		     Univ_IIT_T chromosome_iit_in, int circular_typeint_in, bool *circularp_in,
+		     Chrpos_T shortsplicedist_in, int splicing_penalty_in,
+		     int min_intronlength_in, int max_deletionlength, int max_end_deletions,
+		     int max_middle_insertions_in, int max_end_insertions,
+		     Univcoord_T *splicesites_in, Splicetype_T *splicetypes_in,
+		     Chrpos_T *splicedists_in, int nsplicesites_in);
+
+extern List_T
+Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int querylength,
+		      Compress_T query_compress_fwd, Compress_T query_compress_rev,
+		      int nmisses_allowed, int genestrand);
+
+#undef T
+#endif
diff --git a/src/sequence.c b/src/sequence.c
index b657ee3..af85535 100644
--- a/src/sequence.c
+++ b/src/sequence.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sequence.c 184174 2016-02-12 19:48:42Z twu $";
+static char rcsid[] = "$Id: sequence.c 207388 2017-06-15 21:01:15Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -892,7 +892,7 @@ input_sequence (int *nextchar, char **pointer1, int *length1, char **pointer2a,
    procedures have their own specialized creators. */
 T
 Sequence_genomic_new (char *contents, int length, bool copyp) {
-  T new = (T) MALLOC(sizeof(*new));
+  T new = (T) MALLOC_IN(sizeof(*new));
   char *copy;
 
   new->acc = (char *) NULL;
@@ -905,7 +905,7 @@ Sequence_genomic_new (char *contents, int length, bool copyp) {
 #endif
 
   if (copyp == true) {
-    copy = (char *) CALLOC(length+1,sizeof(char));
+    copy = (char *) MALLOC_IN((length+1)*sizeof(char));
     strncpy(copy,contents,length);
     new->contents = copy;
     new->contents_alloc = copy;
@@ -935,7 +935,7 @@ make_complement (char *sequence, unsigned int length) {
   char *complement;
   int i, j;
 
-  complement = (char *) CALLOC_IN(length+1,sizeof(char));
+  complement = (char *) MALLOC_IN((length+1)*sizeof(char));
   for (i = length-1, j = 0; i >= 0; i--, j++) {
     complement[j] = complCode[(int) sequence[i]];
   }
@@ -951,7 +951,7 @@ make_reverse (char *sequence, unsigned int length) {
   if (sequence == NULL) {
     return (char *) NULL;
   } else {
-    reverse = (char *) CALLOC_IN(length+1,sizeof(char));
+    reverse = (char *) MALLOC_IN((length+1)*sizeof(char));
     for (i = length-1, j = 0; i >= 0; i--, j++) {
       reverse[j] = sequence[i];
     }
@@ -1122,7 +1122,7 @@ make_uppercase (char *sequence, unsigned int length) {
 #endif
   unsigned int i;
 
-  uppercase = (char *) CALLOC_IN(length+1,sizeof(char));
+  uppercase = (char *) MALLOC_IN((length+1)*sizeof(char));
   for (i = 0; i < length; i++) {
     uppercase[i] = uppercaseCode[(int) sequence[i]];
   }
@@ -1143,7 +1143,7 @@ Sequence_uppercase (T this) {
   if (this->quality_alloc == NULL) {
     new->quality = new->quality_alloc = (char *) NULL;
   } else {
-    new->quality = new->quality_alloc =(char *) CALLOC_IN(this->fulllength+1,sizeof(char));
+    new->quality = new->quality_alloc =(char *) MALLOC_IN((this->fulllength+1)*sizeof(char));
     strcpy(new->quality,this->quality);
   }
 #endif
@@ -1334,7 +1334,7 @@ Sequence_read (int *nextchar, FILE *input) {
 #endif
 
   debug(printf("Final query sequence is:\n"));
-  debug(Sequence_print(stdout,new,/*uppercasep*/false,/*wraplength*/60,/*trimmedp*/false));
+  debug(Sequence_stdout(new,/*uppercasep*/false,/*wraplength*/60,/*trimmedp*/false));
   return new;
 }
 
@@ -1442,7 +1442,7 @@ Sequence_read_unlimited (int *nextchar, FILE *input) {
     maxseqlen = MAXSEQLEN;
   }
   intlist = Intlist_reverse(intlist);
-  new->contents = new->contents_alloc = Intlist_to_char_array(&length,intlist);
+  new->contents = new->contents_alloc = Intlist_to_char_array_in(&length,intlist);
 
   Intlist_free(&intlist);
 
@@ -1506,6 +1506,24 @@ Sequence_print_header (Filestring_T fp, T this, bool checksump) {
   return;
 }
 
+void
+Sequence_stdout_header (T this) {
+  if (this->acc == NULL) {
+    printf("NO_HEADER");
+  } else {
+    if (this->restofheader == NULL || this->restofheader[0] == '\0') {
+      printf("%s",this->acc);
+    } else {
+      printf("%s %s",this->acc,this->restofheader);
+    }
+  }
+
+  printf("\n");
+
+  return;
+}
+
+
 #if 0
 /* Used by revcomp.c */
 void
diff --git a/src/sequence.h b/src/sequence.h
index bc9ec26..b247942 100644
--- a/src/sequence.h
+++ b/src/sequence.h
@@ -1,4 +1,4 @@
-/* $Id: sequence.h 170023 2015-07-17 16:47:21Z twu $ */
+/* $Id: sequence.h 207325 2017-06-14 19:41:47Z twu $ */
 #ifndef SEQUENCE_INCLUDED
 #define SEQUENCE_INCLUDED
 #ifdef HAVE_CONFIG_H
@@ -112,6 +112,8 @@ extern void
 Sequence_print_digest (Filestring_T fp, T this);
 extern void
 Sequence_print_header (Filestring_T fp, T this, bool checksump);
+extern void
+Sequence_stdout_header (T this);
 
 extern void
 Sequence_print (Filestring_T fp, T this, bool uppercasep, int wraplength, bool trimmedp);
diff --git a/src/shortread.c b/src/shortread.c
index 3c57ca8..5eb7678 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 196410 2016-08-16 15:57:57Z twu $";
+static char rcsid[] = "$Id: shortread.c 207399 2017-06-15 22:24:35Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -955,10 +955,10 @@ input_header_fastq (int *nchars, bool *filterp, char **restofheader, int nextcha
   *filterp = false;
 
   if (nextchar == EOF) { /* Was feof(fp) */
-    return NULL;
+    return (char *) NULL;
   } else if ((p = fgets(&(Header[0]),HEADERLEN,fp)) == NULL) {
     /* File must terminate after > */
-    return NULL;
+    return (char *) NULL;
   } else {
     *nchars += strlen(p);
   }
@@ -1554,7 +1554,7 @@ input_oneline (int *nextchar, int *nchars, char **longstring, char *Start,
 	}
 
 	intlist = Intlist_reverse(intlist);
-	*longstring = Intlist_to_char_array(&i,intlist);
+	*longstring = Intlist_to_char_array_in(&i,intlist);
 	Intlist_free_in(&intlist);
 
 	debug(printf("nchars %d: Intlist method returning %d\n",*nchars,i));
@@ -1649,7 +1649,7 @@ input_oneline_filecontents (int *nextchar, char **longstring, char *Start,
 	}
 
 	intlist = Intlist_reverse(intlist);
-	*longstring = Intlist_to_char_array(&i,intlist);
+	*longstring = Intlist_to_char_array_in(&i,intlist);
 	Intlist_free_in(&intlist);
 
 	debug(printf("Intlist method returning %d\n",i));
@@ -1757,7 +1757,7 @@ input_oneline_gzip (int *nextchar, char **longstring, char *Start,
 	}
 
 	intlist = Intlist_reverse(intlist);
-	*longstring = Intlist_to_char_array(&i,intlist);
+	*longstring = Intlist_to_char_array_in(&i,intlist);
 	Intlist_free_in(&intlist);
 
 	debug(printf("Intlist method returning %d\n",i));
@@ -1867,7 +1867,7 @@ input_oneline_bzip2 (int *nextchar, char **longstring, char *Start,
 	}
 
 	intlist = Intlist_reverse(intlist);
-	*longstring = Intlist_to_char_array(&i,intlist);
+	*longstring = Intlist_to_char_array_in(&i,intlist);
 	Intlist_free_in(&intlist);
 
 	debug(printf("Intlist method returning %d\n",i));
@@ -2774,7 +2774,7 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
 			   FILE **input1, FILE **input2,
 			   char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2;
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader, *acc2, *restofheader2;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength1, fulllength2, quality_length;
@@ -2974,16 +2974,20 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
 		(*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					     Quality,long_quality,quality_length,barcode_length,
 					     invert_second_p,/*copy_acc_p*/false,skipp);
-		FREE_IN(acc2);
-		FREE_IN(restofheader2);
+		if (skipp == false) {
+		  FREE_IN(acc2);
+		  FREE_IN(restofheader2);
+		}
 	      }
 	    } else {
 	      /* End 2 without quality string */
 	      (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					   /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
 					   invert_second_p,/*copy_acc_p*/false,skipp);
-	      FREE_IN(acc2);
-	      FREE_IN(restofheader2);
+	      if (skipp == false) {
+		FREE_IN(acc2);
+		FREE_IN(restofheader2);
+	      }
 	    }
 	  }
 
@@ -3037,7 +3041,7 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
 #endif
 			 char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2;
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader, *acc2, *restofheader2;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength1, fulllength2, quality_length;
@@ -3282,16 +3286,20 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
 		(*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					     Quality,long_quality,quality_length,barcode_length,
 					     invert_second_p,/*copy_acc_p*/false,skipp);
-		FREE_IN(acc2);
-		FREE_IN(restofheader2);
+		if (skipp == false) {
+		  FREE_IN(acc2);
+		  FREE_IN(restofheader2);
+		}
 	      }
 	    } else {
 	      /* End 2 without quality string */
 	      (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					   /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
 					   invert_second_p,/*copy_acc_p*/false,skipp);
-	      FREE_IN(acc2);
-	      FREE_IN(restofheader2);
+	      if (skipp == false) {
+		FREE_IN(acc2);
+		FREE_IN(restofheader2);
+	      }
 	    }
 	  }
 
@@ -3343,7 +3351,7 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
 			   gzFile *input1, gzFile *input2,
 			   char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2;
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader, *acc2, *restofheader2;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength1, fulllength2, quality_length;
@@ -3602,16 +3610,20 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
 		(*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					     Quality,long_quality,quality_length,barcode_length,
 					     invert_second_p,/*copy_acc_p*/false,skipp);
-		FREE_IN(acc2);
-		FREE_IN(restofheader2);
+		if (skipp == false) {
+		  FREE_IN(acc2);
+		  FREE_IN(restofheader2);
+		}
 	      }
 	    } else {
 	      /* End 2 without quality string */
 	      (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					   /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
 					   invert_second_p,/*copy_acc_p*/false,skipp);
-	      FREE_IN(acc2);
-	      FREE_IN(restofheader2);
+	      if (skipp == false) {
+		FREE_IN(acc2);
+		FREE_IN(restofheader2);
+	      }
 	    }
 	  }
 
@@ -3670,7 +3682,7 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
 			    Bzip2_T *input1, Bzip2_T *input2,
 			    char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2;
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader, *acc2, *restofheader2;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength1, fulllength2, quality_length;
@@ -3920,16 +3932,20 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
 		 (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					      Quality,long_quality,quality_length,barcode_length,
 					      invert_second_p,/*copy_acc_p*/false,skipp);
-		 FREE_IN(acc2);
-		 FREE_IN(restofheader2);
+		 if (skipp == false) {
+		   FREE_IN(acc2);
+		   FREE_IN(restofheader2);
+		 }
 	       }
 	    } else {
 	      /* End 2 without quality string */
 	      (*queryseq2) = Shortread_new(/*acc2*/NULL,/*restofheader2*/NULL,filterp,Read2,long_read_2,fulllength2,
 					   /*quality*/NULL,/*long_quality*/NULL,/*quality_length*/0,barcode_length,
 					   invert_second_p,/*copy_acc_p*/false,skipp);
-	      FREE_IN(acc2);
-	      FREE_IN(restofheader2);
+	      if (skipp == false) {
+		FREE_IN(acc2);
+		FREE_IN(restofheader2);
+	      }
 	    }
 	  }
 
@@ -4175,7 +4191,7 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
 #endif
 			 char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2 = '\0';
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength, quality_length;
@@ -4412,7 +4428,7 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
 			   gzFile *input1, gzFile *input2,
 			   char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2 = '\0';
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength, quality_length;
@@ -4636,7 +4652,7 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
 			    Bzip2_T *input1, Bzip2_T *input2,
 			    char ***files, int *nfiles, bool skipp) {
   T queryseq1;
-  int nextchar2 = '\0';
+  int nextchar2 = '\0';		/* Can be anything but EOF */
   char *acc, *restofheader;
   char *long_read_1, *long_read_2, *long_quality;
   int fulllength, quality_length;
@@ -5204,7 +5220,6 @@ Shortread_print_oneline (Filestring_T fp, T this) {
 
 void
 Shortread_print_oneline_revcomp (Filestring_T fp, T this) {
-  int i = 0;
 
 #if 0
   for (i = this->fulllength-1; i >= 0; --i) {
@@ -5250,7 +5265,6 @@ Shortread_print_chopped_sam (Filestring_T fp, T this, int hardclip_low, int hard
 
 void
 Shortread_print_chopped_revcomp_sam (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-  int i;
 
 #ifdef PRINT_INDIVIDUAL_CHARS
   FPRINTF(fp,"\t");
@@ -5297,7 +5311,6 @@ Shortread_print_chopped_end (Filestring_T fp, T this, int hardclip_low, int hard
 /* For samprint XH field */
 void
 Shortread_print_chopped_end_revcomp (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-  int i;
 
   if (hardclip_low > 0) {
 #ifdef PRINT_INDIVIDUAL_CHARS
@@ -5354,7 +5367,6 @@ Shortread_print_chopped_end_quality (Filestring_T fp, T this, int hardclip_low,
 /* For samprint XI field */
 void
 Shortread_print_chopped_end_quality_reverse (Filestring_T fp, T this, int hardclip_low, int hardclip_high) {
-  int i;
 
   if (hardclip_low > 0) {
 #ifdef PRINT_INDIVIDUAL_CHARS
@@ -5392,7 +5404,6 @@ Shortread_print_barcode (Filestring_T fp, T this) {
 
 void
 Shortread_print_chop (Filestring_T fp, T this, bool invertp) {
-  int i;
 
   if (this->chop != NULL) {
     FPRINTF(fp,"\tXP:Z:");
@@ -5516,7 +5527,6 @@ Shortread_print_quality_revcomp (Filestring_T fp, T this, int hardclip_low, int
 
 void
 Shortread_print_oneline_uc (Filestring_T fp, T this) {
-  int i = 0;
 
 #ifdef PRINT_INDIVIDUAL_CHARS
   for (i = 0; i < this->fulllength; i++) {
@@ -5531,7 +5541,6 @@ Shortread_print_oneline_uc (Filestring_T fp, T this) {
 
 void
 Shortread_print_oneline_revcomp_uc (Filestring_T fp, T this) {
-  int i = 0;
 
 #ifdef PRINT_INDIVIDUAL_CHARS
   for (i = this->fulllength-1; i >= 0; --i) {
diff --git a/src/smooth.c b/src/smooth.c
index f87ce26..0825b54 100644
--- a/src/smooth.c
+++ b/src/smooth.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: smooth.c 184474 2016-02-18 00:12:53Z twu $";
+static char rcsid[] = "$Id: smooth.c 202031 2016-12-29 16:20:14Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -550,10 +550,12 @@ bad_intron_p (double donor_prob, double acceptor_prob, int intron_matches_left,
 
 struct Smoothcell_T {
   int exoni;
-  double pvalue;
+  /* double pvalue; */
+  int exonmatches;
   int exonstatus;
 };
 
+#if 0
 static int
 Smoothcell_cmp (const void *x, const void *y) {
   struct Smoothcell_T a = * (struct Smoothcell_T *) x;
@@ -567,6 +569,22 @@ Smoothcell_cmp (const void *x, const void *y) {
     return 0;
   }
 }
+#else
+static int
+Smoothcell_cmp (const void *x, const void *y) {
+  struct Smoothcell_T a = * (struct Smoothcell_T *) x;
+  struct Smoothcell_T b = * (struct Smoothcell_T *) y;
+
+  if (a.exonmatches < b.exonmatches) {
+    return -1;
+  } else if (b.exonmatches < a.exonmatches) {
+    return +1;
+  } else {
+    return 0;
+  }
+}
+#endif
+
 
 
 static void
@@ -590,7 +608,7 @@ find_internal_bads_by_prob (bool *deletep, int *exonstatus, int *exonmatches, in
   for (i = 0; i < nexons; i++) {
     exonstatus[i] = KEEP;
     cells[i].exoni = i;
-    cells[i].pvalue = 1.0;
+    cells[i].exonmatches = exonmatches[i];
     cells[i].exonstatus = KEEP;
   }
   
@@ -603,7 +621,8 @@ find_internal_bads_by_prob (bool *deletep, int *exonstatus, int *exonmatches, in
 				 intron_matches_right[i],intron_denominator_right[i],total_matches,total_denominator);
     debug(printf("For exon %d, left intron bad %d, right intron bad %d\n",i,intron1_bad_p,intron2_bad_p));
 
-    if (intron1_bad_p == true && intron2_bad_p == true) {
+    if (intron1_bad_p == true || intron2_bad_p == true) {
+#if 0
       numerator0 = exonmatches[i];
       denominator0 = exon_denominator[i];
       theta0 = (double) (total_matches - numerator0 + 1)/(double) (total_denominator - denominator0 + 1);
@@ -613,6 +632,11 @@ find_internal_bads_by_prob (bool *deletep, int *exonstatus, int *exonmatches, in
 	cells[i].pvalue = pvalue;
 	cells[i].exonstatus = DELETE;
       }
+#else
+      if (exonmatches[i] < 15) {
+	cells[i].exonstatus = DELETE;
+      }
+#endif
 
     } else {
       /* Do nothing */
@@ -623,8 +647,9 @@ find_internal_bads_by_prob (bool *deletep, int *exonstatus, int *exonmatches, in
 
   qsort(cells,nexons,sizeof(struct Smoothcell_T),Smoothcell_cmp);
   i = 0;
-  while (i < nexons && cells[i].pvalue < STRICT_EXON_PVALUE) {
+  while (i < nexons && cells[i].exonmatches < 15) {
     if (cells[i].exonstatus == DELETE) {
+      debug(printf("  Will delete exon %d\n",i));
       *deletep = true;
       exonstatus[cells[i].exoni] = DELETE;
       exonstatus[cells[i].exoni - 1] = KEEP; /* Prevent consecutive deletes */
diff --git a/src/spanningelt.c b/src/spanningelt.c
index 9d16a5a..f24768c 100644
--- a/src/spanningelt.c
+++ b/src/spanningelt.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: spanningelt.c 184022 2016-02-10 01:52:03Z twu $";
+static char rcsid[] = "$Id: spanningelt.c 205969 2017-05-04 00:50:24Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -6,9 +6,11 @@ static char rcsid[] = "$Id: spanningelt.c 184022 2016-02-10 01:52:03Z twu $";
 #include "spanningelt.h"
 #include <stdlib.h>
 #include <math.h>		/* For qsort */
+#include "assert.h"
 #include "mem.h"
 #include "indexdbdef.h"
 
+
 #ifdef WORDS_BIGENDIAN
 #include "bigendian.h"
 #endif
@@ -83,7 +85,7 @@ void
 Spanningelt_gc (T old) {
 
   if (old->intersection_diagonals_reset != NULL) {
-    FREE(old->intersection_diagonals_reset);
+    FREE_ALIGN(old->intersection_diagonals_reset);
   }
   if (old->compoundpos != NULL) {
     Compoundpos_free(&(old->compoundpos));
@@ -687,7 +689,8 @@ compute_intersection (int *ndiagonals,
 #else
   Univcoord_T *positions0, *positions1;
 #endif
-  int npositions0, npositions1, delta, j, diagterm;
+  int npositions0, npositions1, delta, j, diagterm, i;
+
 
   if (npositionsa < npositionsb) {
 #ifdef LARGE_GENOMES
@@ -727,8 +730,10 @@ compute_intersection (int *ndiagonals,
     debug(printf("intersection is null\n"));
     return (Univcoord_T *) NULL;
   } else {
-    /* Allocate maximum possible size */
-    diagonals = (Univcoord_T *) CALLOC(npositions0,sizeof(Univcoord_T));
+    /* Note: This has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */
+    diagonals = (Univcoord_T *) MALLOC_ALIGN(npositions0 * sizeof(Univcoord_T));
+    /* Previously, allocated maximum possible size */
+    /* diagonals = (Univcoord_T *) CALLOC(npositions0,sizeof(Univcoord_T)); */
   }
 
   while (npositions0 > 0) {
@@ -790,6 +795,7 @@ compute_intersection (int *ndiagonals,
 #ifdef LARGE_GENOMES
     if (npositions1 <= 0) {
       return diagonals;
+
     } else if ((((Univcoord_T) *positions1_high) << 32) + (*positions1_low) == local_goal) {
       /* Found local goal.  Save and advance */
       debug(printf("    intersection list 1: %d:%u  found\n",
@@ -806,6 +812,7 @@ compute_intersection (int *ndiagonals,
 #elif defined(WORDS_BIGENDIAN)
     if (npositions1 <= 0) {
       return diagonals;
+
     } else if (Bigendian_convert_univcoord(*positions1) == local_goal) {
       /* Found local goal.  Save and advance */
       debug(printf("    intersection list 1: %d:%u  found\n",
@@ -821,6 +828,7 @@ compute_intersection (int *ndiagonals,
 #else
     if (npositions1 <= 0) {
       return diagonals;
+
     } else if ((*positions1) == local_goal) {
       /* Found local goal.  Save and advance */
       debug(printf("    intersection list 1: %d:%u  found\n",npositions1,*positions1));
@@ -857,7 +865,7 @@ compoundpos_intersect (int *ndiagonals,
 #endif
 		       int diagterm0, int npositions0, Compoundpos_T compoundpos, int diagterm1) {
   Univcoord_T *diagonals, local_goal, last_local_goal;
-  int delta;
+  int delta, i;
   bool emptyp;
 
   delta = diagterm0 - diagterm1; /* list0 + (diagterm0 - diagterm1) = list1 */
@@ -866,8 +874,9 @@ compoundpos_intersect (int *ndiagonals,
   if (npositions0 == 0) {
     return (Univcoord_T *) NULL;
   } else {
-  /* Could add up compoundpos->npositions to see if we could allocate less memory */
-    diagonals = (Univcoord_T *) CALLOC(npositions0,sizeof(Univcoord_T));
+    /* Could add up compoundpos->npositions to see if we could allocate less memory */
+    /* Note: This has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */
+    diagonals = (Univcoord_T *) MALLOC_ALIGN(npositions0 * sizeof(Univcoord_T));
   }
   
   last_local_goal = 0U;
@@ -917,7 +926,7 @@ check_diagonals (Univcoord_T *diagonals, int ndiagonals) {
   for (i = 0; i < ndiagonals; i++) {
     if (diagonals[i] == last_diagonal) {
       fprintf(stderr,"Saw repeat of %u\n",diagonals[i]);
-      exit(9);
+      abort();
     } else {
       last_diagonal = diagonals[i];
     }
@@ -951,6 +960,8 @@ Spanningelt_diagonals (int *ndiagonals, T this, int *miss_querypos5, int *miss_q
     /* Previously computed a result */
     *ndiagonals = this->intersection_ndiagonals;
     check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals));
+    debug(printf("Returning previous result\n"));
+    CHECK_ALIGN(this->intersection_diagonals);
     return this->intersection_diagonals;
 
   } else if (this->partnerp == false) {
@@ -961,7 +972,8 @@ Spanningelt_diagonals (int *ndiagonals, T this, int *miss_querypos5, int *miss_q
 	this->intersection_diagonals = (Univcoord_T *) NULL;
 	*ndiagonals = this->intersection_ndiagonals = 0;
       } else {
-	q = this->intersection_diagonals = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T));
+	/* Note: This has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */
+	q = this->intersection_diagonals = (Univcoord_T *) MALLOC_ALIGN(this->npositions * sizeof(Univcoord_T));
 #ifdef LARGE_GENOMES
 	p_high = this->positions_high;
 	p_low = this->positions_low;
@@ -1003,6 +1015,7 @@ Spanningelt_diagonals (int *ndiagonals, T this, int *miss_querypos5, int *miss_q
 
       debug(printf("Returning %p (%d diagonals)\n",this->intersection_diagonals,this->intersection_ndiagonals));
       check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals));
+      CHECK_ALIGN(this->intersection_diagonals);
       return this->intersection_diagonals;
 
     } else {
@@ -1016,6 +1029,7 @@ Spanningelt_diagonals (int *ndiagonals, T this, int *miss_querypos5, int *miss_q
       this->intersection_ndiagonals_reset = this->intersection_ndiagonals;
 
       check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals));
+      CHECK_ALIGN(this->intersection_diagonals);
       return this->intersection_diagonals;
     }
 
@@ -1040,6 +1054,7 @@ Spanningelt_diagonals (int *ndiagonals, T this, int *miss_querypos5, int *miss_q
       this->intersection_ndiagonals_reset = this->intersection_ndiagonals;
 
       check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals));
+      CHECK_ALIGN(this->intersection_diagonals);
       return this->intersection_diagonals;
 
     } else {
@@ -1060,6 +1075,7 @@ Spanningelt_diagonals (int *ndiagonals, T this, int *miss_querypos5, int *miss_q
       this->intersection_ndiagonals_reset = this->intersection_ndiagonals;
 
       check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals));
+      CHECK_ALIGN(this->intersection_diagonals);
       return this->intersection_diagonals;
     }      
   }
diff --git a/src/spanningelt.h b/src/spanningelt.h
index f400e14..44f5bea 100644
--- a/src/spanningelt.h
+++ b/src/spanningelt.h
@@ -1,4 +1,4 @@
-/* $Id: spanningelt.h 180341 2015-12-07 18:29:40Z twu $ */
+/* $Id: spanningelt.h 205968 2017-05-04 00:50:05Z twu $ */
 #ifndef SPANNINGELT_INCLUDED
 #define SPANNINGELT_INCLUDED
 
@@ -19,7 +19,7 @@ struct T {
   int partner_querypos;		/* for debugging */
   int querypos;			/* for debugging */
 
-  /* Intersectionr results are in native format, not littleendian */
+  /* Intersection results are in native format, not littleendian */
   Univcoord_T *intersection_diagonals;
   int intersection_ndiagonals;
 
@@ -52,7 +52,7 @@ struct T {
   int miss_querypos5; /* If partnerp is true, this is the overlap of the two partners */
   int miss_querypos3;
 
-  /* Reset values */
+  /* Reset values.  Needed because stage1hr procedures modify the pointer and number fields */
   Univcoord_T *intersection_diagonals_reset;
   int intersection_ndiagonals_reset;
 #ifdef LARGE_GENOMES
diff --git a/src/stage1hr.c b/src/stage1hr.c
index 7178440..790be8c 100644
--- a/src/stage1hr.c
+++ b/src/stage1hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $";
+static char rcsid[] = "$Id: stage1hr.c 209122 2017-08-15 19:29:33Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -41,10 +41,11 @@ static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $";
 #include "uint8list.h"
 #else
 #include "uintlist.h"
-#include "sarray-read.h"
+#include "sarray-search.h"
 #endif
 
 #include "spanningelt.h"
+#include "merge.h"
 #include "cmet.h"
 #include "atoi.h"
 
@@ -65,7 +66,21 @@ static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $";
 #endif
 #endif
 
+
+/* Three methods for performing a multiway merge.  Need to define one below. */
+#if defined(LARGE_GENOMES) || !defined(HAVE_SSE4_1)
 #define USE_HEAP 1
+/* #define USE_LOSER_TREES 1 */
+#else
+#define USE_MERGE 1
+/* #define USE_HEAP 1 */
+#endif
+
+
+#ifdef USE_HEAP
+#include "merge-heap.h"
+#endif
+
 
 #define SPEED 1
 
@@ -101,6 +116,7 @@ static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $";
 
 static bool use_sarray_p = true;
 static bool use_only_sarray_p = true;
+static bool require_completeset_p = false;
 
 
 /* Mode */
@@ -129,6 +145,9 @@ static bool find_dna_chimeras_p;
 static bool distances_observed_p;
 
 static Chrpos_T min_intronlength;
+static Chrpos_T expected_pairlength;
+static Chrpos_T pairlength_deviation;
+
 
 /* Splicing */
 static Univcoord_T *splicesites;
@@ -144,7 +163,6 @@ static int min_distantsplicing_identity;
 static bool gmap_segments_p;	/* previously called gmap_terminal_p.  Should move earlier (1). */
 static bool gmap_pairsearch_p;	/* controls halfmapping.  Should move later (2). */
 static bool gmap_improvement_p;	/* Should be at end (3). */
-static bool gmap_indel_knownsplice_p;
 static bool gmap_rerun_p = true;
 
 static int antistranded_penalty;
@@ -171,7 +189,9 @@ static int max_floors_readlength;
 
 /* Originally allowed only 1, to print only unique translocations.
    But need to allow enough to avoid missing some translocations. */
-#define MAXCHIMERAPATHS 100
+/* For transcript splicing, need to increase MAXCHIMERAPATHS */
+/* #define MAXCHIMERAPATHS 100 */
+#define MAXCHIMERAPATHS 10000
 
 #define NREQUIRED_FAST 2	/* For candidate generation using
 				   multimiss.  A value of 2 implies 
@@ -1679,7 +1699,7 @@ make_complement_inplace (char *sequence, unsigned int length) {
 typedef struct Batch_T *Batch_T;
 
 struct Batch_T {
-#ifndef USE_HEAP
+#ifdef USE_LOSER_TREES
   int nodei;			/* Node in loser tree.  Also used for debugging */
 #endif
   int querypos;
@@ -1706,12 +1726,12 @@ Batch_init (Batch_T batch, int querypos, int diagterm,
 	    Univcoord_T *positions,
 #endif
 	    int npositions, int querylength
-#ifndef USE_HEAP
+#ifdef USE_LOSER_TREES
 	    , int nodei
 #endif
 	    ) {
 
-#ifndef USE_HEAP
+#ifdef USE_LOSER_TREES
   batch->nodei = nodei;
 #endif
   batch->querypos = querypos;
@@ -1760,23 +1780,24 @@ Batch_init (Batch_T batch, int querypos, int diagterm,
 }
 
 
+#if defined(USE_HEAP) || defined(USE_LOSER_TREES)
 static void
 Batch_init_simple (Batch_T batch, Univcoord_T *diagonals, int ndiagonals, int querylength,
-#ifdef USE_HEAP
+#ifndef USE_LOSER_TREES
 		   int querypos
 #else
 		   int nodei
 #endif
 		   ) {
 
-#ifdef USE_HEAP
+#ifndef USE_LOSER_TREES
   batch->querypos = querypos;
 #else
   batch->nodei = nodei;
 #endif
   batch->positions = diagonals;
   batch->npositions = ndiagonals;
-#ifdef USE_HEAP
+#ifndef USE_LOSER_TREES
   batch->diagonal = *diagonals;	/* Already in correct endianness */
 #else
   if (batch->npositions == 0) {
@@ -1798,6 +1819,7 @@ Batch_init_simple (Batch_T batch, Univcoord_T *diagonals, int ndiagonals, int qu
 
   return;
 }
+#endif
 
 
 static void
@@ -1833,6 +1855,7 @@ min_heap_insert (Batch_T *heap, int *heapsize, Batch_T batch) {
 }
 
 
+#if defined(USE_HEAP) || defined(USE_LOSER_TREES)
 static void
 min_heap_insert_simple (Batch_T *heap, int *heapsize, Batch_T batch) {
   int i;
@@ -1848,7 +1871,7 @@ min_heap_insert_simple (Batch_T *heap, int *heapsize, Batch_T batch) {
 
   return;
 }
-
+#endif
 
 
 /* Note FORMULA: formulas for querypos <-> diagonal (diagterm in call to Indexdb_read) are:
@@ -1910,7 +1933,9 @@ report_perfect_segment (int *found_score, int *nhits, List_T hits, Univcoord_T l
       }
     }
 
-  } else {
+  } else if ((nmismatches = Genome_count_mismatches_limit(query_compress,left,/*pos5*/0,/*pos3*/querylength,
+							  /*max_mismatches_allowed*/nmisses_allowed,
+							  plusp,genestrand)) == 0) {
     /* mode == STANDARD && spansize == index1part */
     if ((hit = Stage3end_new_exact(&(*found_score),left,/*genomiclength*/querylength,
 				   query_compress,plusp,genestrand,
@@ -1920,6 +1945,19 @@ report_perfect_segment (int *found_score, int *nhits, List_T hits, Univcoord_T l
       *nhits += 1;
       return List_push(hits,(void *) hit);
     }
+
+  } else {
+    /* mode == STANDARD && spansize == index1part */
+    /* Appeared to be perfect, but isn't */
+    if ((hit = Stage3end_new_substitution(&(*found_score),nmismatches,left,/*genomiclength*/querylength,
+					  query_compress,plusp,genestrand,
+					  chrnum,chroffset,chrhigh,chrlength,
+					  /*sarrayp*/false)) == NULL) {
+      return hits;
+    } else {
+      *nhits += 1;
+      return List_push(hits,(void *) hit);
+    }
   }
 }
 
@@ -2146,7 +2184,8 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
 	  if (elt->miss_querypos3 > miss_querypos3) miss_querypos3 = elt->miss_querypos3;
 	  /* continue; -- naturally falls to end of loop */
 	}
-      } else if (*elt->intersection_diagonals > local_goal) {
+
+      } else if (*elt->intersection_diagonals > goal) { /* was local_goal */
 	/* Already advanced past goal, so continue with one more miss seen. */
 	debug7(printf(" one miss --"));
 	if (++nmisses_seen > nmisses_allowed) {
@@ -2158,6 +2197,7 @@ identify_multimiss_iter (int *found_score, Chrnum_T *chrnum, Univcoord_T *chroff
 	  if (elt->miss_querypos3 > miss_querypos3) miss_querypos3 = elt->miss_querypos3;
 	  /* continue; -- naturally falls to end of loop */
 	}
+
       } else {
 	/* Found goal.  Advance past goal and continue with loop. */
 	debug7(printf(" advancing\n"));
@@ -3316,7 +3356,249 @@ init_tree (Batch_T *losers, int heapsize) {
 }
 
 
-#ifdef USE_HEAP
+#ifdef USE_MERGE
+static List_T
+find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand,
+				 int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev,
+				 int nmisses_allowed) {
+  Univcoord_T *diagonals, *all_diagonals_merged, *all_diagonals, diagonal, new_diagonal;
+  Spanningelt_T *array;
+  List_T prev;
+  int nunion = nmisses_allowed + nrequired, nelts, elti, nstreams;
+  int count, mod, i;
+  int ndiagonals, nempty, n_all_diagonals;
+  int global_miss_querypos5, global_miss_querypos3;
+  int elt_miss_querypos5, elt_miss_querypos3;
+  List_T stream_list;
+  Intlist_T streamsize_list;
+  Univcoord_T chroffset, chrhigh;
+  Chrpos_T chrlength;
+  Chrnum_T chrnum;
+
+  debug(printf("Starting find_spanning_multimiss_matches with %d misses allowed\n",nmisses_allowed));
+
+
+  /* Plus */
+  for (mod = 0; mod < index1interval; mod++) {
+    array = this->plus_spanningset[mod];
+    nelts = this->plus_spanningset_nelts[mod];
+    debug(printf("Multimiss plus mod %d, nelts %d\n",mod,nelts));
+
+    qsort(array,nelts,sizeof(Spanningelt_T),Spanningelt_candidates_cmp);
+    if (nelts > nunion) {
+      qsort(&(array[nunion]),nelts-nunion,sizeof(Spanningelt_T),Spanningelt_pruning_cmp);
+    }
+    for (elti = 0; elti < nelts; elti++) {
+      Spanningelt_reset(array[elti]);
+    }
+
+    debug(printf("*** find_spanning_multimiss_matches, %d misses allowed, plus mod %d\n",nmisses_allowed,mod));
+    debug(Spanningelt_print_array(array,nelts));
+
+    /* Collect the first few (up to nunion) non-empty diagonal streams for merging */
+    global_miss_querypos5 = querylength;
+    global_miss_querypos3 = 0;
+    stream_list = (List_T) NULL;
+    streamsize_list = (Intlist_T) NULL;
+    nstreams = 0;
+    for (elti = 0; elti < nelts && elti < nunion; elti++) {
+      /* Get list as a special one, and perform conversion if necessary */
+      diagonals = Spanningelt_diagonals(&ndiagonals,(Spanningelt_T) array[elti],&elt_miss_querypos5,&elt_miss_querypos3);
+      /* Note: diagonals has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */
+      if (elt_miss_querypos5 < global_miss_querypos5) global_miss_querypos5 = elt_miss_querypos5;
+      if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3;
+
+      debug(printf("Adding plus batch/stream %d of size %d...",elti,ndiagonals));
+      if (ndiagonals > 0) {
+#ifdef DEBUG
+	for (i = 0; i < ndiagonals; i++) {
+	  printf("%u\n",diagonals[i]);
+	}
+#endif
+	stream_list = List_push(stream_list,(void *) diagonals);
+	streamsize_list = Intlist_push(streamsize_list,ndiagonals);
+	nstreams++;
+      }
+      debug(printf("\n"));
+    }
+
+    all_diagonals_merged = Merge_diagonals(&n_all_diagonals,stream_list,streamsize_list);
+#ifdef DEBUG7
+    for (i = 0; i < n_all_diagonals; i++) {
+      printf("DIAGONAL %u\n",all_diagonals_merged[i]);
+    }
+#endif
+    
+    /* Skip diagonals at beginning of genome */
+    all_diagonals = all_diagonals_merged;
+    while (n_all_diagonals > 0 && *all_diagonals < (unsigned int) querylength) {
+      debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n",
+		     (unsigned long long) *all_diagonals));
+      all_diagonals++;
+      n_all_diagonals--;
+    }
+
+    /* Process sorted diagonals */
+    if (n_all_diagonals > 0) {
+      prev = (struct List_T *) MALLOCA((nelts - elti + 1) * sizeof(struct List_T));
+      List_fill_array_with_handle(prev,(void *) &(array[elti]),nelts - elti);
+
+      nempty = 0;
+      chrhigh = 0U;
+
+      debug7(printf("*** multimiss mod %d plus:\n",mod));
+      diagonal = all_diagonals[0];
+      count = 1;
+      debug7(printf("at ??, initial diagonal is %llu\n",(unsigned long long) diagonal));
+
+      i = 1;
+      while (i < n_all_diagonals && *nhits <= maxpaths_search) {
+	if ((new_diagonal = all_diagonals[i++]) == diagonal) {
+	  count++;
+	  debug7(printf("at ??, incrementing diagonal %llu to count %d\n",(unsigned long long) diagonal,count));
+	} else {
+	  /* End of diagonal */
+	  if (count >= nrequired) {
+	    hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+					   prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+					   querylength,/*query_compress*/query_compress_fwd,
+					   /*plusp*/true,genestrand,nmisses_allowed,
+					   /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+	  }
+	  diagonal = new_diagonal;
+	  count = 1;
+	  debug7(printf("at ??, next diagonal is %llu\n",(unsigned long long) diagonal));
+	}
+      }
+
+      /* Terminate loop */
+      if (count >= nrequired && *nhits <= maxpaths_search) {
+	hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+				       prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+				       querylength,/*query_compress*/query_compress_fwd,
+				       /*plusp*/true,genestrand,nmisses_allowed,
+				       /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+      }
+
+      FREEA(prev);
+    }
+
+    /* Previously had an exception for nstreams == 1 */
+    FREE_ALIGN(all_diagonals_merged);
+  }
+
+  /* Minus */
+  for (mod = 0; mod < index1interval; mod++) {
+    array = this->minus_spanningset[mod];
+    nelts = this->minus_spanningset_nelts[mod];
+    debug(printf("Multimiss minus mod %d, nelts %d\n",mod,nelts));
+
+    qsort(array,nelts,sizeof(Spanningelt_T),Spanningelt_candidates_cmp);
+    if (nelts > nunion) {
+      qsort(&(array[nunion]),nelts-nunion,sizeof(Spanningelt_T),Spanningelt_pruning_cmp);
+    }
+    for (elti = 0; elti < nelts; elti++) {
+      Spanningelt_reset(array[elti]);
+    }
+
+    debug(printf("*** find_spanning_multimiss_matches, %d misses_allowed, minus mod %d\n",nmisses_allowed,mod));
+    debug(Spanningelt_print_array(array,nelts));
+
+    /* Collect the first few (up to nunion) non-empty diagonal streams for merging */
+    global_miss_querypos5 = querylength;
+    global_miss_querypos3 = 0;
+    stream_list = (List_T) NULL;
+    streamsize_list = (Intlist_T) NULL;
+    nstreams = 0;
+    for (elti = 0; elti < nelts && elti < nunion; elti++) {
+      /* Get list as a special one, and perform conversion if necessary */
+      diagonals = Spanningelt_diagonals(&ndiagonals,(Spanningelt_T) array[elti],&elt_miss_querypos5,&elt_miss_querypos3);
+      if (elt_miss_querypos5 < global_miss_querypos5) global_miss_querypos5 = elt_miss_querypos5;
+      if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3;
+
+      debug(printf("Adding minus batch/stream %d of size %d...",elti,ndiagonals));
+      if (ndiagonals > 0) {
+#ifdef DEBUG
+	for (i = 0; i < ndiagonals; i++) {
+	  printf("%u\n",diagonals[i]);
+	}
+#endif
+	stream_list = List_push(stream_list,(void *) diagonals);
+	streamsize_list = Intlist_push(streamsize_list,ndiagonals);
+	nstreams++;
+      }
+      debug(printf("\n"));
+    }
+
+    all_diagonals_merged = Merge_diagonals(&n_all_diagonals,stream_list,streamsize_list);
+#ifdef DEBUG7
+    for (i = 0; i < n_all_diagonals; i++) {
+      printf("DIAGONAL %u\n",all_diagonals_merged[i]);
+    }
+#endif
+
+    /* Skip diagonals at beginning of genome */
+    all_diagonals = all_diagonals_merged;
+    while (n_all_diagonals > 0 && *all_diagonals < (unsigned int) querylength) {
+      debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n",
+		     (unsigned long long) *all_diagonals));
+      all_diagonals++;
+      n_all_diagonals--;
+    }
+
+    /* Process sorted diagonals */
+    if (n_all_diagonals > 0) {
+      prev = (struct List_T *) MALLOCA((nelts - elti + 1) * sizeof(struct List_T));
+      List_fill_array_with_handle(prev,(void *) &(array[elti]),nelts - elti);
+
+      nempty = 0;
+      chrhigh = 0U;
+
+      debug7(printf("*** multimiss mod %d minus:\n",mod));
+      diagonal = all_diagonals[0];
+      count = 1;
+      debug7(printf("at ??, initial diagonal is %llu\n",(unsigned long long) diagonal));
+
+      i = 1;
+      while (i < n_all_diagonals && *nhits <= maxpaths_search) {
+	if ((new_diagonal = all_diagonals[i++]) == diagonal) {
+	  count++;
+	  debug7(printf("at ??, incrementing diagonal %llu to count %d\n",(unsigned long long) diagonal,count));
+	} else {
+	  /* End of diagonal */
+	  if (count >= nrequired) {
+	    hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+					   prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+					   querylength,/*query_compress*/query_compress_rev,
+					   /*plusp*/false,genestrand,nmisses_allowed,
+					   /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+	  }
+	  diagonal = new_diagonal;
+	  count = 1;
+	  debug7(printf("at ??, next diagonal is %llu\n",(unsigned long long) diagonal));
+	}
+      }
+
+      /* Terminate loop */
+      if (count >= nrequired && *nhits <= maxpaths_search) {
+	hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal,
+				       prev,&nempty,&global_miss_querypos5,&global_miss_querypos3,
+				       querylength,/*query_compress*/query_compress_rev,
+				       /*plusp*/false,genestrand,nmisses_allowed,
+				       /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3);
+      }
+
+      FREEA(prev);
+    }
+
+    /* Previously had an exception for nstreams == 1 */
+    FREE_ALIGN(all_diagonals_merged);
+  }
+
+  return hits;
+}
+
+#elif defined(USE_HEAP)
 static List_T
 find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand,
 				 int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev,
@@ -3378,11 +3660,15 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
       if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3;
 
       batch = &(batchpool[elti]);
-      debug(printf("Adding batch %d of size %d...",elti,ndiagonals));
+      debug(printf("Adding plus batch/stream %d of size %d...",elti,ndiagonals));
       if (ndiagonals > 0) {
+#ifdef DEBUG
+	for (i = 0; i < ndiagonals; i++) {
+	  printf("%u\n",diagonals[i]);
+	}
+#endif
 	Batch_init_simple(batch,diagonals,ndiagonals,querylength,/*querypos*/elti);
 	if (batch->npositions > 0) {
-	  debug(printf("inserting into heap"));
 	  min_heap_insert_simple(heap,&heapsize,batch);
 	}
       }
@@ -3409,7 +3695,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
       batch = heap[1];
       diagonal = batch->diagonal;
       count = 1;
-      debug7(printf("at #%d, initial diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+      debug7(printf("at #%d, initial diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
 
       /* Update batch */
       if (--batch->npositions <= 0) {
@@ -3451,7 +3737,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
 	if (batch->diagonal == diagonal) {
 	  count++;
 	  debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
-			batch->nodei,(unsigned long long) diagonal,count));
+			batch->querypos,(unsigned long long) diagonal,count));
 	} else {
 	  /* End of diagonal */
 	  if (count >= nrequired) {
@@ -3464,7 +3750,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
 	  }
 	  diagonal = batch->diagonal;
 	  count = 1;
-	  debug7(printf("at #%d, next diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+	  debug7(printf("at #%d, next diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
 	}
 
 	/* Update batch */
@@ -3541,11 +3827,15 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
       if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3;
 
       batch = &(batchpool[elti]);
-      debug(printf("Adding batch %d of size %d...",elti,ndiagonals));
+      debug(printf("Adding minus batch/stream %d of size %d...",elti,ndiagonals));
       if (ndiagonals > 0) {
+#ifdef DEBUG
+	for (i = 0; i < ndiagonals; i++) {
+	  printf("%u\n",diagonals[i]);
+	}
+#endif
 	Batch_init_simple(batch,diagonals,ndiagonals,querylength,/*querypos*/elti);
 	if (batch->npositions > 0) {
-	  debug(printf("inserting into heap"));
 	  min_heap_insert_simple(heap,&heapsize,batch);
 	}
       }
@@ -3573,7 +3863,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
       batch = heap[1];
       diagonal = batch->diagonal;
       count = 1;
-      debug7(printf("at #%d, initial diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+      debug7(printf("at #%d, initial diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
 
       /* Update batch */
       if (--batch->npositions <= 0) {
@@ -3615,7 +3905,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
 	if (batch->diagonal == diagonal) {
 	  count++;
 	  debug7(printf("at #%d, incrementing diagonal %llu to count %d\n",
-			batch->nodei,(unsigned long long) diagonal,count));
+			batch->querypos,(unsigned long long) diagonal,count));
 	} else {
 	  /* End of diagonal */
 	  if (count >= nrequired) {
@@ -3627,7 +3917,7 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
 	  }
 	  diagonal = batch->diagonal;
 	  count = 1;
-	  debug7(printf("at #%d, next diagonal is %llu\n",batch->nodei,(unsigned long long) diagonal));
+	  debug7(printf("at #%d, next diagonal is %llu\n",batch->querypos,(unsigned long long) diagonal));
 	}
 
 	/* Update batch */
@@ -3681,10 +3971,9 @@ find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T th
 
   return hits;
 }
-#endif
 
-#ifndef USE_HEAP
-/* Uses a loser tree */
+#elif defined(USE_LOSER_TREES)
+
 static List_T
 find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand,
 				 int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev,
@@ -4053,6 +4342,7 @@ find_complete_mm (int *found_score, int *nhits, List_T hits, Segment_T *anchor_s
     segmenti = *p;
     assert(segmenti->diagonal != (Univcoord_T) -1);
     if (segmenti->floor <= max_mismatches_allowed) {
+      assert(segmenti->diagonal >= (Univcoord_T) querylength);  /* identify_all_segments should have performed filtering */
       left = segmenti->diagonal - querylength;
       nmismatches = Genome_count_mismatches_limit(query_compress,left,/*pos5*/0,/*pos3*/querylength,
 						  max_mismatches_allowed,plusp,genestrand);
@@ -4074,36 +4364,1073 @@ find_complete_mm (int *found_score, int *nhits, List_T hits, Segment_T *anchor_s
 }
 
 
-#ifdef USE_HEAP
-/* TODO: Change spliceable to be an attribute of the segment.  Then we
-   can loop over anchor_segments only */
-static struct Segment_T *
-identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors,
-		       Segment_T **spliceable, int *nspliceable,
-#ifdef LARGE_GENOMES
-		       unsigned char **positions_high, UINT4 **positions_low,
-#else
-		       Univcoord_T **positions,
-#endif
-		       int *npositions, bool *omitted, int querylength, int query_lastpos,
-		       Floors_T floors, bool plusp) {
-  struct Segment_T *segments = NULL;
-  Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src;
-  int length_threshold;
-  int n_all_segments, n;
-  int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL];
-  int mod;
-  int k;
+#if 0
+/* Modified from pair_up_concordant_aux in stage3hr.c */
+static void
+pair_up_segments (struct Segment_T *plus_segments_5, int plus_nsegments_5,
+		  struct Segment_T *minus_segments_5, int minus_nsegments_5,
+		  struct Segment_T *plus_segments_3, int plus_nsegments_3,
+		  struct Segment_T *minus_segments_3, int minus_nsegments_3,
+		  int querylength5, int querylength3, Chrpos_T pairmax) {
+  int i, j;
+  Univcoord_T insert_start;
+  Segment_T segment5, segment3;	/* Need pointers, because we are changing the pairable value */
 
-  struct Batch_T *batchpool;
-  struct Batch_T sentinel_struct;
-  Batch_T *heap, sentinel;
-  int smallesti, righti;
-  Batch_T batch;
-  int heapsize = 0;
-  int parenti, i;
-  int querypos, first_querypos, last_querypos;
-  int floor_left, floor_right, floor_incr;
+  debug(printf("Entered pair_up_segments\n"));
+
+  /* plus/plus */
+  j = 0;
+  for (i = 0; i < plus_nsegments_5; i++) {
+    segment5 = &(plus_segments_5[i]);
+    if ((insert_start = segment5->diagonal) == (Univcoord_T) -1) {
+      /* Skip chromosomal end marker */
+    } else {
+#ifdef DEBUG5
+      printf("plus/plus: i=%d/%d %u %d..%d\n",
+	     i,plus_nsegments_5,segment5->diagonal,segment5->querypos5,segment5->querypos3);
+      if (j >= plus_nsegments_3) {
+	printf("  current: j=%d/%d\n",j,plus_nsegments_3);
+      } else if (plus_segments_3[j].diagonal == (Univcoord_T) -1) {
+	printf("  current: j=%d/%d %u\n",j,plus_nsegments_3,plus_segments_3[j].diagonal);
+      } else {
+	printf("  current: j=%d/%d %u %d..%d\n",
+	       j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3);
+      }
+#endif
+
+      /* Get to correct chrnum */
+      while (j < plus_nsegments_3 && (plus_segments_3[j].diagonal == (Univcoord_T) -1 || plus_segments_3[j].diagonal < segment5->diagonal)) {
+#ifdef DEBUG5
+	if (plus_segments_3[j].diagonal == (Univcoord_T) -1) {
+	  printf("  advancing: j=%d/%d %u\n",j,plus_nsegments_3,plus_segments_3[j].diagonal);
+	} else {
+	  printf("  advancing: j=%d/%d %u %d..%d\n",
+		 j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3);
+	}
+#endif
+	j++;
+      }
+
+      if (j < plus_nsegments_3) {
+	while (j >= 0 && plus_segments_3[j].diagonal != (Univcoord_T) -1 && plus_segments_3[j].diagonal > segment5->diagonal) {
+	  debug5(printf("  backup: j=%d/%d %u %d..%d\n",
+			j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3));
+	  j--;
+	}
+	j++;		/* Finish backup */
+
+	/* Cannot perform arithmetic on diagonal, because we want to preserve -1 as being the largest value */
+	/* Ignore inclusion of querylength inside pairmax */
+	while (j < plus_nsegments_3 && plus_segments_3[j].diagonal <= insert_start + pairmax /*- querylength3*/) {
+	  debug5(printf("  overlap: j=%d/%d, %u <= %u + %u, %d..%d\n",
+			j,plus_nsegments_3,plus_segments_3[j].diagonal,
+			insert_start,pairmax,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3));
+	  debug5(printf("Setting plus segments %d and %d to be pairable: %u and %u\n",i,j,segment5->diagonal,plus_segments_3[j].diagonal));
+	  segment5->pairablep = true;
+	  plus_segments_3[j].pairablep = true;
+	  j++;
+	}
+      }
+    }
+  }
+		
+  /* minus/minus */
+  j = 0;
+  for (i = 0; i < minus_nsegments_3; i++) {
+    segment3 = &(minus_segments_3[i]);
+    if ((insert_start = segment3->diagonal) == (Univcoord_T) -1) {
+      /* Skip chromosomal end marker */
+    } else {
+#ifdef DEBUG5
+      printf("minus/minus: i=%d/%d %u %d..%d\n",
+	     i,minus_nsegments_3,segment3->diagonal,segment3->querypos5,segment3->querypos3);
+      if (j >= minus_nsegments_5) {
+	printf("  current: j=%d/%d\n",j,minus_nsegments_5);
+      } else if (minus_segments_5[j].diagonal == (Univcoord_T) -1) {
+	printf("  current: j=%d/%d %u\n",j,minus_nsegments_5,minus_segments_5[j].diagonal);
+      } else {
+	printf("  current: j=%d/%d %u %d..%d\n",
+	       j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3);
+      }
+#endif
+      
+      /* Get to correct chrnum */
+      while (j < minus_nsegments_5 && (minus_segments_5[j].diagonal == (Univcoord_T) -1 || minus_segments_5[j].diagonal < segment3->diagonal)) {
+#ifdef DEBUG5
+	if (minus_segments_5[j].diagonal == (Univcoord_T) -1) {
+	  printf("  advancing: j=%d/%d %u\n",j,minus_nsegments_5,minus_segments_5[j].diagonal);
+	} else {
+	  printf("  advancing: j=%d/%d %u %d..%d\n",
+		 j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3);
+	}
+#endif
+	j++;
+      }
+
+      if (j < minus_nsegments_5) {
+	while (j >= 0 && minus_segments_5[j].diagonal != (Univcoord_T) -1 && minus_segments_5[j].diagonal > segment3->diagonal) {
+	  debug5(printf("  backup: j=%d/%d %u %d..%d\n",
+			j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3));
+	  j--;
+	}
+	j++;		/* Finish backup */
+
+	/* Cannot perform arithmetic on diagonal, because we want to preserve -1 as being the largest value */
+	/* Ignore inclusion of querylength inside pairmax */
+	while (j < minus_nsegments_5 && minus_segments_5[j].diagonal <= insert_start + pairmax /*- querylength5*/) {
+	  debug5(printf("  overlap: j=%d/%d %u %d..%d\n",
+			j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3));
+	  debug5(printf("Setting minus segments %d and %d to be pairable: %u and %u\n",i,j,segment3->diagonal,minus_segments_5[j].diagonal));
+	  segment3->pairablep = true;
+	  minus_segments_5[j].pairablep = true;
+	  j++;
+	}
+      }
+    }
+  }
+
+  return;
+}
+#endif
+
+
+#ifdef LARGE_GENOMES
+/* Identifies all segments on one strand (LARGE_GENOMES version, where
+   each genomic position is supplied as a high byte in positions_high
+   plus a low 32-bit word in positions_low).
+
+   For each non-omitted querypos in 0..query_lastpos that has
+   positions, a batch is placed on a min-heap ordered by diagonal and
+   then by querypos.  Entries are then taken from the heap in that
+   order, and consecutive entries on the same diagonal are combined
+   into a single segment covering querypos5..querypos3, with floor
+   scores accumulated from floors.  The diagterm added to each position
+   is querylength - querypos when plusp is true, and querypos +
+   index1part otherwise.
+
+   Returns a MALLOC'd array of segments, in which a marker segment with
+   diagonal (Univcoord_T) -1 is written after the segments of each
+   chromosome.  Returns NULL if floors is NULL or if no batch could be
+   created.
+
+   Outputs:
+     *nsegments: number of entries in the returned array, including
+       the chromosome marker segments
+     *anchor_segments, *nanchors: MALLOC'd array of pointers into the
+       returned array.  If the total number of segments is at most
+       max_anchors, this holds all segments.  Otherwise it holds the
+       segments with querypos3 > querypos5, limited (when there are
+       more than max_anchors of them) to about max_anchors per value
+       of querypos5 % index1interval and re-sorted with
+       Segment_diagonal_cmp.
+     *spliceable, *nspliceable: array of pointers to segments whose
+       following diagonal lies within max_distance; *spliceable is
+       NULL when overall_max_distance is 0.
+
+   On both NULL-return paths, every output is set to 0 or NULL, so
+   callers need not pre-initialize them.
+
+   TODO: Change spliceable to be an attribute of the segment.  Then we
+   can loop over anchor_segments only */
+static struct Segment_T *
+identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors,
+		       Segment_T **spliceable, int *nspliceable,
+#ifdef LARGE_GENOMES
+		       unsigned char **positions_high, UINT4 **positions_low,
+#else
+		       Univcoord_T **positions,
+#endif
+		       int *npositions, bool *omitted, int querylength, int query_lastpos,
+		       Floors_T floors, bool plusp) {
+  struct Segment_T *segments = NULL;
+  Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src;
+  int length_threshold;
+  int n_all_segments, n;
+  int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL];
+  int mod;
+  int k;
+
+  struct Batch_T *batchpool;
+  struct Batch_T sentinel_struct;
+  Batch_T *heap, sentinel;
+  int smallesti, righti;
+  Batch_T batch;
+  int heapsize = 0;
+  int parenti, i;
+  int querypos, first_querypos, last_querypos;
+  int floor_left, floor_right, floor_incr;
+  int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
+  int *floors_from_neg3, *floors_to_pos3;
+  /* int exclude_xfirst, exclude_xlast; */
+  Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U;
+  Chrpos_T chrlength, max_distance;
+  Chrnum_T chrnum = 1;
+#ifdef OLD_FLOOR_ENDS
+  int halfquerylength, halfquery_lastpos;
+#endif
+
+#ifdef DIAGONAL_ADD_QUERYPOS
+  UINT8 diagonal_add_querypos;
+#endif
+  int total_npositions = 0;
+  int joffset = 0, j;
+
+#ifdef DEBUG
+  Segment_T segment, *p;
+#endif
+
+  Segment_T ptr, ptr_chrstart;
+  Segment_T *ptr_spliceable;
+  bool last_spliceable_p = false;
+  /* bool next_spliceable_p; */
+#ifdef DEBUG19
+  Segment_T ptr0;
+#endif
+#ifndef SLOW_CHR_UPDATE
+  Univcoord_T goal;
+  int nchromosomes_local = nchromosomes;
+  Univcoord_T *chrhighs_local = chrhighs;
+#endif
+
+  Univcoord_T *splicesites_local, splicesites_static[1];
+  int nsplicesites_local;
+
+  debug(printf("*** Starting identify_all_segments on %s ***\n",plusp ? "plus" : "minus"));
+
+  if (floors == NULL) {
+    *nsegments = 0;
+    *anchor_segments = (Segment_T *) NULL;
+    *nanchors = 0;
+    *spliceable = (Segment_T *) NULL;
+    *nspliceable = 0;
+    return (struct Segment_T *) NULL;
+  }
+
+  /* With no splicesites, use a one-element stand-in holding the maximum
+     coordinate, so the splicesites_local[0] comparisons below are valid */
+  if (splicesites == NULL) {
+    splicesites_local = splicesites_static;
+    splicesites_local[0] = (Univcoord_T) -1;
+    nsplicesites_local = 0;
+  } else {
+    splicesites_local = splicesites;
+    nsplicesites_local = nsplicesites;
+  }
+
+#ifdef OLD_FLOOR_ENDS
+  halfquerylength = querylength / 2;
+  halfquery_lastpos = halfquerylength - index1part;
+#endif
+
+  /* Create sentinel */
+#ifdef DIAGONAL_ADD_QUERYPOS
+  sentinel_struct.diagonal_add_querypos = (UINT8) -1; /* infinity */
+  sentinel_struct.diagonal_add_querypos <<= 32;
+#else
+  sentinel_struct.querypos = querylength; /* essentially infinity */
+  sentinel_struct.diagonal = (Univcoord_T) -1; /* infinity */
+#endif
+  sentinel = &sentinel_struct;
+
+  /* Set up batches */
+  batchpool = (struct Batch_T *) MALLOCA((query_lastpos+1) * sizeof(struct Batch_T));
+  heap = (Batch_T *) MALLOCA((2*(query_lastpos+1)+1+1) * sizeof(Batch_T));
+
+  /* Don't add entries for compoundpos positions (skip querypos -2, -1, lastpos+1, lastpos+2) */
+  if (plusp) {
+    for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+      if (omitted[querypos] == true) {
+	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+		      querypos,npositions[querypos],omitted[querypos]));
+      } else if (npositions[querypos] > 0) {
+	debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
+		      querypos,npositions[querypos],omitted[querypos]));
+	batch = &(batchpool[i]);
+#ifdef LARGE_GENOMES
+	Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos],
+		   npositions[querypos],querylength);
+#else
+	Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],
+		   npositions[querypos],querylength);
+#endif
+	total_npositions += npositions[querypos];
+	if (batch->npositions > 0) {
+	  min_heap_insert(heap,&heapsize,batch);
+	  i++;
+	}
+      } else {
+	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+		      querypos,npositions[querypos],omitted[querypos]));
+      }
+    }
+  } else {
+    for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+      if (omitted[querypos] == true) {
+	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+		      querypos,npositions[querypos],omitted[querypos]));
+      } else if (npositions[querypos] > 0) {
+	debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
+		      querypos,npositions[querypos],omitted[querypos]));
+	batch = &(batchpool[i]);
+#ifdef LARGE_GENOMES
+	Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos],
+		   npositions[querypos],querylength);
+#else
+	Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],
+		   npositions[querypos],querylength);
+#endif
+	total_npositions += npositions[querypos];
+	if (batch->npositions > 0) {
+	  min_heap_insert(heap,&heapsize,batch);
+	  i++;
+	}
+      } else {
+	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+		      querypos,npositions[querypos],omitted[querypos]));
+      }
+    }
+  }
+  debug14(printf("Initial total_npositions = %d\n",total_npositions));
+
+
+  if (i == 0) {
+    /* No batches were created.  Report an empty result through every
+       output parameter, as in the floors == NULL case above, so that
+       callers never read unset pointers or counts. */
+    FREEA(heap);
+    FREEA(batchpool);
+    *nsegments = 0;
+    *anchor_segments = (Segment_T *) NULL;
+    *nanchors = 0;
+    *spliceable = (Segment_T *) NULL;
+    *nspliceable = 0;
+    return (struct Segment_T *) NULL;
+  }
+
+  /* Set up rest of heap.  Sentinels let the heapify loops below read
+     both children of any occupied node without bounds checks. */
+  for (i = heapsize+1; i <= 2*heapsize+1; i++) {
+    heap[i] = sentinel;
+  }
+
+  /* Putting chr marker "segments" after each chromosome */
+  segments = (struct Segment_T *) MALLOC((total_npositions + nchromosomes) * sizeof(struct Segment_T));
+  ptr_chrstart = ptr = &(segments[0]);
+  all_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+  ptr_all = &(all_segments[0]);
+  *anchor_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+  ptr_anchor = &((*anchor_segments)[0]);
+  if (overall_max_distance == 0) {
+    ptr_spliceable = *spliceable = (Segment_T *) NULL;
+  } else {
+    ptr_spliceable = *spliceable = (Segment_T *) CALLOC(total_npositions,sizeof(Segment_T));
+  }
+
+  /*
+  if ((exclude_xfirst = firstbound-2-index1part-max_end_insertions) < 3) {
+    exclude_xfirst = 3;
+  }
+  if ((exclude_xlast = lastbound+1+max_end_insertions) > query_lastpos-3) {
+    exclude_xlast = query_lastpos-3;
+  }
+  */
+
+#if 0
+  /* Should account for firstbound and lastbound */
+  floors_from_xfirst = floors->scorefrom[/* xfirst_from = */ firstbound-index1interval+max_end_insertions];
+  floors_to_xlast = floors->scoreto[/* xlast_to = */ lastbound+1+index1interval-index1part-max_end_insertions];
+#else
+  /* This was previously run in identify_all_segments and not in identify_all_segments_for_terminals */
+  if (spansize /* +max_end_insertions */ > query_lastpos + index1interval) {
+    floors_from_xfirst = floors->scorefrom[query_lastpos+index1interval];
+  } else {
+    floors_from_xfirst = floors->scorefrom[spansize /* +max_end_insertions */];
+  }
+  if (query_lastpos-spansize /* -max_end_insertions */ < -index1interval) {
+    floors_to_xlast = floors->scoreto[-index1interval];
+  } else {
+    floors_to_xlast = floors->scoreto[query_lastpos-spansize /* -max_end_insertions */];
+  }
+#endif
+  floors_from_neg3 = floors->scorefrom[-index1interval];
+  floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+
+  /* Initialize loop */
+  batch = heap[1];
+  first_querypos = last_querypos = querypos = batch->querypos;
+  last_diagonal = diagonal = batch->diagonal;
+
+  floor_incr = floors_from_neg3[first_querypos];
+  floor = floor_incr;
+  floor_xlast = floor_incr;
+  floor_xfirst = floors_from_xfirst[first_querypos] /* floors->scorefrom[xfirst_from][first_querypos] */;
+
+#ifdef OLD_FLOOR_ENDS
+  if (querypos < halfquery_lastpos) {
+    floor_left = floor_incr;
+  } else {
+    floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
+  }
+  if (querypos < halfquerylength) {
+    floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
+  } else {
+    floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
+  }
+#else
+  floor_left = floor_incr;
+#ifdef DEBUG1
+  floor_right = -99;
+#endif
+#endif
+
+
+  debug1(printf("multiple_mm_%s, diagonal %llu, querypos %d\n",
+		plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+  debug1(printf("first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+	        first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+  if (--batch->npositions <= 0) {
+    /* Use last entry in heap for insertion */
+    batch = heap[heapsize];
+    querypos = batch->querypos;
+    heap[heapsize--] = sentinel;
+
+  } else {
+    /* Use this batch for insertion (same querypos) */
+#ifdef LARGE_GENOMES
+    batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+#elif defined(WORDS_BIGENDIAN)
+    batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
+#else
+    batch->diagonal = *(++batch->positions) + batch->diagterm;
+#endif
+#ifdef DIAGONAL_ADD_QUERYPOS
+    batch->diagonal_add_querypos = (UINT8) batch->diagonal;
+    batch->diagonal_add_querypos <<= 32;
+    batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
+#endif
+  }
+
+  /* heapify */
+  parenti = 1;
+#ifdef DIAGONAL_ADD_QUERYPOS
+  diagonal_add_querypos = batch->diagonal_add_querypos;
+  smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
+  while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
+    heap[parenti] = heap[smallesti];
+    parenti = smallesti;
+    smallesti = LEFT(parenti);
+    righti = smallesti+1;
+    if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
+      smallesti = righti;
+    }
+  }
+#else
+  diagonal = batch->diagonal;
+  smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
+	       ((heap[3]->diagonal == heap[2]->diagonal) &&
+		(heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
+  /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
+  while (diagonal > heap[smallesti]->diagonal ||
+	 (diagonal == heap[smallesti]->diagonal &&
+	  querypos > heap[smallesti]->querypos)) {
+    heap[parenti] = heap[smallesti];
+    parenti = smallesti;
+    smallesti = LEFT(parenti);
+    righti = smallesti+1;
+    if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
+		  ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
+		   (heap[righti]->querypos < heap[smallesti]->querypos))) {
+      smallesti = righti;
+    }
+  }
+#endif
+  heap[parenti] = batch;
+
+
+  /* Continue after initialization */
+  while (heapsize > 0) {
+    batch = heap[1];
+    querypos = batch->querypos;
+    diagonal = batch->diagonal;
+    debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
+
+    if (diagonal == last_diagonal) {
+      /* Continuing exact match or substitution */
+      floor_incr = floors->scorefrom[last_querypos][querypos];
+      floor += floor_incr;
+      floor_xfirst += floor_incr;
+      floor_xlast += floor_incr;
+
+#ifdef OLD_FLOOR_ENDS
+      /* Why is this here?  Just set floor_left at start and floor_right at end. */
+      if (querypos < halfquery_lastpos) {
+	floor_left += floor_incr;
+      } else if (last_querypos < halfquery_lastpos) {
+	/* Finish floor_left */
+	floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+      }
+      if (querypos >= halfquerylength) {
+	if (last_querypos < halfquerylength) {
+	  /* Start floor_right */
+	  floor_right = floors->scorefrom[halfquerylength-index1interval][querypos];
+	} else {
+	  floor_right += floor_incr;
+	}
+      }
+#endif
+
+      debug1(printf("diagonal %llu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
+		    (unsigned long long) diagonal,last_querypos,querypos,floor_incr));
+      debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+		    plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,
+		    floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+    } else {
+      /* End of diagonal */
+      floor_incr = floors_to_pos3[last_querypos]  /* floors->score[last_querypos][query_lastpos+index1interval] */;
+      floor += floor_incr;
+      floor_xfirst += floor_incr;
+      floor_xlast += floors_to_xlast[last_querypos];  /* floors->score[last_querypos][xlast_to]; */
+
+#ifdef OLD_FLOOR_ENDS
+      if (last_querypos < halfquery_lastpos) {
+	floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+	floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
+      }
+      if (last_querypos >= halfquerylength) {
+	floor_right += floor_incr;
+      }
+#else
+      floor_right = floor_incr;
+#endif
+
+      debug1(printf("new diagonal %llu > last diagonal %llu: last_querypos = %d => final values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+		    (unsigned long long) diagonal,(unsigned long long) last_diagonal,last_querypos,
+		    floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+      if (last_diagonal > chrhigh) {
+	if (ptr > ptr_chrstart) {
+	  /* Add chr marker segment */
+	  debug14(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
+	  ptr->diagonal = (Univcoord_T) -1;
+	  ptr_chrstart = ++ptr;
+	}
+
+	/* update chromosome bounds, based on low end */
+#ifdef SLOW_CHR_UPDATE
+	chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
+	Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+	/* chrhigh += 1; */
+#else
+	j = 1;
+#ifdef NO_EXTENSIONS_BEFORE_ZERO
+	goal = last_diagonal - querylength + 1;
+#else
+	goal = last_diagonal + 1;
+#endif
+	while (j < nchromosomes_local && chrhighs_local[j] < goal) {
+	  j <<= 1;			/* gallop by 2 */
+	}
+	if (j >= nchromosomes_local) {
+	  j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
+	} else {
+	  j = binary_search(j >> 1,j,chrhighs_local,goal);
+	}
+	chrnum += j;
+#ifdef DEBUG15
+	if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
+	  fprintf(stderr,"Got chrnum %d, but wanted %d\n",
+		  chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
+	  abort();
+	}
+#endif
+	chroffset = chroffsets[chrnum-1];
+	chrhigh = chrhighs[chrnum-1];
+	chrlength = chrlengths[chrnum-1];
+	chrhighs_local += j;
+	nchromosomes_local -= j;
+#endif
+      }
+      if (last_diagonal <= chrhigh) { /* FORMULA for high position */
+	/* position of high end is within current chromosome */
+	debug1(printf("  => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+		      (unsigned long long) last_diagonal,first_querypos,last_querypos,
+		      (unsigned long long) chroffset,(unsigned long long) chrhigh,
+		      floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+	/* Save segment, but first advance splicesites past segment_left */
+	segment_left = last_diagonal - querylength;
+	max_distance = overall_max_distance;
+	if (splicesites_local[0] >= last_diagonal) {
+	  ptr->splicesites_i = -1;
+	} else if (Splicetrie_splicesite_p(segment_left,/*pos5*/1,/*pos3*/querylength) == false) {
+	  ptr->splicesites_i = -1;
+	} else {
+	  if (splicesites_local[0] < segment_left) {
+	    j = 1;
+	    while (j < nsplicesites_local && splicesites_local[j] < segment_left) {
+	      j <<= 1;		/* gallop by 2 */
+	    }
+	    if (j >= nsplicesites_local) {
+	      j = binary_search(j >> 1,nsplicesites_local,splicesites_local,segment_left);
+	    } else {
+	      j = binary_search(j >> 1,j,splicesites_local,segment_left);
+	    }
+	    joffset += j;
+	    splicesites_local += j;
+	    nsplicesites_local -= j;
+	  }
+	    
+	  if (splicesites_local[0] >= last_diagonal) {
+	    ptr->splicesites_i = -1;
+	  } else {
+	    ptr->splicesites_i = joffset;
+	    /* Widen max_distance to the largest splicedist among the
+	       splicesites that lie before last_diagonal */
+	    j = joffset;
+	    while (j < nsplicesites && splicesites[j] < last_diagonal) {
+	      if (splicedists[j] > max_distance) {
+		max_distance = splicedists[j];
+	      }
+	      j++;
+	    }
+	  }
+	}
+
+	/* Save segment */
+	ptr->diagonal = last_diagonal;
+	ptr->chrnum = chrnum;
+	ptr->chroffset = chroffset;
+	ptr->chrhigh = chrhigh;
+	ptr->chrlength = chrlength;
+	ptr->querypos5 = first_querypos;
+	ptr->querypos3 = last_querypos;
+
+	/* FORMULA */
+	if (plusp) {
+	  ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+	  ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+	} else {
+	  ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+	  ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+	}
+
+	ptr->floor = floor;
+	ptr->floor_xfirst = floor_xfirst;
+	ptr->floor_xlast = floor_xlast;
+	ptr->floor_left = floor_left;
+	ptr->floor_right = floor_right;
+	ptr->leftmost = ptr->rightmost = -1;
+	ptr->left_splice_p = ptr->right_splice_p = false;
+	ptr->spliceable_low_p = last_spliceable_p;
+	/* ptr->spliceable_high_p = false; */
+#if 0
+	ptr->leftspan = ptr->rightspan = -1;
+#endif
+	ptr->usedp = false;
+	ptr->pairablep = false;
+
+#if 0
+	/* Not doing this, because the max_distance test is already good enough */
+	if (plusp) {
+	  /* For plus-strand splicing, require segmenti->querypos3 < segmentj->querypos5,
+	     so if segmenti->querypos3 is too high, then it is not spliceable */
+	  if (last_querypos > query_lastpos) {
+	    /* Not spliceable */
+	    last_spliceable_p = false;
+	  } else if (diagonal <= last_diagonal + max_distance) {
+	    *ptr_spliceable++ = ptr;
+	    ptr->spliceable_high_p = last_spliceable_p = true;
+	  }
+	} else {
+	  /* For minus-strand splicing, require segmenti->querypos5 > segmentj->querypos3,
+	     so if segmenti->querypos5 is too low, then it is not spliceable */
+	  if (first_querypos < index1part) {
+	    /* Not spliceable */
+	    last_spliceable_p = false;
+	  } else if (diagonal <= last_diagonal + max_distance) {
+	    *ptr_spliceable++ = ptr;
+	    ptr->spliceable_high_p = last_spliceable_p = true;
+	  }
+	}
+#endif
+	if (diagonal <= last_diagonal + max_distance) {
+	  *ptr_spliceable++ = ptr;
+	  ptr->spliceable_high_p = last_spliceable_p = true;
+	  debug4s(printf("%s diagonal %u is spliceable because next one is at %u\n",
+			 plusp ? "plus" : "minus",last_diagonal,diagonal));
+	} else {
+	  ptr->spliceable_high_p = last_spliceable_p = false;
+	  debug4s(printf("%s diagonal %u is not spliceable because next one is at %u\n",
+			 plusp ? "plus" : "minus",last_diagonal,diagonal));
+	}
+	debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal-chroffset,ptr->querypos5,ptr->querypos3));
+	*ptr_all++ = ptr;
+	if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+	  debug14(printf(" ANCHOR"));
+	  *ptr_anchor++ = ptr;
+	}
+	debug14(printf("\n"));
+	ptr++;
+      }
+
+      /* Prepare next diagonal */
+      first_querypos = querypos;
+      last_diagonal = diagonal;
+      floor_incr = floors_from_neg3[first_querypos] /* floors->score[-index1interval][first_querypos] */;
+      floor = floor_incr;
+      floor_xlast = floor_incr;
+      floor_xfirst = floors_from_xfirst[first_querypos];  /* floors->score[xfirst_from][first_querypos]; */
+
+#ifdef OLD_FLOOR_ENDS
+      if (querypos < halfquery_lastpos) {
+	floor_left = floor_incr;
+      } else {
+	floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
+      }
+      if (querypos < halfquerylength) {
+	floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
+      } else {
+	floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
+      }
+#else
+      floor_left = floor_incr;
+#ifdef DEBUG1
+      floor_right = -99;	/* For debugging output */
+#endif
+#endif
+
+      debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d\n",
+		    plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+      debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+		    (unsigned long long) diagonal,first_querypos,
+		    floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+    }
+    last_querypos = querypos;
+
+
+    if (--batch->npositions <= 0) {
+      /* Use last entry in heap for insertion */
+      batch = heap[heapsize];
+      querypos = batch->querypos;
+      heap[heapsize--] = sentinel;
+
+    } else {
+      /* Use this batch for insertion (same querypos) */
+#ifdef LARGE_GENOMES
+      batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+#elif defined(WORDS_BIGENDIAN)
+      batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
+#else
+      batch->diagonal = *(++batch->positions) + batch->diagterm;
+#endif
+#ifdef DIAGONAL_ADD_QUERYPOS
+      batch->diagonal_add_querypos = (UINT8) batch->diagonal;
+      batch->diagonal_add_querypos <<= 32;
+      batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
+#endif
+    }
+
+    /* heapify */
+    parenti = 1;
+#ifdef DIAGONAL_ADD_QUERYPOS
+    diagonal_add_querypos = batch->diagonal_add_querypos;
+    smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
+    while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
+      heap[parenti] = heap[smallesti];
+      parenti = smallesti;
+      smallesti = LEFT(parenti);
+      righti = smallesti+1;
+      if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
+	smallesti = righti;
+      }
+    }
+#else
+    diagonal = batch->diagonal;
+    smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
+		 ((heap[3]->diagonal == heap[2]->diagonal) &&
+		  (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
+    /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
+    while (diagonal > heap[smallesti]->diagonal ||
+	   (diagonal == heap[smallesti]->diagonal &&
+	    querypos > heap[smallesti]->querypos)) {
+      heap[parenti] = heap[smallesti];
+      parenti = smallesti;
+      smallesti = LEFT(parenti);
+      righti = smallesti+1;
+      if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
+	  ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
+	   (heap[righti]->querypos < heap[smallesti]->querypos))) {
+	smallesti = righti;
+      }
+    }
+#endif
+    heap[parenti] = batch;
+  }
+  debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
+  debug14(printf("\n"));
+
+  /* Terminate loop. */
+  floor_incr = floors_to_pos3[last_querypos];   /* floors->score[last_querypos][query_lastpos+index1interval]; */
+  floor += floor_incr;
+  floor_xfirst += floor_incr;
+  floor_xlast += floors_to_xlast[last_querypos];  /* floors->score[last_querypos][xlast_to]; */
+
+#ifdef OLD_FLOOR_ENDS
+  if (last_querypos < halfquery_lastpos) {
+    floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+    floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
+  }
+  if (last_querypos >= halfquerylength) {
+    floor_right += floor_incr;
+  }
+#else
+  floor_right = floor_incr;
+#endif
+  
+  debug1(printf("no more diagonals: last_querypos = %d => terminal values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+		last_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+  debug1(printf("last_diagonal %u vs chrhigh %u (looking for >)\n",last_diagonal,chrhigh));
+  if (last_diagonal > chrhigh) {
+    if (ptr > ptr_chrstart) {
+      /* Add chr marker segment */
+      debug14(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
+      ptr->diagonal = (Univcoord_T) -1;
+      ptr_chrstart = ++ptr;
+    }
+
+    /* update chromosome bounds, based on low end */
+#ifdef SLOW_CHR_UPDATE
+    chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
+    Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+    /* chrhigh += 1; */
+#else
+    j = 1;
+#ifdef NO_EXTENSIONS_BEFORE_ZERO
+    goal = last_diagonal - querylength + 1;
+#else
+    goal = last_diagonal + 1;
+#endif
+    while (j < nchromosomes_local && chrhighs_local[j] < goal) {
+      j <<= 1;			/* gallop by 2 */
+    }
+    if (j >= nchromosomes_local) {
+      j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
+    } else {
+      j = binary_search(j >> 1,j,chrhighs_local,goal);
+    }
+    chrnum += j;
+#ifdef DEBUG15
+    if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
+      fprintf(stderr,"Got chrnum %d, but wanted %d\n",
+	      chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
+      abort();
+    }
+#endif
+    chroffset = chroffsets[chrnum-1];
+    chrhigh = chrhighs[chrnum-1];
+    chrlength = chrlengths[chrnum-1];
+    chrhighs_local += j;
+    nchromosomes_local -= j;
+#endif
+  }
+
+  debug1(printf("last_diagonal %u vs chrhigh %u (looking for <=)\n",last_diagonal,chrhigh));
+  if (last_diagonal <= chrhigh) { /* FORMULA for high position */
+    /* position of high end is within current chromosome */
+    debug1(printf("  => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+		  (unsigned long long) last_diagonal,first_querypos,last_querypos,
+		  (unsigned long long) chroffset,(unsigned long long) chrhigh,
+		  floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+    /* Save segment, but first advance splicesites past segment_left */
+    segment_left = last_diagonal - querylength;
+#if 0
+    /* Last segment is not spliceable */
+    max_distance = overall_max_distance;
+#endif
+    if (splicesites_local[0] >= last_diagonal) {
+      ptr->splicesites_i = -1;
+    } else if (Splicetrie_splicesite_p(segment_left,/*pos5*/1,/*pos3*/querylength) == false) {
+      ptr->splicesites_i = -1;
+    } else {
+      if (splicesites_local[0] < segment_left) {
+	j = 1;
+	while (j < nsplicesites_local && splicesites_local[j] < segment_left) {
+	  j <<= 1;		/* gallop by 2 */
+	}
+	if (j >= nsplicesites_local) {
+	  j = binary_search(j >> 1,nsplicesites_local,splicesites_local,segment_left);
+	} else {
+	  j = binary_search(j >> 1,j,splicesites_local,segment_left);
+	}
+	joffset += j;
+	splicesites_local += j;
+	nsplicesites_local -= j;
+      }
+
+      if (splicesites_local[0] >= last_diagonal) {
+	ptr->splicesites_i = -1;
+      } else {
+	ptr->splicesites_i = joffset;
+#if 0
+	/* Last segment is not spliceable */
+	if (splicedists[joffset] > overall_max_distance) {
+	  max_distance = splicedists[joffset];
+	}
+#endif
+      }
+    }
+
+    /* Save segment */
+    ptr->diagonal = last_diagonal;
+    ptr->chrnum = chrnum;
+    ptr->chroffset = chroffset;
+    ptr->chrhigh = chrhigh;
+    ptr->chrlength = chrlength;
+    ptr->querypos5 = first_querypos;
+    ptr->querypos3 = last_querypos;
+
+    /* FORMULA */
+    if (plusp) {
+      ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+      ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+    } else {
+      ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+      ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+    }
+
+    ptr->floor = floor;
+    ptr->floor_xfirst = floor_xfirst;
+    ptr->floor_xlast = floor_xlast;
+    ptr->floor_left = floor_left;
+    ptr->floor_right = floor_right;
+    ptr->leftmost = ptr->rightmost = -1;
+    ptr->left_splice_p = ptr->right_splice_p = false;
+    ptr->spliceable_low_p = last_spliceable_p;
+    ptr->spliceable_high_p = false;
+#if 0
+    ptr->leftspan = ptr->rightspan = -1;
+#endif
+    ptr->usedp = false;
+    ptr->pairablep = false;
+
+    /* Last segment is not spliceable */
+    debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal - chroffset,ptr->querypos5,ptr->querypos3));
+    *ptr_all++ = ptr;
+    if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+      debug14(printf(" ANCHOR"));
+      *ptr_anchor++ = ptr;
+    }
+    debug14(printf("\n"));
+    ptr++;
+  }
+
+
+  if (ptr > ptr_chrstart) {
+    /* Final chr marker segment */
+    debug14(printf("=== ptr %p > ptr_chrstart %p, so adding final chr marker segment\n",ptr,ptr_chrstart));
+    ptr->diagonal = (Univcoord_T) -1;
+    /* ptr_chrstart = */ ++ptr;
+  }
+
+#ifdef DEBUG19
+  for (k = 0, ptr0 = segments; ptr0 < ptr; k++, ptr0++) {
+    printf("%d %llu\n",k,(unsigned long long) ptr0->diagonal);
+  }
+  printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes);
+#endif
+
+  FREEA(heap);
+  FREEA(batchpool);
+
+  /* Note: segments is in descending diagonal order.  Will need to
+     reverse before solving middle deletions */
+
+  *nsegments = ptr - segments;
+  *nanchors = ptr_anchor - *anchor_segments;
+  *nspliceable = ptr_spliceable - *spliceable;
+  debug(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
+	       *nsegments,*nspliceable,total_npositions,nchromosomes));
+  debug1(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
+		*nsegments,*nspliceable,total_npositions,nchromosomes));
+
+  assert(*nsegments <= total_npositions + nchromosomes);
+  assert(*nanchors <= total_npositions);
+  assert(*nspliceable <= total_npositions);
+
+  n_all_segments = ptr_all - all_segments;
+  debug(printf("%d all segments\n",n_all_segments));
+  debug(printf("%d anchor segments\n",*nanchors));
+
+  if (n_all_segments <= max_anchors) {
+    /* Might as well use all segments */
+    FREE(*anchor_segments);
+    *anchor_segments = all_segments;
+    *nanchors = n_all_segments;
+
+  } else if (*nanchors <= max_anchors) {
+    /* Use only the good anchor segments */
+    FREE(all_segments);
+
+  } else {
+    /* Need to limit anchor segments */
+    FREE(all_segments);
+
+    /* Treat each mod separately */
+    qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_mod_length_cmp);
+
+    mod = 0;
+    i = 0;
+    while (mod < index1interval) {
+      j = i;
+      while (j < *nanchors && (*anchor_segments)[j]->querypos5 % index1interval == mod) {
+	j++;
+      }
+      nanchors_bymod[mod] = j - i;
+
+      if (j - i <= max_anchors) {
+	naccept_bymod[mod] = j - i;
+      } else {
+	/* Accept max_anchors entries, plus up to 100 further entries
+	   that tie with the length at the cutoff */
+	k = i + max_anchors;
+	length_threshold = (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5;
+	while (k < j && k < i + max_anchors + /*ties*/100 &&
+	       (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5 == length_threshold) {
+	  k++;
+	}
+	naccept_bymod[mod] = k - i;
+      }
+
+      debug(printf("For mod %d, accepting %d out of %d anchor segments with length threshold %d\n",
+		   mod,naccept_bymod[mod],nanchors_bymod[mod],length_threshold));
+      i = j;
+      mod++;
+    }
+
+    /* Move good anchors to start of array */
+    dest = src = &((*anchor_segments)[0]);
+    *nanchors = 0;
+    for (mod = 0; mod < index1interval; mod++) {
+      memmove((void *) dest,(void *) src,naccept_bymod[mod] * sizeof(Segment_T));
+      dest += naccept_bymod[mod];
+      src += nanchors_bymod[mod];
+      *nanchors += naccept_bymod[mod];
+    }
+
+    /* Re-sort in diagonal order */
+    qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_diagonal_cmp);
+  }
+
+
+#ifdef DEBUG19
+  printf("%d total segments\n",*nsegments);
+  for (ptr0 = segments; ptr0 < ptr; ptr0++) {
+    printf("%u %d..%d\n",ptr0->diagonal,ptr0->querypos5,ptr0->querypos3);
+  }
+#endif
+
+#ifdef DEBUG
+  printf("%d selected anchor segments\n",*nanchors);
+  for (p = &(*anchor_segments)[0]; p< &((*anchor_segments)[*nanchors]); p++) {
+    segment = (Segment_T) *p;
+    printf("%u %d..%d spliceable_low:%d spliceable_high:%d\n",
+	   segment->diagonal,segment->querypos5,segment->querypos3,segment->spliceable_low_p,segment->spliceable_high_p);
+  }
+#endif
+
+  return segments;
+}
+
+
+#elif defined(USE_MERGE) || defined(USE_HEAP)
+
+/* TODO: Change spliceable to be an attribute of the segment.  Then we
+   can loop over anchor_segments only */
+static struct Segment_T *
+identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors,
+		       Segment_T **spliceable, int *nspliceable,
+#ifdef LARGE_GENOMES
+		       unsigned char **positions_high, UINT4 **positions_low,
+#else
+		       Univcoord_T **positions,
+#endif
+		       int *npositions, bool *omitted, int querylength, int query_lastpos,
+		       Floors_T floors, bool plusp) {
+  struct Segment_T *segments = NULL;
+  Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src;
+  int length_threshold;
+  int n_all_segments, n;
+  int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL];
+  int mod;
+  int k;
+
+  struct Record_T *all_records;
+  Record_T *all_records_merged, *record_ptr, record;
+  int n_all_records, i;
+
+  List_T stream_list = NULL;
+  Intlist_T streamsize_list = NULL, querypos_list = NULL, diagterm_list = NULL;
+  int nstreams = 0;
+
+  int querypos, first_querypos, last_querypos;
+  int floor_left, floor_right, floor_incr;
   int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
   int *floors_from_neg3, *floors_to_pos3;
   /* int exclude_xfirst, exclude_xlast; */
@@ -4165,68 +5492,54 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
   halfquery_lastpos = halfquerylength - index1part;
 #endif
 
-  /* Create sentinel */
-#ifdef DIAGONAL_ADD_QUERYPOS
-  sentinel_struct.diagonal_add_querypos = (UINT8) -1; /* infinity */
-  sentinel_struct.diagonal_add_querypos <<= 32;
-#else
-  sentinel_struct.querypos = querylength; /* essentially infinity */
-  sentinel_struct.diagonal = (Univcoord_T) -1; /* infinity */
-#endif
-  sentinel = &sentinel_struct;
-
-  /* Set up batches */
-  batchpool = (struct Batch_T *) MALLOCA((query_lastpos+1) * sizeof(struct Batch_T));
-  heap = (Batch_T *) MALLOCA((2*(query_lastpos+1)+1+1) * sizeof(Batch_T));
-
   /* Don't add entries for compoundpos positions (skip querypos -2, -1, lastpos+1, lastpos+2) */
   if (plusp) {
-    for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+    for (querypos = 0; querypos <= query_lastpos; querypos++) {
       if (omitted[querypos] == true) {
 	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
 		      querypos,npositions[querypos],omitted[querypos]));
       } else if (npositions[querypos] > 0) {
 	debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
 		      querypos,npositions[querypos],omitted[querypos]));
-	batch = &(batchpool[i]);
+
 #ifdef LARGE_GENOMES
+	batch = &(batchpool[i]);
 	Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos],
 		   npositions[querypos],querylength);
 #else
-	Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],
-		   npositions[querypos],querylength);
+	stream_list = List_push(stream_list,(void *) positions[querypos]);
+	streamsize_list = Intlist_push(streamsize_list,npositions[querypos]);
+	querypos_list = Intlist_push(querypos_list,querypos);
+	diagterm_list = Intlist_push(diagterm_list,querylength - querypos);
 #endif
 	total_npositions += npositions[querypos];
-	if (batch->npositions > 0) {
-	  min_heap_insert(heap,&heapsize,batch);
-	  i++;
-	}
+	nstreams++;
       } else {
 	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
 		      querypos,npositions[querypos],omitted[querypos]));
       }
     }
+
   } else {
-    for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+    for (querypos = 0; querypos <= query_lastpos; querypos++) {
       if (omitted[querypos] == true) {
 	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
 		      querypos,npositions[querypos],omitted[querypos]));
       } else if (npositions[querypos] > 0) {
 	debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
 		      querypos,npositions[querypos],omitted[querypos]));
-	batch = &(batchpool[i]);
 #ifdef LARGE_GENOMES
+	batch = &(batchpool[i]);
 	Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos],
 		   npositions[querypos],querylength);
 #else
-	Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],
-		   npositions[querypos],querylength);
+	stream_list = List_push(stream_list,(void *) positions[querypos]);
+	streamsize_list = Intlist_push(streamsize_list,npositions[querypos]);
+	querypos_list = Intlist_push(querypos_list,querypos);
+	diagterm_list = Intlist_push(diagterm_list,querypos + index1part);
 #endif
 	total_npositions += npositions[querypos];
-	if (batch->npositions > 0) {
-	  min_heap_insert(heap,&heapsize,batch);
-	  i++;
-	}
+	nstreams++;
       } else {
 	debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
 		      querypos,npositions[querypos],omitted[querypos]));
@@ -4236,17 +5549,20 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
   debug14(printf("Initial total_npositions = %d\n",total_npositions));
 
 
-  if (i == 0) {
-    FREEA(heap);
-    FREEA(batchpool);
+  if (nstreams == 0) {
     *nsegments = 0;
     return (struct Segment_T *) NULL;
+  } else {
+    all_records = (struct Record_T *) MALLOC(total_npositions * sizeof(struct Record_T));
+#ifdef USE_HEAP
+    record_ptr = all_records_merged = Merge_records_heap(&n_all_records,stream_list,streamsize_list,querypos_list,diagterm_list,
+                                                         all_records);
+#else
+    record_ptr = all_records_merged = Merge_records(&n_all_records,stream_list,streamsize_list,querypos_list,diagterm_list,
+						    all_records);
+#endif
   }
 
-  /* Set up rest of heap */
-  for (i = heapsize+1; i <= 2*heapsize+1; i++) {
-    heap[i] = sentinel;
-  }
 
   /* Putting chr marker "segments" after each chromosome */
   segments = (struct Segment_T *) MALLOC((total_npositions + nchromosomes) * sizeof(struct Segment_T));
@@ -4291,10 +5607,26 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
   floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
 
 
-  /* Initialize loop */
-  batch = heap[1];
-  first_querypos = last_querypos = querypos = batch->querypos;
-  last_diagonal = diagonal = batch->diagonal;
+  /* Skip records whose diagonal < querylength: for these, left would fall below 0 (i.e., wrap around as an unsigned value) */
+  debug1(printf("n_all_records at start: %d\n",n_all_records));
+  while (n_all_records > 0 && (*record_ptr)->diagonal < (Univcoord_T) querylength) {
+    debug1(printf("Skipping record with diagonal %d\n",(*record_ptr)->diagonal));
+    record_ptr++;
+    n_all_records -= 1;
+  }
+  debug1(printf("n_all_records at end: %d\n",n_all_records));
+  if (n_all_records == 0) {
+    FREE(all_segments);
+    FREE(segments);
+    FREE(all_records_merged);
+    FREE(all_records);
+    return (struct Segment_T *) NULL;
+  }
+
+
+  record = *record_ptr;
+  first_querypos = last_querypos = querypos = record->querypos;
+  last_diagonal = diagonal = record->diagonal;
 
   floor_incr = floors_from_neg3[first_querypos];
   floor = floor_incr;
@@ -4325,70 +5657,12 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
   debug1(printf("first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
 	        first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
 
-  if (--batch->npositions <= 0) {
-    /* Use last entry in heap for insertion */
-    batch = heap[heapsize];
-    querypos = batch->querypos;
-    heap[heapsize--] = sentinel;
-
-  } else {
-    /* Use this batch for insertion (same querypos) */
-#ifdef LARGE_GENOMES
-    batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
-#elif defined(WORDS_BIGENDIAN)
-    batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
-#else
-    batch->diagonal = *(++batch->positions) + batch->diagterm;
-#endif
-#ifdef DIAGONAL_ADD_QUERYPOS
-    batch->diagonal_add_querypos = (UINT8) batch->diagonal;
-    batch->diagonal_add_querypos <<= 32;
-    batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
-#endif
-  }
-
-  /* heapify */
-  parenti = 1;
-#ifdef DIAGONAL_ADD_QUERYPOS
-  diagonal_add_querypos = batch->diagonal_add_querypos;
-  smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
-  while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
-    heap[parenti] = heap[smallesti];
-    parenti = smallesti;
-    smallesti = LEFT(parenti);
-    righti = smallesti+1;
-    if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
-      smallesti = righti;
-    }
-  }
-#else
-  diagonal = batch->diagonal;
-  smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
-	       ((heap[3]->diagonal == heap[2]->diagonal) &&
-		(heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
-  /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
-  while (diagonal > heap[smallesti]->diagonal ||
-	 (diagonal == heap[smallesti]->diagonal &&
-	  querypos > heap[smallesti]->querypos)) {
-    heap[parenti] = heap[smallesti];
-    parenti = smallesti;
-    smallesti = LEFT(parenti);
-    righti = smallesti+1;
-    if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
-		  ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
-		   (heap[righti]->querypos < heap[smallesti]->querypos))) {
-      smallesti = righti;
-    }
-  }
-#endif
-  heap[parenti] = batch;
-
 
   /* Continue after initialization */
-  while (heapsize > 0) {
-    batch = heap[1];
-    querypos = batch->querypos;
-    diagonal = batch->diagonal;
+  while (--n_all_records > 0) {
+    record = *++record_ptr;
+    querypos = record->querypos;
+    diagonal = record->diagonal;
     debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
 
     if (diagonal == last_diagonal) {
@@ -4643,69 +5917,14 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
 
     }
     last_querypos = querypos;
-
-
-    if (--batch->npositions <= 0) {
-      /* Use last entry in heap for insertion */
-      batch = heap[heapsize];
-      querypos = batch->querypos;
-      heap[heapsize--] = sentinel;
-
-    } else {
-      /* Use this batch for insertion (same querypos) */
-#ifdef LARGE_GENOMES
-    batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
-#elif defined(WORDS_BIGENDIAN)
-      batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
-#else
-      batch->diagonal = *(++batch->positions) + batch->diagterm;
-#endif
-#ifdef DIAGONAL_ADD_QUERYPOS
-      batch->diagonal_add_querypos = (UINT8) batch->diagonal;
-      batch->diagonal_add_querypos <<= 32;
-      batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
-#endif
-    }
-
-    /* heapify */
-    parenti = 1;
-#ifdef DIAGONAL_ADD_QUERYPOS
-    diagonal_add_querypos = batch->diagonal_add_querypos;
-    smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
-    while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
-      heap[parenti] = heap[smallesti];
-      parenti = smallesti;
-      smallesti = LEFT(parenti);
-      righti = smallesti+1;
-      if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
-	smallesti = righti;
-      }
-    }
-#else
-    diagonal = batch->diagonal;
-    smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
-		 ((heap[3]->diagonal == heap[2]->diagonal) &&
-		  (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
-    /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
-    while (diagonal > heap[smallesti]->diagonal ||
-	   (diagonal == heap[smallesti]->diagonal &&
-	    querypos > heap[smallesti]->querypos)) {
-      heap[parenti] = heap[smallesti];
-      parenti = smallesti;
-      smallesti = LEFT(parenti);
-      righti = smallesti+1;
-      if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
-	  ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
-	   (heap[righti]->querypos < heap[smallesti]->querypos))) {
-	smallesti = righti;
-      }
-    }
-#endif
-    heap[parenti] = batch;
   }
   debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
   debug14(printf("\n"));
 
+  FREE(all_records_merged);
+  FREE(all_records);
+
+
   /* Terminate loop. */
   floor_incr = floors_to_pos3[last_querypos];   /* floors->score[last_querypos][query_lastpos+index1interval]; */
   floor += floor_incr;
@@ -4878,8 +6097,6 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
   printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes);
 #endif
 
-  FREEA(heap);
-  FREEA(batchpool);
 
   /* Note: segments is in descending diagonal order.  Will need to
      reverse before solving middle deletions */
@@ -4977,12 +6194,9 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
 
   return segments;
 }
-#endif
-
 
+#elif defined(USE_LOSER_TREES)
 
-#ifndef USE_HEAP
-/* Uses a loser tree */
 /* TODO: Change spliceable to be an attribute of the segment.  Then we
    can loop over anchor_segments only */
 static struct Segment_T *
@@ -5083,6 +6297,7 @@ identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchor
       /* Skip */
     }
   }
+
   if (heapsize == 0) {
     *nsegments = 0;
     return (struct Segment_T *) NULL;
@@ -7862,7 +9077,7 @@ sufficient_splice_prob_halfintron (int support, int nmismatches, double splicepr
 
 
 
-/* Copied from sarray-read.c */
+/* Copied from sarray-search.c */
 static int
 donor_match_length_cmp (const void *a, const void *b) {
   Stage3end_T x = * (Stage3end_T *) a;
@@ -7880,7 +9095,7 @@ donor_match_length_cmp (const void *a, const void *b) {
   }
 }
 
-/* Copied from sarray-read.c */
+/* Copied from sarray-search.c */
 static int
 acceptor_match_length_cmp (const void *a, const void *b) {
   Stage3end_T x = * (Stage3end_T *) a;
@@ -8165,7 +9380,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
 	  }
 	}
 
-	/* Process results for segmenti, sense.  Modified from collect_elt_matches in sarray-read.c. */
+	/* Process results for segmenti, sense.  Modified from collect_elt_matches in sarray-search.c. */
 	if (spliceends_sense != NULL) {
 	  /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
 	  best_nmismatches = querylength;
@@ -8400,7 +9615,7 @@ find_singlesplices_plus (int *found_score, List_T hits, List_T *ambiguous, List_
 	  }
 	}
 
-	/* Process results for segmenti, antisense.  Modified from collect_elt_matches in sarray-read.c. */
+	/* Process results for segmenti, antisense.  Modified from collect_elt_matches in sarray-search.c. */
 	if (spliceends_antisense != NULL) {
 	  /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
 	  best_nmismatches = querylength;
@@ -8951,7 +10166,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
 	  }
 	}
 
-	/* Process results for segmenti, sense.  Modified from collect_elt_matches in sarray-read.c. */
+	/* Process results for segmenti, sense.  Modified from collect_elt_matches in sarray-search.c. */
 	if (spliceends_sense != NULL) {
 	  /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
 	  best_nmismatches = querylength;
@@ -9186,7 +10401,7 @@ find_singlesplices_minus (int *found_score, List_T hits, List_T *ambiguous, List
 	  }
 	}
 
-	/* Process results for segmenti, antisense.  Modified from collect_elt_matches in sarray-read.c. */
+	/* Process results for segmenti, antisense.  Modified from collect_elt_matches in sarray-search.c. */
 	if (spliceends_antisense != NULL) {
 	  /* nmismatches here may be different for spliceends from Splice_solve, so pick based on prob and nmismatches */
 	  best_nmismatches = querylength;
@@ -12753,7 +13968,8 @@ find_splicepairs_distant_dna (int *found_score, int *ndistantsplicepairs,
   }
 
   debug4l(printf("ndistantsplicepairs %d, maxchimerapaths %d\n",*ndistantsplicepairs,MAXCHIMERAPATHS));
-  if (*ndistantsplicepairs > MAXCHIMERAPATHS) {
+  debug4ld(printf("ndistantsplicepairs %d, maxchimerapaths %d\n",*ndistantsplicepairs,MAXCHIMERAPATHS));
+  if (0 && *ndistantsplicepairs > MAXCHIMERAPATHS) {
     /* Can afford to ignore these if MAXCHIMERAPATHS is set high enough */
     stage3list_gc(&distantsplicing);
     return distantsplicing_orig;
@@ -14284,9 +15500,15 @@ compute_floors (bool *any_omitted_p, bool *alloc_floors_p, Floors_T *floors_arra
 				   queryuc_ptr,querylength,query_lastpos);
   }
 
+#if 1
   debug(printf("Omitting frequent/repetitive oligos\n"));
   omit_oligos(&all_omitted_p,&(*any_omitted_p),this,query_lastpos,indexdb_size_threshold,
 	      omit_frequent_p,omit_repetitive_p);
+#else
+  debug(printf("Not omitting frequent/repetitive oligos\n"));
+  all_omitted_p = false;
+  *any_omitted_p = false;
+#endif
 
   if (all_omitted_p == true) {
     debug(printf("Aborting because all oligos are omitted\n"));
@@ -14728,13 +15950,13 @@ History_put (History_T this, Univinterval_T interval, List_T gmap_hits) {
 }
 
 
-/* Also defined in sarray-read.c and stage3hr.c */
+/* Also defined in sarray-search.c and stage3hr.c */
 #define add_bounded(x,plusterm,highbound) ((x + (plusterm) >= highbound) ? (highbound - 1) : x + (plusterm))
 #define subtract_bounded(x,minusterm,lowbound) ((x < lowbound + (minusterm)) ? lowbound : x - (minusterm))
 
 
 static List_T
-run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_history,
+run_gmap_for_region (bool *successp, bool *good_start_p, bool *good_end_p, History_T gmap_history,
 		     List_T hits, char *accession, char *queryuc_ptr, int querylength,
 		     int sense_try, bool favor_right_p, 
 		     
@@ -14759,10 +15981,11 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
   List_T pairs1, pairs2;
   struct Pair_T *pairarray1, *pairarray2;
   Univcoord_T start, end;
-  double min_splice_prob_1, min_splice_prob_2;
+  double avg_splice_score_1, avg_splice_score_2;
   int goodness1, goodness2;
   int npairs1, npairs2, nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
   int cdna_direction, sensedir;
+  double avg_splice_score;
   int matches1, unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
     ncanonical1, nsemicanonical1, nnoncanonical1;
   int matches2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
@@ -14781,6 +16004,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 		 (Chrpos_T) (mappingend-chroffset),watsonp,sense_try,querylength,
 		 (Chrpos_T) (knownsplice_limit_low-chroffset),(Chrpos_T) (knownsplice_limit_high-chroffset)));
 
+  *successp = false;
   *good_start_p = *good_end_p = false;
 
   /* It is possible for mappingend to equal mappingstart if the read
@@ -14793,11 +16017,18 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
       debug13(printf("Already ran these coordinates, and have results\n"));
       for (p = stored_hits; p != NULL; p = List_next(p)) {
 	if ((hit = (Stage3end_T) List_head(p)) != NULL) {
-	  if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+	  if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+	      Stage3end_gmap_goodness(hit) >= querylength + 12) {
+	    *successp = true;
 	    *good_start_p = true;
-	  }
-	  if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
 	    *good_end_p = true;
+	  } else {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+	      *good_start_p = true;
+	    }
+	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+	      *good_end_p = true;
+	    }
 	  }
 	  hits = List_push(hits,(void *) Stage3end_copy(hit));
 	}
@@ -14866,7 +16097,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 				       &ambig_prob_5_1,&ambig_prob_3_1,
 				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 				       &pairarray2,&pairs2,&npairs2,&goodness2,
 				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -14874,7 +16105,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 				       &ambig_prob_5_2,&ambig_prob_3_2,
 				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 				       Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2),
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -14896,7 +16127,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	debug13(printf("stage3 is NULL\n"));
 	stored_hits = List_push(stored_hits,(void *) NULL);
 
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	debug13(printf("stage3 is not NULL, and cdna direction not determined\n"));
 	debug13a(Pair_dump_array(pairarray1,npairs1,true));
 
@@ -14910,7 +16141,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 					ambig_end_length_5_1,ambig_end_length_3_1,
 					ambig_splicetype_5_1,ambig_splicetype_3_1,
-					min_splice_prob_1,
+					avg_splice_score_1,goodness1,
 					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 					/*left*/start,/*genomiclength*/end - start + 1,
 					/*plusp*/watsonp,genestrand,
@@ -14922,14 +16153,22 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	    FREE_OUT(pairarray1);
 
 	  } else {
-	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+		goodness1 >= querylength + 12) {
+	      *successp = true;
 	      *good_start_p = true;
-	    }
-	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
 	      *good_end_p = true;
+	    } else {
+	      if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+		*good_start_p = true;
+	      }
+	      if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+		*good_end_p = true;
+	      }
 	    }
 	    debug13(printf("Trim at start: %d, trim at end: %d\n",
 			   Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+	    debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole));
 	    stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
 	    hits = List_push(hits,(void *) hit);
 	  }
@@ -14942,7 +16181,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 					ambig_end_length_5_1,ambig_end_length_3_1,
 					ambig_splicetype_5_1,ambig_splicetype_3_1,
-					min_splice_prob_1,
+					avg_splice_score_1,goodness1,
 					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 					/*left*/end,/*genomiclength*/start - end + 1,
 					/*plusp*/watsonp,genestrand,
@@ -14954,14 +16193,22 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	    FREE_OUT(pairarray1);
 
 	  } else {
-	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+		goodness1 >= querylength + 12) {
+	      *successp = true;
 	      *good_start_p = true;
-	    }
-	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
 	      *good_end_p = true;
+	    } else {
+	      if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+		*good_start_p = true;
+	      }
+	      if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+		*good_end_p = true;
+	      }
 	    }
 	    debug13(printf("Trim at start: %d, trim at end: %d\n",
 			   Stage3end_trim_right(hit),Stage3end_trim_left(hit)));
+	    debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole));
 	    /* Don't throw away GMAP hits */
 	    stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
 	    hits = List_push(hits,(void *) hit);
@@ -14981,7 +16228,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 					ambig_end_length_5_2,ambig_end_length_3_2,
 					ambig_splicetype_5_2,ambig_splicetype_3_2,
-					min_splice_prob_2,
+					avg_splice_score_2,goodness2,
 					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 					/*left*/start,/*genomiclength*/end - start + 1,
 					/*plusp*/watsonp,genestrand,
@@ -14993,14 +16240,22 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	    FREE_OUT(pairarray2);
 
 	  } else {
-	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+		goodness2 >= querylength + 12) {
+	      *successp = true;
 	      *good_start_p = true;
-	    }
-	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
 	      *good_end_p = true;
+	    } else {
+	      if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+		*good_start_p = true;
+	      }
+	      if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+		*good_end_p = true;
+	      }
 	    }
 	    debug13(printf("Trim at start: %d, trim at end: %d\n",
 			   Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+	    debug13(printf("Goodness %d, nmismatches %d\n",goodness2,nmismatches_whole));
 	    stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
 	    hits = List_push(hits,(void *) hit);
 	  }
@@ -15013,7 +16268,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 					ambig_end_length_5_2,ambig_end_length_3_2,
 					ambig_splicetype_5_2,ambig_splicetype_3_2,
-					min_splice_prob_2,
+					avg_splice_score_2,goodness2,
 					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 					/*left*/end,/*genomiclength*/start - end + 1,
 					/*plusp*/watsonp,genestrand,
@@ -15025,14 +16280,22 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	    FREE_OUT(pairarray2);
 
 	  } else {
-	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+		goodness2 >= querylength + 12) {
+	      *successp = true;
 	      *good_start_p = true;
-	    }
-	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
 	      *good_end_p = true;
+	    } else {
+	      if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+		*good_start_p = true;
+	      }
+	      if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+		*good_end_p = true;
+	      }
 	    }
 	    debug13(printf("Trim at start: %d, trim at end: %d\n",
 			   Stage3end_trim_right(hit),Stage3end_trim_left(hit)));
+	    debug13(printf("Goodness %d, nmismatches %d\n",goodness2,nmismatches_whole));
 	    /* Don't throw away GMAP hits */
 	    stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
 	    hits = List_push(hits,(void *) hit);
@@ -15054,7 +16317,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 					ambig_end_length_5_1,ambig_end_length_3_1,
 					ambig_splicetype_5_1,ambig_splicetype_3_1,
-					min_splice_prob_1,
+					avg_splice_score_1,goodness1,
 					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 					/*left*/start,/*genomiclength*/end - start + 1,
 					/*plusp*/watsonp,genestrand,
@@ -15066,14 +16329,22 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	    FREE_OUT(pairarray1);
 
 	  } else {
-	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+		goodness1 >= querylength + 12) {
+	      *successp = true;
 	      *good_start_p = true;
-	    }
-	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
 	      *good_end_p = true;
+	    } else {
+	      if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+		*good_start_p = true;
+	      }
+	      if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+		*good_end_p = true;
+	      }
 	    }
 	    debug13(printf("Trim at start: %d, trim at end: %d\n",
 			   Stage3end_trim_left(hit),Stage3end_trim_right(hit)));
+	    debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole));
 	    stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
 	    hits = List_push(hits,(void *) hit);
 	  }
@@ -15086,7 +16357,7 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 					ambig_end_length_5_1,ambig_end_length_3_1,
 					ambig_splicetype_5_1,ambig_splicetype_3_1,
-					min_splice_prob_1,
+					avg_splice_score_1,goodness1,
 					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 					/*left*/end,/*genomiclength*/start - end + 1,
 					/*plusp*/watsonp,genestrand,
@@ -15098,14 +16369,22 @@ run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_histor
 	    FREE_OUT(pairarray1);
 
 	  } else {
-	    if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END &&
+		goodness1 >= querylength + 12) {
+	      *successp = true;
 	      *good_start_p = true;
-	    }
-	    if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
 	      *good_end_p = true;
+	    } else {
+	      if (Stage3end_trim_right(hit) < GOOD_GMAP_END) {
+		*good_start_p = true;
+	      }
+	      if (Stage3end_trim_left(hit) < GOOD_GMAP_END) {
+		*good_end_p = true;
+	      }
 	    }
 	    debug13(printf("Trim at start: %d, trim at end: %d\n",
 			   Stage3end_trim_right(hit),Stage3end_trim_left(hit)));
+	    debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole));
 	    /* Don't throw away GMAP hits */
 	    stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit));
 	    hits = List_push(hits,(void *) hit);
@@ -15374,7 +16653,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
       /* 4 */
       if (close_mappingstart_p == true && close_mappingend_p == true) {
 	debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,close_mappingstart_last,close_mappingend_last,
 				   close_knownsplice_limit_low,close_knownsplice_limit_high,
 				   /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15388,7 +16667,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	} else if (good_start_p == true) {
 	  if (fallback_mappingend_p == true) {
 	    debug13(printf("Single hit: Re-running gmap with close mappingstart only\n"));
-	    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				       /*sense_try*/0,favor_right_p,close_mappingstart_last,mappingend,
 				       close_knownsplice_limit_low,knownsplice_limit_high,
 				       /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15398,7 +16677,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	} else if (good_end_p == true) {
 	  if (fallback_mappingstart_p == true) {
 	    debug13(printf("Single hit: Re-running gmap with close mappingend only\n"));
-	    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				       /*sense_try*/0,favor_right_p,mappingstart,close_mappingend_last,
 				       knownsplice_limit_low,close_knownsplice_limit_high,
 				       /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15408,7 +16687,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	} else {
 	  if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
 	    debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n"));
-	    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				       /*sense_try*/0,favor_right_p,mappingstart,mappingend,
 				       knownsplice_limit_low,close_knownsplice_limit_high,
 				       /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15419,7 +16698,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 
       } else if (close_mappingstart_p == true) {
 	debug13(printf("Single hit: Running gmap with close mappingstart\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,close_mappingstart_last,mappingend,
 				   close_knownsplice_limit_low,knownsplice_limit_high,
 				   /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15431,7 +16710,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	  debug13(printf("Skipping re-run of gmap\n"));
 	} else if (fallback_mappingstart_p == true) {
 	  debug13(printf("Single hit: Re-running gmap with far mappingstart\n"));
-	  hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	  hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				     /*sense_try*/0,favor_right_p,mappingstart,mappingend,
 				     knownsplice_limit_low,knownsplice_limit_high,
 				     /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15441,7 +16720,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 
       } else if (close_mappingend_p == true) {
 	debug13(printf("Single hit: Running gmap with close mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,mappingstart,close_mappingend_last,
 				   knownsplice_limit_low,close_knownsplice_limit_high,
 				   /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15453,7 +16732,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	  debug13(printf("Skipping re-run of gmap\n"));
 	} else if (fallback_mappingend_p == true) {
 	  debug13(printf("Single hit: Re-running gmap with far mappingend\n"));
-	  hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	  hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				     /*sense_try*/0,favor_right_p,mappingstart,mappingend,
 				     knownsplice_limit_low,knownsplice_limit_high,
 				     /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15463,7 +16742,7 @@ convert_plus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 
       } else {
 	debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,mappingstart,mappingend,
 				   knownsplice_limit_low,knownsplice_limit_high,
 				   /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength,
@@ -15673,7 +16952,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
       /* 4 */
       if (close_mappingstart_p == true && close_mappingend_p == true) {
 	debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,query_compress_fwd,query_compress_rev,
 				   close_mappingstart_last,close_mappingend_last,
 				   close_knownsplice_limit_low,close_knownsplice_limit_high,
@@ -15688,7 +16967,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	} else if (good_start_p == true) {
 	  if (fallback_mappingend_p == true) {
 	    debug13(printf("Single hit: Re-running gmap with close mappingstart only\n"));
-	    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				       /*sense_try*/0,favor_right_p,
 				       query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
 				       close_knownsplice_limit_low,knownsplice_limit_high,
@@ -15699,7 +16978,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	} else if (good_end_p == true) {
 	  if (fallback_mappingstart_p == true) {
 	    debug13(printf("Single hit: Re-running gmap with close mappingend only\n"));
-	    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				       /*sense_try*/0,favor_right_p,
 				       query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
 				       knownsplice_limit_low,close_knownsplice_limit_high,
@@ -15710,7 +16989,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	} else {
 	  if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
 	    debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n"));
-	    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				       /*sense_try*/0,favor_right_p,
 				       query_compress_fwd,query_compress_rev,mappingstart,mappingend,
 				       knownsplice_limit_low,close_knownsplice_limit_high,
@@ -15722,7 +17001,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	
       } else if (close_mappingstart_p == true) {
 	debug13(printf("Single hit: Running gmap with close mappingstart\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,
 				   query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend,
 				   close_knownsplice_limit_low,knownsplice_limit_high,
@@ -15735,7 +17014,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	  debug13(printf("Skipping re-run of gmap\n"));
 	} else if (fallback_mappingstart_p == true) {
 	  debug13(printf("Single hit: Re-running gmap with far mappingstart\n"));
-	  hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	  hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				     /*sense_try*/0,favor_right_p,
 				     query_compress_fwd,query_compress_rev,mappingstart,mappingend,
 				     knownsplice_limit_low,knownsplice_limit_high,
@@ -15746,7 +17025,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	
       } else if (close_mappingend_p == true) {
 	debug13(printf("Single hit: Running gmap with close mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,
 				   query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last,
 				   knownsplice_limit_low,close_knownsplice_limit_high,
@@ -15759,7 +17038,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	  debug13(printf("Skipping re-run of gmap\n"));
 	} else if (fallback_mappingend_p == true) {
 	  debug13(printf("Single hit: Re-running gmap with far mappingend\n"));
-	  hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	  hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				     /*sense_try*/0,favor_right_p,
 				     query_compress_fwd,query_compress_rev,mappingstart,mappingend,
 				     knownsplice_limit_low,knownsplice_limit_high,
@@ -15770,7 +17049,7 @@ convert_minus_segments_to_gmap_via_region (History_T gmap_history, List_T hits,
 	
       } else {
 	debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength,
 				   /*sense_try*/0,favor_right_p,
 				   query_compress_fwd,query_compress_rev,mappingstart,mappingend,
 				   knownsplice_limit_low,knownsplice_limit_high,
@@ -15837,8 +17116,8 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
     max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
     unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
     ncanonical2, nsemicanonical2, nnoncanonical2;
-  double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
-  double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
   Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
   Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
   Univcoord_T start, end, left;
@@ -16262,41 +17541,44 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	debug13(printf("plus left diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
 		       (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
 		       segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
-
-	querypos = segment->querypos5;
-	if (querypos < boundpos) {
-	  left = segment->diagonal - querylength; /* FORMULA */
-	  genomepos = (left - chroffset) + querypos;
-	  if (genomepos < min_genomepos) {
-	    seglength = (segment->querypos3 + index1part) - querypos;
-	    Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/chroffset+genomepos+seglength,
-					   seglength,chroffset,/*revcomp*/false);
-	    
-	    i = 0;
-	    while (i < seglength && querypos < boundpos && genomepos < min_genomepos) {
-	      c = queryuc_ptr[querypos];
-	      g = gsequence_orig[i];
-	      g_alt = gsequence_alt[i];
-	      if (g == c || g_alt == c) {
-		debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
-		unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					       /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					       /*dynprogindex*/0);
+	if (anchor_segment->diagonal > chroffset + chrlength && segment->diagonal < chroffset + chrlength) {
+	  debug13(printf("Cannot cross circular origin\n"));
+	} else {
+	  querypos = segment->querypos5;
+	  if (querypos < boundpos) {
+	    left = segment->diagonal - querylength; /* FORMULA */
+	    genomepos = (left - chroffset) + querypos;
+	    if (genomepos < min_genomepos) {
+	      seglength = (segment->querypos3 + index1part) - querypos;
+	      Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/chroffset+genomepos+seglength,
+					     seglength,chroffset,/*revcomp*/false);
+	      
+	      i = 0;
+	      while (i < seglength && querypos < boundpos && genomepos < min_genomepos) {
+		c = queryuc_ptr[querypos];
+		g = gsequence_orig[i];
+		g_alt = gsequence_alt[i];
+		if (g == c || g_alt == c) {
+		  debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+		  unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+						 /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+						 /*dynprogindex*/0);
 #if 0
-	      } else {
-		/* Let stage 3 handle mismatches */
-		debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
-		unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					       /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					       /*dynprogindex*/0);
+		} else {
+		  /* Let stage 3 handle mismatches */
+		  debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+		  unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+						 /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+						 /*dynprogindex*/0);
 #endif
+		}
+		querypos++;
+		genomepos++;
+		i++;
 	      }
-	      querypos++;
-	      genomepos++;
-	      i++;
+	      boundpos = segment->querypos5;
+	      min_genomepos = (left - chroffset) + segment->querypos5;
 	    }
-	    boundpos = segment->querypos5;
-	    min_genomepos = (left - chroffset) + segment->querypos5;
 	  }
 	}
       }
@@ -16309,45 +17591,49 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	debug13(printf("plus right diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
 		       (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
 		       segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
-	querypos = segment->querypos5;
-	seglength = (segment->querypos3 + index1part) - querypos;
+	if (anchor_segment->diagonal < chroffset + chrlength && segment->diagonal > chroffset + chrlength) {
+	  debug13(printf("Cannot cross circular origin\n"));
+	} else {
+	  querypos = segment->querypos5;
+	  seglength = (segment->querypos3 + index1part) - querypos;
       
-	left = segment->diagonal - querylength; /* FORMULA */
-	genomepos = left - chroffset + querypos;
-	Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
-					seglength,chrhigh,/*revcomp*/false);
+	  left = segment->diagonal - querylength; /* FORMULA */
+	  genomepos = left - chroffset + querypos;
+	  Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/chroffset+genomepos,
+					  seglength,chrhigh,/*revcomp*/false);
       
-	i = 0;
-	while (i < seglength && (querypos <= boundpos || genomepos <= max_genomepos)) {
-	  querypos++;
-	  genomepos++;
-	  i++;
-	}
-
-	while (i < seglength) {
-	  c = queryuc_ptr[querypos];
-	  g = gsequence_orig[i];
-	  g_alt = gsequence_alt[i];
-	  if (g == c || g_alt == c) {
-	    debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
-	    unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					   /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					   /*dynprogindex*/0);
+	  i = 0;
+	  while (i < seglength && (querypos <= boundpos || genomepos <= max_genomepos)) {
+	    querypos++;
+	    genomepos++;
+	    i++;
+	  }
+
+	  while (i < seglength) {
+	    c = queryuc_ptr[querypos];
+	    g = gsequence_orig[i];
+	    g_alt = gsequence_alt[i];
+	    if (g == c || g_alt == c) {
+	      debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+	      unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+					     /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+					     /*dynprogindex*/0);
 #if 0
-	  } else {
-	    /* Let stage 3 handle mismatches */
-	    debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
-	    unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					   /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					   /*dynprogindex*/0);
+	    } else {
+	      /* Let stage 3 handle mismatches */
+	      debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+	      unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+					     /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+					     /*dynprogindex*/0);
 #endif
+	    }
+	    querypos++;
+	    genomepos++;
+	    i++;
 	  }
-	  querypos++;
-	  genomepos++;
-	  i++;
+	  boundpos = segment->querypos3 + index1part;
+	  max_genomepos = genomepos - 1;
 	}
-	boundpos = segment->querypos3 + index1part;
-	max_genomepos = genomepos - 1;
       }
 
 
@@ -16383,7 +17669,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 					      &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 					      &ambig_prob_5_1,&ambig_prob_3_1,
 					      &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-					      &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+					      &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 					      &pairarray2,&pairs2,&npairs2,&goodness2,
 					      &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -16391,7 +17677,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 					      &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 					      &ambig_prob_5_2,&ambig_prob_3_2,
 					      &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-					      &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+					      &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 					      stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -16411,7 +17697,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 					      oligoindices_minor,diagpool,cellpool)) == NULL) {
 	/* hit = (T) NULL; */
 
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
 					 pairarray1,npairs1);
 	start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
@@ -16422,7 +17708,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				      ambig_end_length_5_1,ambig_end_length_3_1,
 				      ambig_splicetype_5_1,ambig_splicetype_3_1,
-				      min_splice_prob_1,
+				      avg_splice_score_1,goodness1,
 				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				      /*left*/start,/*genomiclength*/end - start + 1,
 				      /*plusp*/true,genestrand,
@@ -16445,7 +17731,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 				      ambig_end_length_5_2,ambig_end_length_3_2,
 				      ambig_splicetype_5_2,ambig_splicetype_3_2,
-				      min_splice_prob_2,
+				      avg_splice_score_2,goodness2,
 				      pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 				      /*left*/start,/*genomiclength*/end - start + 1,
 				      /*plusp*/true,genestrand,
@@ -16469,7 +17755,7 @@ convert_plus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				      ambig_end_length_5_1,ambig_end_length_3_1,
 				      ambig_splicetype_5_1,ambig_splicetype_3_1,
-				      min_splice_prob_1,
+				      avg_splice_score_1,goodness1,
 				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				      /*left*/start,/*genomiclength*/end - start + 1,
 				      /*plusp*/true,genestrand,
@@ -16562,8 +17848,8 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
     max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
     unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
     ncanonical2, nsemicanonical2, nnoncanonical2;
-  double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
-  double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
   Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
   Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
   Univcoord_T start, end, left;
@@ -16986,45 +18272,49 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	debug13(printf("minus left diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
 		       (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
 		       segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
-	querypos = segment->querypos5;
-	seglength = (segment->querypos3 + index1part) - querypos;
-
-	/* left = segment->diagonal - querylength; -- FORMULA */
-	genomepos = chrhigh - (segment->diagonal - 1) + querypos;
-	Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/segment->diagonal - querypos /*- seglength*/,
-				       seglength,chroffset,/*revcomp*/true);
-
-	i = 0;
-	while (i < seglength && (querypos <= boundpos || genomepos <= max_genomepos)) {
-	  querypos++;
-	  genomepos++;
-	  i++;
-	}
+	if (anchor_segment->diagonal > chroffset + chrlength && segment->diagonal < chroffset + chrlength) {
+	  debug13(printf("Cannot cross circular origin\n"));
+	} else {
+	  querypos = segment->querypos5;
+	  seglength = (segment->querypos3 + index1part) - querypos;
 
-	while (i < seglength) {
-	  c = queryuc_ptr[querypos];
-	  g = gsequence_orig[i];
-	  g_alt = gsequence_alt[i];
-	  if (g == c || g_alt == c) {
-	    debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
-	    unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					   /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					   /*dynprogindex*/0);
+	  /* left = segment->diagonal - querylength; -- FORMULA */
+	  genomepos = chrhigh - (segment->diagonal - 1) + querypos;
+	  Genome_get_segment_blocks_left(gsequence_orig,gsequence_alt,/*right*/segment->diagonal - querypos /*- seglength*/,
+					 seglength,chroffset,/*revcomp*/true);
+
+	  i = 0;
+	  while (i < seglength && (querypos <= boundpos || genomepos <= max_genomepos)) {
+	    querypos++;
+	    genomepos++;
+	    i++;
+	  }
+
+	  while (i < seglength) {
+	    c = queryuc_ptr[querypos];
+	    g = gsequence_orig[i];
+	    g_alt = gsequence_alt[i];
+	    if (g == c || g_alt == c) {
+	      debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+	      unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+					     /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+					     /*dynprogindex*/0);
 #if 0
-	  } else {
-	    /* Let stage 3 handle mismatches */
-	    debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
-	    unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					   /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					   /*dynprogindex*/0);
+	    } else {
+	      /* Let stage 3 handle mismatches */
+	      debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+	      unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+					     /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+					     /*dynprogindex*/0);
 #endif
+	    }
+	    querypos++;
+	    genomepos++;
+	    i++;
 	  }
-	  querypos++;
-	  genomepos++;
-	  i++;
+	  boundpos = segment->querypos3 + index1part;
+	  max_genomepos = genomepos - 1;
 	}
-	boundpos = segment->querypos3 + index1part;
-	max_genomepos = genomepos - 1;
       }
 
       /* F.  Make stage2pairs (right) */
@@ -17035,40 +18325,44 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	debug13(printf("minus right diagonal %u (%llu), querypos %d..%d, usedp %d, pairablep %d\n",
 		       (Chrpos_T) (segment->diagonal - chroffset),(unsigned long long) segment->diagonal,
 		       segment->querypos5,segment->querypos3,segment->usedp,segment->pairablep));
-	querypos = segment->querypos5;
-	if (querypos < boundpos) {
-	  /* left = segment->diagonal - querylength; -- FORMULA */
-	  genomepos = chrhigh - (segment->diagonal - 1) + querypos;
-	  if (genomepos < min_genomepos) {
-	    seglength = (segment->querypos3 + index1part) - querypos;
-	    Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
-					    seglength,chrhigh,/*revcomp*/true);
-
-	    i = 0;
-	    while (i < seglength && querypos < boundpos && genomepos < min_genomepos) {
-	      c = queryuc_ptr[querypos];
-	      g = gsequence_orig[i];
-	      g_alt = gsequence_alt[i];
-	      if (g == c || g_alt == c) {
-		debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
-		unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					       /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					       /*dynprogindex*/0);
+	if (anchor_segment->diagonal < chroffset + chrlength && segment->diagonal > chroffset + chrlength) {
+	  debug13(printf("Cannot cross circular origin\n"));
+	} else {
+	  querypos = segment->querypos5;
+	  if (querypos < boundpos) {
+	    /* left = segment->diagonal - querylength; -- FORMULA */
+	    genomepos = chrhigh - (segment->diagonal - 1) + querypos;
+	    if (genomepos < min_genomepos) {
+	      seglength = (segment->querypos3 + index1part) - querypos;
+	      Genome_get_segment_blocks_right(gsequence_orig,gsequence_alt,/*left*/segment->diagonal - querypos - seglength,
+					      seglength,chrhigh,/*revcomp*/true);
+
+	      i = 0;
+	      while (i < seglength && querypos < boundpos && genomepos < min_genomepos) {
+		c = queryuc_ptr[querypos];
+		g = gsequence_orig[i];
+		g_alt = gsequence_alt[i];
+		if (g == c || g_alt == c) {
+		  debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MATCH_COMP,g,querypos,genomepos));
+		  unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+						 /*cdna*/c,/*comp*/MATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+						 /*dynprogindex*/0);
 #if 0
-	      } else {
-		/* Let stage 3 handle mismatches */
-		debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
-		unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
-					       /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
-					       /*dynprogindex*/0);
+		} else {
+		  /* Let stage 3 handle mismatches */
+		  debug13(printf("Pushing %c %c %c at %d,%u\n",c,/*comp*/MISMATCH_COMP,g,querypos,genomepos));
+		  unsorted_pairs = Pairpool_push(unsorted_pairs,pairpool,querypos,genomepos,
+						 /*cdna*/c,/*comp*/MISMATCH_COMP,/*genome*/g,/*genomealt*/g_alt,
+						 /*dynprogindex*/0);
 #endif
+		}
+		querypos++;
+		genomepos++;
+		i++;
 	      }
-	      querypos++;
-	      genomepos++;
-	      i++;
+	      boundpos = segment->querypos5;
+	      min_genomepos = chrhigh - (segment->diagonal - 1) + segment->querypos5;
 	    }
-	    boundpos = segment->querypos5;
-	    min_genomepos = chrhigh - (segment->diagonal - 1) + segment->querypos5;
 	  }
 	}
       }
@@ -17106,7 +18400,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 					      &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 					      &ambig_prob_5_1,&ambig_prob_3_1,
 					      &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-					      &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+					      &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 					      &pairarray2,&pairs2,&npairs2,&goodness2,
 					      &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -17114,7 +18408,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 					      &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 					      &ambig_prob_5_2,&ambig_prob_3_2,
 					      &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-					      &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+					      &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 					      stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -17134,7 +18428,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 					      oligoindices_minor,diagpool,cellpool)) == NULL) {
 	/* hit = (T) NULL; */
 	  
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
 					 pairarray1,npairs1);
 	start = add_bounded(chroffset + Pair_genomepos(&(pairarray1[0])),
@@ -17145,7 +18439,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				      ambig_end_length_5_1,ambig_end_length_3_1,
 				      ambig_splicetype_5_1,ambig_splicetype_3_1,
-				      min_splice_prob_1,
+				      avg_splice_score_1,goodness1,
 				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				      /*left*/end,/*genomiclength*/start - end + 1,
 				      /*plusp*/false,genestrand,
@@ -17167,7 +18461,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 				      ambig_end_length_5_2,ambig_end_length_3_2,
 				      ambig_splicetype_5_2,ambig_splicetype_3_2,
-				      min_splice_prob_2,
+				      avg_splice_score_2,goodness2,
 				      pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 				      /*left*/end,/*genomiclength*/start - end + 1,
 				      /*plusp*/false,genestrand,
@@ -17190,7 +18484,7 @@ convert_minus_segments_to_gmap (List_T hits, char *queryuc_ptr, int querylength,
 	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				      ambig_end_length_5_1,ambig_end_length_3_1,
 				      ambig_splicetype_5_1,ambig_splicetype_3_1,
-				      min_splice_prob_1,
+				      avg_splice_score_1,goodness1,
 				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				      /*left*/end,/*genomiclength*/start - end + 1,
 				      /*plusp*/false,genestrand,
@@ -17251,6 +18545,7 @@ align_singleend_with_gmap (List_T result, char *queryuc_ptr, int querylength,
   result = Stage3end_sort_bymatches(result);
   
   for (p = result, i = 0; p != NULL && i < max_gmap_improvement; p = p->rest, i++) {
+    /* This is an expensive operation.  Need to limit by max_gmap_improvement */
     hit = (Stage3end_T) List_head(p);
     genestrand = Stage3end_genestrand(hit);
     
@@ -17364,7 +18659,7 @@ align_end (int *cutoff_level, T this,
 	   bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches,
 	   bool keep_floors_p, int genestrand, bool first_read_p) {
   List_T hits, greedy = NULL, subs = NULL, terminals = NULL, indels = NULL,
-    singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL,
+    singlesplicing = NULL, shortendsplicing = NULL,
     longsinglesplicing = NULL, distantsplicing = NULL, gmap_hits = NULL, q;
   Segment_T *plus_anchor_segments = NULL, *minus_anchor_segments = NULL;
   int n_plus_anchors = 0, n_minus_anchors = 0;
@@ -17450,6 +18745,7 @@ align_end (int *cutoff_level, T this,
   nmisses_allowed_sarray = *cutoff_level;
   
   if (use_only_sarray_p == true || (use_sarray_p == true && querylength < min_kmer_readlength)) {
+    debug(printf("Trying suffix array\n"));
     hits = Sarray_search_greedy(&(*cutoff_level),
 				queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,
 				nmisses_allowed_sarray,genestrand);
@@ -17475,7 +18771,7 @@ align_end (int *cutoff_level, T this,
     spanningsetp = true;
   } else {
     spanningsetp = false;	/* Suffix array search replaces spanning set */
-    
+
     debug(printf("Trying suffix array\n"));
     greedy = Sarray_search_greedy(&found_score,queryuc_ptr,queryrc,querylength,query_compress_fwd,query_compress_rev,
 				  nmisses_allowed_sarray,genestrand);
@@ -17576,6 +18872,7 @@ align_end (int *cutoff_level, T this,
   debug(printf("completesetp %d\n",completesetp));
 
 #if 0
+  /* SPEED */
   if (found_score <= done_level) {
     debug(printf("Test for completeset: false because found_score %d >done_level %d\n",found_score,done_level));
     completesetp = false;
@@ -17606,10 +18903,13 @@ align_end (int *cutoff_level, T this,
 			   querylength,query_lastpos,floors,indel_penalty_middle,indel_penalty_end,
 			   allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches,
 			   fast_level,genestrand);
+#if 0
+    /* SPEED */
     if (found_score <= done_level) {
       debug(printf("Test for completeset: false because found_score %d >done_level %d\n",found_score,done_level));
       completesetp = false;
     }
+#endif
   }
 
 
@@ -17617,8 +18917,10 @@ align_end (int *cutoff_level, T this,
 
   /* 6/7/8/9.  Splicing.  Requires compress and all positions fetched */
   /* SPEED: For more hits, turn off first branch */
-  if (use_sarray_p == true && completesetp == false) {
-      /* Skip.  Suffix array already found something.  Also, get memory errors if run both algorithms.  */
+  /* However, going for speed does miss correct results, so the first branch is skipped when require_completeset_p is true */
+  if (require_completeset_p == false && use_sarray_p == true && completesetp == false) {
+    /* Skip.  Suffix array already found something.  Also, get memory errors if run both algorithms.  */
+    /* Note: Turning this branch off results in a 3x slowdown */
 
   } else if (knownsplicingp || novelsplicingp || find_dna_chimeras_p) {
     /* 6.  Single splicing */
@@ -17801,7 +19103,9 @@ align_end (int *cutoff_level, T this,
   debug(printf("  subs: %d\n",List_length(subs)));
   debug(printf("  indels: %d\n",List_length(indels)));
   debug(printf("  singlesplicing %d\n",List_length(singlesplicing)));
+#ifdef PERFORM_DOUBLESPLICING
   debug(printf("  doublesplicing %d\n",List_length(doublesplicing)));
+#endif
   debug(printf("  shortendsplicing: %d\n",List_length(shortendsplicing)));
   debug(printf("  done_level: %d\n",done_level));
 
@@ -17811,8 +19115,8 @@ align_end (int *cutoff_level, T this,
 					     List_append(indels,
 							 List_append(singlesplicing,shortendsplicing)))));
 
-  if (knownsplicingp || novelsplicingp || find_dna_chimeras_p) {
-    /* Search 7: Distant splicing */
+  if (knownsplicingp == true || novelsplicingp == true) {
+    /* Search 7: Distant RNA splicing */
     min_trim = querylength;
     for (q = hits; q != NULL; q = q->rest) {
       hit = (Stage3end_T) q->first;
@@ -17825,119 +19129,10 @@ align_end (int *cutoff_level, T this,
       /* Want < and not <=, because otherwise distant splicing does not work on 50-bp reads */
       /* Want <= and not <, because distant splicing needs to be better than other alternatives */
       /* Don't find distant splicing */
-      debug(printf("Skipping distant splicing because done_level %d < distantsplicing_penalty %d and min_trim %d < %d\n",
+      debug(printf("Skipping distant RNA splicing because done_level %d < distantsplicing_penalty %d and min_trim %d < %d\n",
 		   done_level,distantsplicing_penalty,min_trim,min_distantsplicing_end_matches));
 
-    } else if (find_dna_chimeras_p == true &&
-	       (max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) {
-      /* 9 (DNA).  Find distant splicing for DNA */
-      debug(printf("*** Stage 9 (DNA).  Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
-
-      startfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
-      endfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
-      startfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
-      endfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
-
-      if (floors_computed_p == false) {
-	floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
-				plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
-				/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
-	floors_computed_p = true;
-      }
-
-      debug(printf("Starting find_spliceends_distant_dna_plus\n"));
-      find_spliceends_distant_dna_plus(&startfrags_plus,&endfrags_plus,plus_anchor_segments,n_plus_anchors,
-#ifdef DEBUG4E
-				       /*queryptr*/queryuc_ptr,
-#endif
-				       floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
-				       max_splice_mismatches,genestrand);
-      debug(printf("Finished find_spliceends_distant_dna_plus\n"));
-
-      debug(printf("Starting find_spliceends_distant_dna_minus\n"));
-      find_spliceends_distant_dna_minus(&startfrags_minus,&endfrags_minus,minus_anchor_segments,n_minus_anchors,
-#ifdef DEBUG4E
-					/*queryptr*/queryrc,
-#endif
-					floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
-					max_splice_mismatches,genestrand);
-      debug(printf("Finished find_spliceends_distant_dna_minus\n"));
-
-      nmismatches = 0;
-      ambiguousp = false;
-      while (longsinglesplicing == NULL &&
-	     nmismatches <= done_level - distantsplicing_penalty &&
-	     nsplicepairs < MAXCHIMERAPATHS && ambiguousp == false) {
-	debug(printf("*** Stage 9 (DNA).  Distant splicing, allowing %d mismatches ***\n",nmismatches));
-
-	debug4e(printf("Sorting splice ends\n"));
-	startfrags_plus[nmismatches] = Substring_sort_siteN_halves(startfrags_plus[nmismatches],/*ascendingp*/true);
-	endfrags_plus[nmismatches] = Substring_sort_siteN_halves(endfrags_plus[nmismatches],/*ascendingp*/true);
-
-	startfrags_minus[nmismatches] = Substring_sort_siteN_halves(startfrags_minus[nmismatches],/*ascendingp*/false);
-	endfrags_minus[nmismatches] = Substring_sort_siteN_halves(endfrags_minus[nmismatches],/*ascendingp*/false);
-
-	debug4e(printf("Splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n",
-		       nmismatches,
-		       List_length(startfrags_plus[nmismatches]),List_length(endfrags_plus[nmismatches]),
-		       List_length(startfrags_minus[nmismatches]),List_length(endfrags_minus[nmismatches])));
-
-	distantsplicing = find_splicepairs_distant_dna(&found_score,&nsplicepairs,&longsinglesplicing,distantsplicing,
-						       startfrags_plus,endfrags_plus,startfrags_minus,endfrags_minus,
-						       localsplicing_penalty,distantsplicing_penalty,
-						       querylength,nmismatches,first_read_p);
-#if 0
-	assert(List_length(distantsplicing) <= 1);
-#endif
-
-#if 0
-	/* Mark ambiguous splices only for single-end reads */
-	distantsplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,distantsplicing);
-#endif
-
-	/* Excess distant splicing should be freed already in find_splicepairs_distant_rna */
-	debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
-	distantsplicing = Stage3end_optimal_score(distantsplicing,query_compress_fwd,query_compress_rev,querylength,
-						  /*keep_gmap_p*/true,/*finalp*/false);
-	debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
-
-	if (distantsplicing) {
-	  opt_level = (found_score < opt_level) ? found_score : opt_level;
-	  if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
-	    done_level = user_maxlevel;
-	  }
-	  debug(printf("9 (DNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
-	}
-	nmismatches++;
-
-      }
-
-      if (longsinglesplicing != NULL) {
-	debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing)));
-	longsinglesplicing = Stage3end_optimal_score(longsinglesplicing,query_compress_fwd,query_compress_rev,querylength,
-						     /*keep_gmap_p*/true,/*finalp*/false);
-	debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing)));
-
-	opt_level = (found_score < opt_level) ? found_score : opt_level;
-	if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
-	  done_level = user_maxlevel;
-	}
-	debug(printf("9 (DNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
-      }
-
-      for (i = 0; i <= max_splice_mismatches; i++) {
-	substringlist_gc(&(startfrags_plus[i]));
-	substringlist_gc(&(endfrags_plus[i]));
-	substringlist_gc(&(startfrags_minus[i]));
-	substringlist_gc(&(endfrags_minus[i]));
-      }
-      FREEA(startfrags_plus);
-      FREEA(endfrags_plus);
-      FREEA(startfrags_minus);
-      FREEA(endfrags_minus);
-
-    } else if ((knownsplicingp || novelsplicingp) &&
-	       (max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) {
+    } else if ((max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) {
       /* 9 (RNA).  Find distant splicing for RNA iteratively using both known and novel splice sites */
       debug(printf("*** Stage 9 (RNA).  Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
 
@@ -18034,8 +19229,8 @@ align_end (int *cutoff_level, T this,
 	  }
 	  debug(printf("9 (RNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
 	}
-	nmismatches++;
 
+	nmismatches++;
       }
 
       if (longsinglesplicing != NULL) {
@@ -18061,26 +19256,153 @@ align_end (int *cutoff_level, T this,
 	substringlist_gc(&(acceptors_minus[i]));
 	substringlist_gc(&(antiacceptors_minus[i]));
       }
-      FREEA(donors_plus);
-      FREEA(antidonors_plus);
-      FREEA(acceptors_plus);
-      FREEA(antiacceptors_plus);
-      FREEA(donors_minus);
-      FREEA(antidonors_minus);
-      FREEA(acceptors_minus);
-      FREEA(antiacceptors_minus);
+      FREEA(donors_plus);
+      FREEA(antidonors_plus);
+      FREEA(acceptors_plus);
+      FREEA(antiacceptors_plus);
+      FREEA(donors_minus);
+      FREEA(antidonors_minus);
+      FREEA(acceptors_minus);
+      FREEA(antiacceptors_minus);
+    }
+
+    debug(printf("%d single splices, %d long single splices, %d distant splices",
+                 List_length(singlesplicing),List_length(longsinglesplicing),List_length(distantsplicing)));
+#ifdef PERFORM_DOUBLESPLICING
+    debug(printf(", %d double splices\n",List_length(doublesplicing)));
+#endif
+    debug(printf("\n"));
+
+    hits = List_append(hits,
+		       List_append(longsinglesplicing,distantsplicing));
+  }
+
+
+  if (find_dna_chimeras_p == true) {
+    /* Search 8: Distant DNA splicing */
+    min_trim = querylength;
+    for (q = hits; q != NULL; q = q->rest) {
+      hit = (Stage3end_T) q->first;
+      if ((trim = Stage3end_total_trim(hit)) < min_trim) {
+	min_trim = trim;
+      }
+    }
+
+    if (done_level < distantsplicing_penalty && min_trim < min_distantsplicing_end_matches) {
+      /* Want < and not <=, because otherwise distant splicing does not work on 50-bp reads */
+      /* Want <= and not <, because distant splicing needs to be better than other alternatives */
+      /* Don't find distant splicing */
+      debug(printf("Skipping distant DNA splicing because done_level %d < distantsplicing_penalty %d and min_trim %d < %d\n",
+		   done_level,distantsplicing_penalty,min_trim,min_distantsplicing_end_matches));
+
+    } else if ((max_splice_mismatches = done_level - distantsplicing_penalty) >= 0) {
+      /* 9 (DNA).  Find distant splicing for DNA */
+      debug(printf("*** Stage 9 (DNA).  Distant splice ends, allowing %d mismatches ***\n",max_splice_mismatches));
+
+      startfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+      endfrags_plus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+      startfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+      endfrags_minus = (List_T *) CALLOCA(max_splice_mismatches+1,sizeof(List_T));
+
+      if (floors_computed_p == false) {
+	floors = compute_floors(&any_omitted_p,&alloc_floors_p,floors_array,this,queryuc_ptr,querylength,query_lastpos,
+				plus_indexdb,minus_indexdb,indexdb_size_threshold,max_end_insertions,
+				/*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+	floors_computed_p = true;
+      }
+
+      debug(printf("Starting find_spliceends_distant_dna_plus\n"));
+      find_spliceends_distant_dna_plus(&startfrags_plus,&endfrags_plus,plus_anchor_segments,n_plus_anchors,
+#ifdef DEBUG4E
+				       /*queryptr*/queryuc_ptr,
+#endif
+				       floors,querylength,query_lastpos,/*query_compress*/query_compress_fwd,
+				       max_splice_mismatches,genestrand);
+      debug(printf("Finished find_spliceends_distant_dna_plus\n"));
+
+      debug(printf("Starting find_spliceends_distant_dna_minus\n"));
+      find_spliceends_distant_dna_minus(&startfrags_minus,&endfrags_minus,minus_anchor_segments,n_minus_anchors,
+#ifdef DEBUG4E
+					/*queryptr*/queryrc,
+#endif
+					floors,querylength,query_lastpos,/*query_compress*/query_compress_rev,
+					max_splice_mismatches,genestrand);
+      debug(printf("Finished find_spliceends_distant_dna_minus\n"));
+
+      nmismatches = 0;
+      while (nmismatches <= done_level - distantsplicing_penalty /* && nsplicepairs < MAXCHIMERAPATHS */) {
+	debug(printf("*** Stage 9 (DNA).  Distant splicing, allowing %d mismatches ***\n",nmismatches));
+	longsinglesplicing = (List_T) NULL;
+	distantsplicing = (List_T) NULL;
+
+	debug4e(printf("Sorting splice ends\n"));
+	startfrags_plus[nmismatches] = Substring_sort_siteN_halves(startfrags_plus[nmismatches],/*ascendingp*/true);
+	endfrags_plus[nmismatches] = Substring_sort_siteN_halves(endfrags_plus[nmismatches],/*ascendingp*/true);
+
+	startfrags_minus[nmismatches] = Substring_sort_siteN_halves(startfrags_minus[nmismatches],/*ascendingp*/false);
+	endfrags_minus[nmismatches] = Substring_sort_siteN_halves(endfrags_minus[nmismatches],/*ascendingp*/false);
+
+	debug4e(printf("Splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n",
+		       nmismatches,
+		       List_length(startfrags_plus[nmismatches]),List_length(endfrags_plus[nmismatches]),
+		       List_length(startfrags_minus[nmismatches]),List_length(endfrags_minus[nmismatches])));
+
+	distantsplicing = find_splicepairs_distant_dna(&found_score,&nsplicepairs,&longsinglesplicing,distantsplicing,
+						       startfrags_plus,endfrags_plus,startfrags_minus,endfrags_minus,
+						       localsplicing_penalty,distantsplicing_penalty,
+						       querylength,nmismatches,first_read_p);
+#if 0
+	/* Mark ambiguous splices only for single-end reads */
+	distantsplicing = Stage3end_mark_ambiguous_splices(&ambiguousp,distantsplicing);
+#endif
+
+	if (longsinglesplicing != NULL) {
+	  debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing)));
+	  longsinglesplicing = Stage3end_optimal_score(longsinglesplicing,query_compress_fwd,query_compress_rev,querylength,
+						       /*keep_gmap_p*/true,/*finalp*/false);
+	  debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing)));
+	  hits = List_append(hits,longsinglesplicing);
+
+	  opt_level = (found_score < opt_level) ? found_score : opt_level;
+	  if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+	    done_level = user_maxlevel;
+	  }
+	}
+
+	if (distantsplicing != NULL) {
+	  /* Excess distant splicing should be freed already in find_splicepairs_distant_dna */
+	  debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
+	  distantsplicing = Stage3end_optimal_score(distantsplicing,query_compress_fwd,query_compress_rev,querylength,
+						    /*keep_gmap_p*/true,/*finalp*/false);
+	  debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing)));
+
+	  hits = List_append(hits,distantsplicing);
+
+	  opt_level = (found_score < opt_level) ? found_score : opt_level;
+	  if ((done_level = opt_level + subopt_levels) > user_maxlevel) {
+	    done_level = user_maxlevel;
+	  }
+	}
+	debug(printf("9 (DNA)> found_score = %d, opt_level %d, done_level %d\n",found_score,opt_level,done_level));
+
+	nmismatches++;
+      }
+
+      for (i = 0; i <= max_splice_mismatches; i++) {
+	substringlist_gc(&(startfrags_plus[i]));
+	substringlist_gc(&(endfrags_plus[i]));
+	substringlist_gc(&(startfrags_minus[i]));
+	substringlist_gc(&(endfrags_minus[i]));
+      }
+      FREEA(startfrags_plus);
+      FREEA(endfrags_plus);
+      FREEA(startfrags_minus);
+      FREEA(endfrags_minus);
     }
-
-    debug(printf("%d single splices, %d double splices, %d long single splices, %d distant splices\n",
-		 List_length(singlesplicing),List_length(doublesplicing),
-		 List_length(longsinglesplicing),List_length(distantsplicing)));
   }
 
 
-  hits = List_append(hits,
-		     List_append(longsinglesplicing,distantsplicing));
-
-  /* Search 8: Terminals */
+  /* Search 9: Terminals */
 
   /* Previously criterion for skipping find_terminals was (greedy ||
      subs || indels || singlesplicing || doublesplicing ||
@@ -18100,10 +19422,13 @@ align_end (int *cutoff_level, T this,
   } else if (found_score < trigger_score_for_gmap) {
     debug(printf("Test for stage 9: false because found_score %d < trigger_score_for_gmap %d\n",found_score,trigger_score_for_gmap));
     gmapp = false;
-  } else if (min_trim < min_distantsplicing_end_matches) {
-    gmapp = false;
-  } else if (distantsplicing != NULL) {
-    gmapp = false;
+  } else if (knownsplicingp || novelsplicingp || find_dna_chimeras_p) {
+    /* if-condition is the union of the conditions for Search 7 (distant RNA splicing) and Search 8 (distant DNA splicing) above */
+    if (min_trim < min_distantsplicing_end_matches) {
+      gmapp = false;
+    } else if (distantsplicing != NULL) {
+      gmapp = false;
+    }
   }
 
 
@@ -18140,15 +19465,17 @@ align_end (int *cutoff_level, T this,
     /* 11.  GMAP terminal */
     
     /* This is done for paired-ends, but should not be necessary for single-end */
+    /* Need finalp to be true to limit align_singleend_with_gmap, which is an expensive operation */
     debug13(printf("Before remove overlaps at cutoff level %d: %d hits\n",opt_level,List_length(hits)));
-    hits = Stage3end_sort_bymatches(Stage3end_remove_overlaps(hits,/*finalp*/false));
+    hits = Stage3end_sort_bymatches(Stage3end_remove_overlaps(hits,/*finalp*/true));
     debug13(printf("After remove overlaps: %d\n",List_length(hits)));
 
     i = 0;
     debug13(printf("%d hits\n",List_length(hits)));
     debug13(printf("For each hit, running GMAP on single end to match with hit\n"));
 
-    for (q = hits; q != NULL && i < max_gmap_improvement; q = q->rest) {
+    for (q = hits; q != NULL && i < max_gmap_improvement; q = q->rest, i++) {
+      /* This is an expensive operation.  Need to limit by max_gmap_improvement */
       hit = (Stage3end_T) q->first;
       align_single_hit_with_gmap(&gmap1,&gmap2,hit,queryuc_ptr,querylength,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -18197,7 +19524,8 @@ align_end (int *cutoff_level, T this,
     hits = Stage3end_optimal_score(hits,query_compress_fwd,query_compress_rev,
 				   querylength,/*keep_gmap_p*/true,/*finalp*/false);
     /* Don't reject based on trimlength until after GMAP improvements */
-    hits = Stage3end_remove_overlaps(hits,/*finalp*/false);
+    /* Need finalp to be true to limit align_singleend_with_gmap, which is an expensive operation */
+    hits = Stage3end_remove_overlaps(hits,/*finalp*/true);
     hits = Stage3end_optimal_score(hits,query_compress_fwd,query_compress_rev,
 				   querylength,/*keep_gmap_p*/false,/*finalp*/false);
     hits = Stage3end_resolve_multimapping(hits);
@@ -18250,12 +19578,12 @@ single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *sec
 
 #ifdef HAVE_ALLOCA
   if (querylength <= MAX_STACK_READLENGTH) {
-    queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
+    queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
   } else {
-    queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
+    queryrc = (char *) MALLOC((querylength+1)*sizeof(char));
   }
 #else
-  queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
+  queryrc = (char *) MALLOC((querylength+1)*sizeof(char));
 #endif
 
   if (user_maxlevel_float < 0.0) {
@@ -18358,12 +19686,12 @@ single_read_tolerant_nonstranded (int *npaths_primary, int *npaths_altloc, int *
 
 #ifdef HAVE_ALLOCA
   if (querylength <= MAX_STACK_READLENGTH) {
-    queryrc = (char *) ALLOCA((querylength+1)*sizeof(int));
+    queryrc = (char *) ALLOCA((querylength+1)*sizeof(char));
   } else {
-    queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
+    queryrc = (char *) MALLOC((querylength+1)*sizeof(char));
   }
 #else
-  queryrc = (char *) MALLOC((querylength+1)*sizeof(int));
+  queryrc = (char *) MALLOC((querylength+1)*sizeof(char));
 #endif
 
   if (user_maxlevel_float < 0.0) {
@@ -18506,20 +19834,209 @@ Stage1_single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, i
 /* #define HITARRAY_DISTANTSPLICING 4 */
 
 
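+/* Picks a region near the mapped end (within expected_pairlength + pairlength_deviation + querylength of it) and runs GMAP there */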
 static List_T
-align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, 
-			     Shortread_T queryseq5, Shortread_T queryseq3,
-			     char *queryuc_ptr, int querylength, int query_lastpos,
+align_halfmapping_with_gmap_close (bool *successp, List_T hits, History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, 
+				   Shortread_T queryseq5, Shortread_T queryseq3,
+				   char *queryuc_ptr, int querylength, int query_lastpos,
 #ifdef END_KNOWNSPLICING_SHORTCUT
-			     char *queryrc, bool invertedp,
+				   char *queryrc, bool invertedp,
 #endif
-			     struct Segment_T *plus_segments, int plus_nsegments,
-			     struct Segment_T *minus_segments, int minus_nsegments,
-			     Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
-			     Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
-			     Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
-			     Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) {
-  List_T hits = NULL;
+				   Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+				   Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+				   Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+				   Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) {
+  int sensedir, sense_try;
+
+  /* int zero_offset = 0; */
+  Univcoord_T genomicbound, mappingstart, mappingend,
+    chroffset, chrhigh;
+  Chrpos_T chrlength;
+  Chrnum_T chrnum;
+  bool good_start_p, good_end_p, watsonp, favor_right_p;
+
+  *successp = false;
+  debug(printf("Trying halfmapping close\n"));
+
+  if (hit3 == NULL) {
+    /* Both events are tested by Stage3end_anomalous_splice_p */
+    if ((chrnum = Stage3end_chrnum(hit5)) == 0) {
+      /* Translocation */
+      return (List_T) NULL;
+
+    } else if (Stage3end_hittype(hit5) == SAMECHR_SPLICE) {
+      /* A genomic event that doesn't get reflected in chrnum */
+      return (List_T) NULL;
+
+    } else if ((watsonp = Stage3end_plusp(hit5)) == true) {
+      chroffset = Stage3end_chroffset(hit5);
+      chrhigh = Stage3end_chrhigh(hit5);
+      chrlength = Stage3end_chrlength(hit5);
+
+      if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+	/* Go from genomicstart */
+	debug13(printf("Found primers\n"));
+	genomicbound = Stage3end_genomicstart(hit5);
+
+      } else if (Stage3end_anomalous_splice_p(hit5) == true) {
+	/* Go from genomicstart */
+	debug13(printf("Anomalous splice\n"));
+	genomicbound = Stage3end_genomicstart(hit5);
+
+      } else {
+	genomicbound = Stage3end_genomicend(hit5);
+      }
+
+      debug13(printf("Case 1: hit5 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
+		     Stage3end_hittype_string(hit5),
+		     Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
+		     Stage3end_sensedir(hit5),genomicbound - chroffset));
+
+      mappingstart = genomicbound;
+      mappingend = add_bounded(Stage3end_genomicend(hit5),expected_pairlength + pairlength_deviation + querylength,chrhigh);
+      favor_right_p = false;
+
+    } else {
+      chroffset = Stage3end_chroffset(hit5);
+      chrhigh = Stage3end_chrhigh(hit5);
+      chrlength = Stage3end_chrlength(hit5);
+
+      if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+	/* Go from genomicstart */
+	debug13(printf("Found primers\n"));
+	genomicbound = Stage3end_genomicstart(hit5);
+
+      } else if (Stage3end_anomalous_splice_p(hit5) == true) {
+	/* Go from genomicstart */
+	debug13(printf("Anomalous splice\n"));
+	genomicbound = Stage3end_genomicstart(hit5);
+
+      } else {
+	genomicbound = Stage3end_genomicend(hit5);
+      }
+
+      debug13(printf("Case 2: hit5 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
+		     Stage3end_hittype_string(hit5),
+		     Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset,
+		     Stage3end_sensedir(hit5),genomicbound - chroffset));
+
+      mappingend = genomicbound;
+      mappingstart = subtract_bounded(Stage3end_genomicend(hit5),expected_pairlength + pairlength_deviation + querylength,chroffset);
+      favor_right_p = false;
+    }
+
+    if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) {
+      sense_try = +1;
+    } else if (sensedir == SENSE_ANTI) {
+      sense_try = -1;
+    } else {
+      sense_try = 0;
+    }
+
+  } else if (hit5 == NULL) {
+    /* Both events are tested by Stage3end_anomalous_splice_p */
+    if ((chrnum = Stage3end_chrnum(hit3)) == 0) {
+      /* Translocation */
+      return (List_T) NULL;
+
+    } else if (Stage3end_hittype(hit3) == SAMECHR_SPLICE) {
+      /* A genomic event that doesn't get reflected in chrnum */
+      return (List_T) NULL;
+
+    } else if ((watsonp = Stage3end_plusp(hit3)) == true) {
+      chroffset = Stage3end_chroffset(hit3);
+      chrhigh = Stage3end_chrhigh(hit3);
+      chrlength = Stage3end_chrlength(hit3);
+
+      if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+	/* Go from genomicend */
+	debug13(printf("Found primers\n"));
+	genomicbound = Stage3end_genomicend(hit3);
+
+      } else if (Stage3end_anomalous_splice_p(hit3) == true) {
+	/* Go from genomicend */
+	debug13(printf("Anomalous splice\n"));
+	genomicbound = Stage3end_genomicend(hit3);
+
+      } else {
+	genomicbound = Stage3end_genomicstart(hit3);
+      }
+
+      debug13(printf("Case 3: hit3 plus %s %u..%u (sensedir %d) => genomicbound %u\n",
+		     Stage3end_hittype_string(hit3),
+		     Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
+		     Stage3end_sensedir(hit3),genomicbound - chroffset));
+
+      mappingend = genomicbound;
+      mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),expected_pairlength + pairlength_deviation + querylength,chroffset);
+      favor_right_p = true;
+
+    } else {
+      chroffset = Stage3end_chroffset(hit3);
+      chrhigh = Stage3end_chrhigh(hit3);
+      chrlength = Stage3end_chrlength(hit3);
+
+      if (Shortread_find_primers(queryseq5,queryseq3) == true) {
+	/* Go from genomicend */
+	debug13(printf("Found primers\n"));
+	genomicbound = Stage3end_genomicend(hit3);
+
+      } else if (Stage3end_anomalous_splice_p(hit3) == true) {
+	/* Go from genomicend */
+	debug13(printf("Anomalous splice\n"));
+	genomicbound = Stage3end_genomicend(hit3);
+
+      } else {
+	genomicbound = Stage3end_genomicstart(hit3);
+      }
+
+      debug13(printf("Case 4: hit3 minus %s %u..%u (sensedir %d) => genomicbound %u\n",
+		     Stage3end_hittype_string(hit3),
+		     Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset,
+		     Stage3end_sensedir(hit3),genomicbound - chroffset));
+
+      mappingstart = genomicbound;
+      mappingend = add_bounded(Stage3end_genomicstart(hit3),expected_pairlength + pairlength_deviation + querylength,chrhigh);
+      favor_right_p = true;
+    }
+
+    if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) {
+      sense_try = +1;
+    } else if (sensedir == SENSE_ANTI) {
+      sense_try = -1;
+    } else {
+      sense_try = 0;
+    }
+
+  } else {
+    abort();
+  }
+
+  debug13(printf("Halfmapping close: Running gmap with mappingstart %u and mappingend %u\n",mappingstart,mappingend));
+  return run_gmap_for_region(&(*successp),&good_start_p,&good_end_p,gmap_history,
+			     hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
+			     mappingstart,mappingend,
+			     /*knownsplice_limit_low*/mappingstart,/*knownsplice_limit_high*/mappingend,
+			     watsonp,genestrand,chrnum,chroffset,chrhigh,chrlength,
+			     oligoindices_major,oligoindices_minor,
+			     pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR);
+}
+
+
+/* Uses segments to find a region to search */
+static List_T
+align_halfmapping_with_gmap_far (List_T hits, History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, 
+				 Shortread_T queryseq5, Shortread_T queryseq3,
+				 char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+				 char *queryrc, bool invertedp,
+#endif
+				 struct Segment_T *plus_segments, int plus_nsegments,
+				 struct Segment_T *minus_segments, int minus_nsegments,
+				 Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+				 Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+				 Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+				 Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) {
   int sensedir, sense_try;
 
   /* int zero_offset = 0; */
@@ -18539,10 +20056,12 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
   bool close_mappingstart_p = false, close_mappingend_p = false;
   bool middle_mappingstart_p = false, middle_mappingend_p = false;
   bool fallback_mappingstart_p, fallback_mappingend_p;
-  bool good_start_p, good_end_p, watsonp, favor_right_p;
+  bool successp, good_start_p, good_end_p, watsonp, favor_right_p;
 
   int starti, endi, i;
 
+  debug(printf("Trying halfmapping far\n"));
+
   if (hit3 == NULL) {
     /* Both events are tested by Stage3end_anomalous_splice_p */
     if ((chrnum = Stage3end_chrnum(hit5)) == 0) {
@@ -18590,7 +20109,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
 		     Stage3end_sensedir(hit5),genomicbound - chroffset));
 
       knownsplice_limit_low = mappingstart = segmentstart = genomicbound;
-      knownsplice_limit_high =  add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh);
+      knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh);
       segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax + PAIRMAX_ADDITIONAL,chrhigh);
 #ifdef LONG_ENDSPLICES
       mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh);
@@ -19270,7 +20789,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
 
   if (close_mappingstart_p == true && close_mappingend_p == true) {
     debug13(printf("Halfmapping: Running gmap with close mappingstart and close mappingend\n"));
-    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 			       hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 			       close_mappingstart_last,close_mappingend_last,
 			       close_knownsplice_limit_low,close_knownsplice_limit_high,
@@ -19285,7 +20804,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
     } else if (/* require both ends to be good */ 0 && good_start_p == true) {
       if (fallback_mappingend_p == true) {
 	debug13(printf("Halfmapping: Re-running gmap with close mappingstart only\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 				   hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 				   close_mappingstart_last,mappingend,
 				   close_knownsplice_limit_low,knownsplice_limit_high,
@@ -19297,7 +20816,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
     } else if (/* require both ends to be good */ 0 && good_end_p == true) {
       if (fallback_mappingstart_p == true) {
 	debug13(printf("Halfmapping: Re-running gmap with close mappingend only\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 				   hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 				   mappingstart,close_mappingend_last,
 				   knownsplice_limit_low,close_knownsplice_limit_high,
@@ -19308,7 +20827,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
     } else {
       if (fallback_mappingstart_p == true && fallback_mappingend_p == true) {
 	debug13(printf("Halfmapping: Re-running gmap with far mappingstart and mappingend\n"));
-	hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+	hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 				   hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 				   mappingstart,mappingend,
 				   knownsplice_limit_low,knownsplice_limit_high,
@@ -19320,7 +20839,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
 
   } else if (close_mappingstart_p == true) {
     debug13(printf("Halfmapping: Running gmap with close mappingstart\n"));
-    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 			       hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 			       close_mappingstart_last,mappingend,
 			       close_knownsplice_limit_low,knownsplice_limit_high,
@@ -19334,7 +20853,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
       debug13(printf("Skipping re-run of gmap\n"));
     } else if (fallback_mappingstart_p == true) {
       debug13(printf("Halfmapping: Re-running gmap with far mappingstart\n"));
-      hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+      hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 				 hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 				 mappingstart,mappingend,
 				 knownsplice_limit_low,knownsplice_limit_high,
@@ -19345,7 +20864,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
 
   } else if (close_mappingend_p == true) {
     debug13(printf("Halfmapping: Running gmap with close mappingend\n"));
-    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 			       hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 			       mappingstart,close_mappingend_last,
 			       knownsplice_limit_low,close_knownsplice_limit_high,
@@ -19359,7 +20878,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
       debug13(printf("Skipping re-run of gmap\n"));
     } else if (fallback_mappingend_p == true) {
       debug13(printf("Halfmapping: Re-running gmap with far mappingend\n"));
-      hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+      hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 				 hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 				 mappingstart,mappingend,
 				 knownsplice_limit_low,knownsplice_limit_high,
@@ -19370,7 +20889,7 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
 
   } else {
     debug13(printf("Halfmapping: Running gmap with far mappingstart and mappingend\n"));
-    hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,
+    hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,
 			       hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p,
 			       mappingstart,mappingend,
 			       knownsplice_limit_low,knownsplice_limit_high,
@@ -19384,6 +20903,43 @@ align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end
 
 
 static List_T
+align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, 
+			     Shortread_T queryseq5, Shortread_T queryseq3,
+			     char *queryuc_ptr, int querylength, int query_lastpos,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+			     char *queryrc, bool invertedp,
+#endif
+			     struct Segment_T *plus_segments, int plus_nsegments,
+			     struct Segment_T *minus_segments, int minus_nsegments,
+			     Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor,
+			     Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool,
+			     Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+			     Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) {
+  List_T hits = NULL;		/* Result list: starts empty, is reassigned by the close search, then handed to the far search on failure */
+  bool successp;		/* Passed by address to the close search and tested right after that call */
+  /* Two-stage halfmapping driver: try the close search first; run the far search only when the close one does not report success. */
+  debug(printf("Trying halfmapping close\n"));
+  hits = align_halfmapping_with_gmap_close(&successp,hits,gmap_history,hit5,hit3, 
+					   queryseq5,queryseq3,queryuc_ptr,querylength,query_lastpos,
+					   oligoindices_major,oligoindices_minor,
+					   pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+					   pairmax,shortsplicedist,genestrand);
+  if (successp == true) {	/* Close search succeeded: return its hits without running the far search */
+    debug(printf("Trying halfmapping close succeeded\n"));
+    return hits;
+  } else {			/* NOTE(review): the _far callee declares queryrc/invertedp under END_KNOWNSPLICING_SHORTCUT, but the call below never passes them -- confirm that macro is never defined */
+    debug(printf("Trying halfmapping close failed.  Trying halfmapping far\n"));
+    return align_halfmapping_with_gmap_far(hits,gmap_history,hit5,hit3, 
+					   queryseq5,queryseq3,queryuc_ptr,querylength,query_lastpos,
+					   plus_segments,plus_nsegments,minus_segments,minus_nsegments,
+					   oligoindices_major,oligoindices_minor,
+					   pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,
+					   pairmax,shortsplicedist,genestrand);
+  }
+}
+
+
+static List_T
 align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
 		      char *queryuc_ptr_5, char *queryuc_ptr_3,
 		      int querylength5, int querylength3,
@@ -19520,6 +21076,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
 
 	  } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
 	    debug13(printf("  => not concordant, so eliminating\n"));
+	    Stage3pair_free(&newpair);
 
 	  } else if (replacedp == false) {
 	    /* Convert to gmap-gmap */
@@ -19570,6 +21127,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
 
 	} else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
 	  debug13(printf("  => not concordant, so eliminating\n"));
+	  Stage3pair_free(&newpair);
 
 	} else if (replacedp == false) {
 	  /* Convert to gmap-xx */
@@ -19607,6 +21165,7 @@ align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result,
 
 	} else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
 	  debug13(printf("  => not concordant, so eliminating\n"));
+	  Stage3pair_free(&newpair);
 
 	} else if (replacedp == false) {
 	  /* Convert to xx-gmap */
@@ -19772,10 +21331,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
     mismatch_level_5, mismatch_level_3, nmismatches;
   int max_splice_mismatches_5 = -1, max_splice_mismatches_3 = -1, i;
   int nhits5 = 0, nhits3 = 0, nsplicepairs5 = 0, nsplicepairs3 = 0;
-  List_T *donors_plus_5, *antidonors_plus_5, *acceptors_plus_5, *antiacceptors_plus_5,
-    *donors_minus_5, *antidonors_minus_5, *acceptors_minus_5, *antiacceptors_minus_5;
-  List_T *donors_plus_3, *antidonors_plus_3, *acceptors_plus_3, *antiacceptors_plus_3,
-    *donors_minus_3, *antidonors_minus_3, *acceptors_minus_3, *antiacceptors_minus_3;
+  List_T *donors_plus, *antidonors_plus, *acceptors_plus, *antiacceptors_plus,
+    *donors_minus, *antidonors_minus, *acceptors_minus, *antiacceptors_minus;
+  List_T *startfrags_plus_5, *endfrags_plus_5, *startfrags_minus_5, *endfrags_minus_5,
+    *startfrags_plus_3, *endfrags_plus_3, *startfrags_minus_3, *endfrags_minus_3;
 
   bool spanningset5p, spanningset3p, completeset5p, completeset3p, gmap5p, gmap3p;
   bool did_alignment_p, did_singlesplicing5_p, did_singlesplicing3_p;
@@ -20012,6 +21571,20 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 	  debug(printf("spanningset5p %d, spanningset3p %d\n",spanningset5p,spanningset3p));
 	}
       }
+
+    } else {
+      if (nhits5 > nconcordant) {
+	/* Missing hits on 3' end */
+	debug(printf("nhits5 %d > nconcordant %d, so running spanningset on 3' end\n",nhits5,nconcordant));
+        spanningset3p = true;
+      }
+
+      if (nhits3 > nconcordant) {
+	/* Missing hits on 5' end */
+	debug(printf("nhits3 %d > nconcordant %d, so running spanningset on 5' end\n",nhits3,nconcordant));
+        spanningset5p = true;
+      }
+
 #if 0
     } else if (*found_score >= done_level_5 + done_level_3) {
       /* Can eliminate this, because nconcordant no longer includes terminals, which are our main concern */
@@ -20086,6 +21659,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					 hitarray3,/*narray3*/HITARRAY_SUBS+1,
 					 *cutoff_level_5,*cutoff_level_3,
 					 querylength5,querylength3,maxpairedpaths,genestrand);
+
     debug(printf("After pairing exact, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		 nconcordant,nsamechr,List_length(*terminals),*found_score));
     if (*abort_pairing_p == true) {
@@ -20135,6 +21709,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					   hitarray3,/*narray3*/HITARRAY_SUBS+1,
 					   *cutoff_level_5,*cutoff_level_3,
 					   querylength5,querylength3,maxpairedpaths,genestrand);
+
       debug(printf("After pairing one mismatch, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		   nconcordant,nsamechr,List_length(*terminals),*found_score));
       if (*abort_pairing_p == true) {
@@ -20242,12 +21817,58 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 	if (Stage3end_score(Stage3pair_hit5(newpair)) > done_level_5) {
 	  completeset5p = true;
 	}
-	if (Stage3end_score(Stage3pair_hit3(newpair)) > done_level_3) {
-	  completeset3p = true;
-	}
-	debug(printf("completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
+	if (Stage3end_score(Stage3pair_hit3(newpair)) > done_level_3) {
+	  completeset3p = true;
+	}
+	debug(printf("completeset5p %d, completeset3p %d\n",completeset5p,completeset3p));
+      }
+    }
+
+  } else {
+    debug(printf("Looking at greedy5\n"));
+    for (p = greedy5; p != NULL; p = List_next(p)) {
+      hit5 = (Stage3end_T) List_head(p);
+      if (Stage3end_paired_usedp(hit5) == false) {
+	debug(printf("Saw an unpaired hit5, so setting complete3p to be true\n"));
+	completeset3p = true;
+      }
+    }
+#if 1
+    /* This check is necessary to get accurate results */
+    if (completeset3p == false) {
+      debug(printf("Looking at subs5\n"));
+      for (p = subs5; p != NULL; p = List_next(p)) {
+	hit5 = (Stage3end_T) List_head(p);
+	if (Stage3end_paired_usedp(hit5) == false) {
+	  debug(printf("Saw an unpaired hit5, so setting complete3p to be true\n"));
+	  completeset3p = true;
+	}
+      }
+    }
+#endif
+
+    debug(printf("Looking at greedy3\n"));
+    for (p = greedy3; p != NULL; p = List_next(p)) {
+      hit3 = (Stage3end_T) List_head(p);
+      if (Stage3end_paired_usedp(hit3) == false) {
+	debug(printf("Saw an unpaired hit3, so setting complete5p to be true\n"));
+	completeset5p = true;
+      }
+    }
+#if 1
+    /* This check is necessary to get accurate results */
+    if (completeset5p == false) {
+      debug(printf("Looking at subs3\n"));
+      for (p = subs3; p != NULL; p = List_next(p)) {
+	hit3 = (Stage3end_T) List_head(p);
+	if (Stage3end_paired_usedp(hit3) == false) {
+	  debug(printf("Saw an unpaired hit3, so setting complete5p to be true\n"));
+	  completeset5p = true;
+	}
       }
-    }
+    }      
+#endif
+
 
 #if 0
   } else if (*found_score <= done_level_5 + done_level_3) {
@@ -20356,16 +21977,30 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					 hitarray3,/*narray3*/HITARRAY_INDELS+1,
 					 *cutoff_level_5,*cutoff_level_3,
 					 querylength5,querylength3,maxpairedpaths,genestrand);
+
     debug(printf("After pairing complete set mismatches and indels, found %d concordant, %d nsamechr, %d terminals, found_score %d\n",
 		 nconcordant,nsamechr,List_length(*terminals),*found_score));
     if (*abort_pairing_p == true) {
+      if (alloc_floors_p_5 == true) {
+	Floors_free(&floors5);
+      }
+      if (alloc_floors_p_3 == true) {
+	Floors_free(&floors3);
+      }
       *hits5 = List_append(greedy5,List_append(subs5,indels5));
       *hits3 = List_append(greedy3,List_append(subs3,indels3));
 #if 0
       hitpairs = Stage3pair_remove_circular_alias(hitpairs);
 #endif
       hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
+
+      FREE(plus_anchor_segments_5);
+      FREE(minus_anchor_segments_5);
+      FREE(plus_anchor_segments_3);
+      FREE(minus_anchor_segments_3);
+
       return hitpairs;
+
     } else {
       opt_level = (*found_score < opt_level) ? *found_score : opt_level;
       if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
@@ -20377,11 +22012,14 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
       debug(printf("4/5> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
     }
 
+#if 0
+    /* SPEED */
     if (*found_score <= done_level_5 + done_level_3) {
       debug(printf("Test for completeset: false because *found_score %d <done_level_5 %d + done_level_3 %d\n",
 		   *found_score,done_level_5,done_level_3));
       completeset5p = completeset3p = false;
     }
+#endif
   }
 
 
@@ -20391,8 +22029,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
   /* Subtract 1 from done_level for previous hits */
   did_singlesplicing5_p = false;
   /* SPEED: For more hits, turn off first branch */
-  if (use_sarray_p == true && completeset5p == false) {
+  /* For more accuracy, turned off test for completeset above, and checking greedy5 and greedy3 for paired_usedp == false */
+  if (require_completeset_p == false && use_sarray_p == true && completeset5p == false) {
     /* Skip.  Suffix array already found something */
+    /* Note: Turning this branch off results in a 3x slowdown */
     debug(printf("Skipping complete set on 5', because sarray found a hitpair\n"));
 
   } else if (knownsplicingp || novelsplicingp) {
@@ -20446,8 +22086,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
   did_singlesplicing3_p = false;
   /* SPEED: For more hits, turn off first branch */
-  if (use_sarray_p == true && completeset3p == false) {
+  /* For more accuracy, turned off test for completeset above, and checking greedy5 and greedy3 for paired_usedp == false */
+  if (require_completeset_p == false && use_sarray_p == true && completeset3p == false) {
     /* Skip.  Suffix array already found something */
+    /* Note: Turning this branch off results in a 3x slowdown */
     debug(printf("Skipping complete set on 3', because sarray found a hitpair\n"));
 
   } else if (knownsplicingp || novelsplicingp) {
@@ -20510,6 +22152,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					 hitarray3,/*narray3*/HITARRAY_SINGLESPLICING+1,
 					 *cutoff_level_5,*cutoff_level_3,
 					 querylength5,querylength3,maxpairedpaths,genestrand);
+
     debug(printf("After pairing single splicing, found %d concordant, %d nsamechr, %d terminals, found_score %d\n",
 		 nconcordant,nsamechr,List_length(*terminals),*found_score));
     if (*abort_pairing_p == true) {
@@ -20525,6 +22168,12 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
       hitpairs = Stage3pair_remove_circular_alias(hitpairs);
 #endif
       hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
+
+      FREE(plus_anchor_segments_5);
+      FREE(minus_anchor_segments_5);
+      FREE(plus_anchor_segments_3);
+      FREE(minus_anchor_segments_3);
+
       return hitpairs;
 
     } else {
@@ -20609,6 +22258,12 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 #if 0
       hitpairs = Stage3pair_remove_overlaps(hitpairs,/*translocp*/false,/*finalp*/true);
 #endif
+
+      FREE(plus_anchor_segments_5);
+      FREE(minus_anchor_segments_5);
+      FREE(plus_anchor_segments_3);
+      FREE(minus_anchor_segments_3);
+
       return hitpairs;
 
     } else {
@@ -20642,8 +22297,8 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
     debug13(printf("abort_pairing_p is true, so setting gmap5p and gmap3p false\n"));
     gmap5p = gmap3p = false;
   } else if (nconcordant > 0) {
-    /* Rely upon GMAP improvement instead */
-    debug13(printf("nconcordant > 0, so setting gmap5p and gmap3p false\n"));
+    /* Rely upon GMAP pairsearch/halfmapping in stage 9 instead */
+    debug13(printf("nconcordant > 0, so setting gmap5p and gmap3p false at this point\n"));
     gmap5p = gmap3p = false;
   } else if (*found_score < trigger_score_for_gmap) {
     debug13(printf("found_score %d < trigger_score_for_gmap %d, so setting gmap5p and gmap3p false\n",
@@ -20762,6 +22417,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					 /*hitarray3*/&(*hits3),/*narray3*/1,
 					 *cutoff_level_5,*cutoff_level_3,
 					 querylength5,querylength3,maxpairedpaths,genestrand);
+
     debug(printf("11> After pairing GMAP, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		 nconcordant,nsamechr,List_length(*terminals),*found_score));
     if (*abort_pairing_p == false) {
@@ -20778,7 +22434,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
   }
 
 
-  /* Search 7: Distant splicing */
+  /* Search 7: Distant RNA splicing */
   min_trim = querylength5 + querylength3;
   for (p = hitpairs; p != NULL; p = List_next(p)) {
     newpair = (Stage3pair_T) List_head(p);
@@ -20787,32 +22443,33 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
     }
   }
 
-  debug(printf("? distant splicing.  nconcordant is %d.  min_trim is %d.  done_level is %d + %d\n",
+  debug(printf("? distant RNA splicing.  nconcordant is %d.  min_trim is %d.  done_level is %d + %d\n",
 	       nconcordant,min_trim,done_level_5,done_level_3));
   if (nconcordant > 0 && min_trim < min_distantsplicing_end_matches) {
     /* Skip search for distant splicing */
+    debug(printf("Skipping search for distant RNA splicing, because nconcordant > 0 && min_trim < min_distantsplicing_end_matches\n"));
 
   } else if (*abort_pairing_p == true) {
     /* Skip further searching */
+    debug(printf("Skipping search for distant RNA splicing, because abort pairing is true\n"));
 
   } else if (knownsplicingp == false && novelsplicingp == false) {
-    /* TODO: Find distant splicing for DNA */
+    debug(printf("Skipping search for distant RNA splicing, because knownsplicing and novelsplicing are false\n"));
 
   } else {
-    /* Find distant splicing for RNA */
-    if (done_level_5 >= distantsplicing_penalty) {
+    if ((max_splice_mismatches_5 = done_level_5 - distantsplicing_penalty) >= 0) {
+      /* Find distant splicing for RNA */
       /* Want >= and not >, because otherwise distant splicing does not work on 50-bp reads */
       /* Want > and not >=, because distant splicing needs to be better than other alternatives */
-      max_splice_mismatches_5 = done_level_5 - distantsplicing_penalty;
-
-      donors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      antidonors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      acceptors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      antiacceptors_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      donors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      antidonors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      acceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
-      antiacceptors_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      
+      donors_plus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      antidonors_plus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      acceptors_plus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      antiacceptors_plus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      donors_minus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      antidonors_minus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      acceptors_minus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+      antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
 
       if (floors5_computed_p == false) {
 	floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,
@@ -20822,9 +22479,9 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 	floors5_computed_p = true;
       }
 
-      /* 11A.  Distant splicing */
+      /* 11A.  Distant RNA splicing */
       debug(printf("Starting find_spliceends (plus) on 5' end with %d anchor segments\n",n_plus_anchors_5));
-      find_spliceends_distant_rna(&donors_plus_5,&antidonors_plus_5,&acceptors_plus_5,&antiacceptors_plus_5,
+      find_spliceends_distant_rna(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
 				  plus_anchor_segments_5,n_plus_anchors_5,
 #ifdef DEBUG4E
 				  /*queryptr*/queryuc_ptr_5,
@@ -20834,7 +22491,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
       debug(printf("Finished find_spliceends (plus)\n"));
 
       debug(printf("Starting find_spliceends (minus) on 5' end with %d anchor segments\n",n_minus_anchors_5));
-      find_spliceends_distant_rna(&antidonors_minus_5,&donors_minus_5,&antiacceptors_minus_5,&acceptors_minus_5,
+      find_spliceends_distant_rna(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
 				  minus_anchor_segments_5,n_minus_anchors_5,
 #ifdef DEBUG4E
 				  /*queryptr*/queryrc5,
@@ -20844,7 +22501,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
       debug(printf("Finished find_spliceends (minus)\n"));
 
 
-      /* 11A.  Distant splicing */
+      /* 11A.  Distant RNA splicing */
       nmismatches = 0;
       while (longsinglesplicing5 == NULL &&
 	     nmismatches <= max_splice_mismatches_5 /* && nsplicepairs5 < MAXCHIMERAPATHS */) {
@@ -20852,29 +22509,29 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 		     nmismatches,max_splice_mismatches_5));
 	
 	debug4e(printf("Sorting splice ends\n"));
-	donors_plus_5[nmismatches] = Substring_sort_siteD_halves(donors_plus_5[nmismatches],/*ascendingp*/true);
-	acceptors_plus_5[nmismatches] = Substring_sort_siteA_halves(acceptors_plus_5[nmismatches],/*ascendingp*/true);
+	donors_plus[nmismatches] = Substring_sort_siteD_halves(donors_plus[nmismatches],/*ascendingp*/true);
+	acceptors_plus[nmismatches] = Substring_sort_siteA_halves(acceptors_plus[nmismatches],/*ascendingp*/true);
 
-	antidonors_plus_5[nmismatches] = Substring_sort_siteD_halves(antidonors_plus_5[nmismatches],/*ascendingp*/false);
-	antiacceptors_plus_5[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus_5[nmismatches],/*ascendingp*/false);
+	antidonors_plus[nmismatches] = Substring_sort_siteD_halves(antidonors_plus[nmismatches],/*ascendingp*/false);
+	antiacceptors_plus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false);
 
-	donors_minus_5[nmismatches] = Substring_sort_siteD_halves(donors_minus_5[nmismatches],/*ascendingp*/false);
-	acceptors_minus_5[nmismatches] = Substring_sort_siteA_halves(acceptors_minus_5[nmismatches],/*ascendingp*/false);
+	donors_minus[nmismatches] = Substring_sort_siteD_halves(donors_minus[nmismatches],/*ascendingp*/false);
+	acceptors_minus[nmismatches] = Substring_sort_siteA_halves(acceptors_minus[nmismatches],/*ascendingp*/false);
 
-	antidonors_minus_5[nmismatches] = Substring_sort_siteD_halves(antidonors_minus_5[nmismatches],/*ascendingp*/true);
-	antiacceptors_minus_5[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus_5[nmismatches],/*ascendingp*/true);
+	antidonors_minus[nmismatches] = Substring_sort_siteD_halves(antidonors_minus[nmismatches],/*ascendingp*/true);
+	antiacceptors_minus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true);
 
 	debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
 		       nmismatches,
-		       List_length(donors_plus_5[nmismatches]),List_length(acceptors_plus_5[nmismatches]),
-		       List_length(antidonors_plus_5[nmismatches]),List_length(antiacceptors_plus_5[nmismatches]),
-		       List_length(donors_minus_5[nmismatches]),List_length(acceptors_minus_5[nmismatches]),
-		       List_length(antidonors_minus_5[nmismatches]),List_length(antiacceptors_minus_5[nmismatches])));
+		       List_length(donors_plus[nmismatches]),List_length(acceptors_plus[nmismatches]),
+		       List_length(antidonors_plus[nmismatches]),List_length(antiacceptors_plus[nmismatches]),
+		       List_length(donors_minus[nmismatches]),List_length(acceptors_minus[nmismatches]),
+		       List_length(antidonors_minus[nmismatches]),List_length(antiacceptors_minus[nmismatches])));
 
 	ignore_found_score = *found_score;
 	distantsplicing5 = find_splicepairs_distant_rna(&ignore_found_score,&nsplicepairs5,&longsinglesplicing5,/*hits*/distantsplicing5,
-							donors_plus_5,antidonors_plus_5,acceptors_plus_5,antiacceptors_plus_5,
-							donors_minus_5,antidonors_minus_5,acceptors_minus_5,antiacceptors_minus_5,
+							donors_plus,antidonors_plus,acceptors_plus,antiacceptors_plus,
+							donors_minus,antidonors_minus,acceptors_minus,antiacceptors_minus,
 							localsplicing_penalty,distantsplicing_penalty,
 							querylength5,nmismatches,/*first_read_p*/true);
 	debug(printf("Found %d distant splices on 5' end\n",List_length(distantsplicing5)));
@@ -20883,38 +22540,37 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
       /* Clean up 5 */
       for (i = 0; i <= max_splice_mismatches_5; i++) {
-	substringlist_gc(&(donors_plus_5[i]));
-	substringlist_gc(&(antidonors_plus_5[i]));
-	substringlist_gc(&(acceptors_plus_5[i]));
-	substringlist_gc(&(antiacceptors_plus_5[i]));
-	substringlist_gc(&(donors_minus_5[i]));
-	substringlist_gc(&(antidonors_minus_5[i]));
-	substringlist_gc(&(acceptors_minus_5[i]));
-	substringlist_gc(&(antiacceptors_minus_5[i]));
-      }
-      FREEA(donors_plus_5);
-      FREEA(antidonors_plus_5);
-      FREEA(acceptors_plus_5);
-      FREEA(antiacceptors_plus_5);
-      FREEA(donors_minus_5);
-      FREEA(antidonors_minus_5);
-      FREEA(acceptors_minus_5);
-      FREEA(antiacceptors_minus_5);
-    }
-
-    if (done_level_3 >= distantsplicing_penalty) {
+	substringlist_gc(&(donors_plus[i]));
+	substringlist_gc(&(antidonors_plus[i]));
+	substringlist_gc(&(acceptors_plus[i]));
+	substringlist_gc(&(antiacceptors_plus[i]));
+	substringlist_gc(&(donors_minus[i]));
+	substringlist_gc(&(antidonors_minus[i]));
+	substringlist_gc(&(acceptors_minus[i]));
+	substringlist_gc(&(antiacceptors_minus[i]));
+      }
+      FREEA(donors_plus);
+      FREEA(antidonors_plus);
+      FREEA(acceptors_plus);
+      FREEA(antiacceptors_plus);
+      FREEA(donors_minus);
+      FREEA(antidonors_minus);
+      FREEA(acceptors_minus);
+      FREEA(antiacceptors_minus);
+    }
+
+    if ((max_splice_mismatches_3 = done_level_3 - distantsplicing_penalty) >= 0) {
       /* Want >= and not >, because otherwise distant splicing does not work on 50-bp reads */
       /* Want > and not >=, because distant splicing needs to be better than other alternatives */
-      max_splice_mismatches_3 = done_level_3 - distantsplicing_penalty;
 
-      donors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      antidonors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      acceptors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      antiacceptors_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      donors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      antidonors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      acceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
-      antiacceptors_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      donors_plus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      antidonors_plus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      acceptors_plus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      antiacceptors_plus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      donors_minus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      antidonors_minus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      acceptors_minus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+      antiacceptors_minus = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
 
       if (floors3_computed_p == false) {
 	floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,
@@ -20926,7 +22582,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
       /* 11B.  Distant splicing */
       debug(printf("Starting find_spliceends (plus) on 3' end with %d anchor segments\n",n_plus_anchors_3));
-      find_spliceends_distant_rna(&donors_plus_3,&antidonors_plus_3,&acceptors_plus_3,&antiacceptors_plus_3,
+      find_spliceends_distant_rna(&donors_plus,&antidonors_plus,&acceptors_plus,&antiacceptors_plus,
 				  plus_anchor_segments_3,n_plus_anchors_3,
 #ifdef DEBUG4E
 				  /*queryptr*/queryuc_ptr_3,
@@ -20936,7 +22592,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
       debug(printf("Finished find_spliceends (plus)\n"));
 
       debug(printf("Starting find_spliceends (minus) on 3' end with %d anchor segments\n",n_minus_anchors_3));
-      find_spliceends_distant_rna(&antidonors_minus_3,&donors_minus_3,&antiacceptors_minus_3,&acceptors_minus_3,
+      find_spliceends_distant_rna(&antidonors_minus,&donors_minus,&antiacceptors_minus,&acceptors_minus,
 				  minus_anchor_segments_3,n_minus_anchors_3,
 #ifdef DEBUG4E
 				  /*queryptr*/queryrc3,
@@ -20953,29 +22609,29 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 		     nmismatches,max_splice_mismatches_3));
 
 	debug4e(printf("Sorting splice ends\n"));
-	donors_plus_3[nmismatches] = Substring_sort_siteD_halves(donors_plus_3[nmismatches],/*ascendingp*/true);
-	acceptors_plus_3[nmismatches] = Substring_sort_siteA_halves(acceptors_plus_3[nmismatches],/*ascendingp*/true);
+	donors_plus[nmismatches] = Substring_sort_siteD_halves(donors_plus[nmismatches],/*ascendingp*/true);
+	acceptors_plus[nmismatches] = Substring_sort_siteA_halves(acceptors_plus[nmismatches],/*ascendingp*/true);
 
-	antidonors_plus_3[nmismatches] = Substring_sort_siteD_halves(antidonors_plus_3[nmismatches],/*ascendingp*/false);
-	antiacceptors_plus_3[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus_3[nmismatches],/*ascendingp*/false);
+	antidonors_plus[nmismatches] = Substring_sort_siteD_halves(antidonors_plus[nmismatches],/*ascendingp*/false);
+	antiacceptors_plus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_plus[nmismatches],/*ascendingp*/false);
 
-	donors_minus_3[nmismatches] = Substring_sort_siteD_halves(donors_minus_3[nmismatches],/*ascendingp*/false);
-	acceptors_minus_3[nmismatches] = Substring_sort_siteA_halves(acceptors_minus_3[nmismatches],/*ascendingp*/false);
+	donors_minus[nmismatches] = Substring_sort_siteD_halves(donors_minus[nmismatches],/*ascendingp*/false);
+	acceptors_minus[nmismatches] = Substring_sort_siteA_halves(acceptors_minus[nmismatches],/*ascendingp*/false);
 
-	antidonors_minus_3[nmismatches] = Substring_sort_siteD_halves(antidonors_minus_3[nmismatches],/*ascendingp*/true);
-	antiacceptors_minus_3[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus_3[nmismatches],/*ascendingp*/true);
+	antidonors_minus[nmismatches] = Substring_sort_siteD_halves(antidonors_minus[nmismatches],/*ascendingp*/true);
+	antiacceptors_minus[nmismatches] = Substring_sort_siteA_halves(antiacceptors_minus[nmismatches],/*ascendingp*/true);
 
 	debug4e(printf("Splice ends at %d nmismatches: +donors/acceptors %d/%d, +antidonors/antiacceptors %d/%d, -donors/acceptors %d/%d, -antidonors/antiacceptors %d/%d\n",
 		       nmismatches,
-		       List_length(donors_plus_3[nmismatches]),List_length(acceptors_plus_3[nmismatches]),
-		       List_length(antidonors_plus_3[nmismatches]),List_length(antiacceptors_plus_3[nmismatches]),
-		       List_length(donors_minus_3[nmismatches]),List_length(acceptors_minus_3[nmismatches]),
-		       List_length(antidonors_minus_3[nmismatches]),List_length(antiacceptors_minus_3[nmismatches])));
+		       List_length(donors_plus[nmismatches]),List_length(acceptors_plus[nmismatches]),
+		       List_length(antidonors_plus[nmismatches]),List_length(antiacceptors_plus[nmismatches]),
+		       List_length(donors_minus[nmismatches]),List_length(acceptors_minus[nmismatches]),
+		       List_length(antidonors_minus[nmismatches]),List_length(antiacceptors_minus[nmismatches])));
 
 	ignore_found_score = *found_score;
 	distantsplicing3 = find_splicepairs_distant_rna(&ignore_found_score,&nsplicepairs3,&longsinglesplicing3,/*hits*/distantsplicing3,
-							donors_plus_3,antidonors_plus_3,acceptors_plus_3,antiacceptors_plus_3,
-							donors_minus_3,antidonors_minus_3,acceptors_minus_3,antiacceptors_minus_3,
+							donors_plus,antidonors_plus,acceptors_plus,antiacceptors_plus,
+							donors_minus,antidonors_minus,acceptors_minus,antiacceptors_minus,
 							localsplicing_penalty,distantsplicing_penalty,
 							querylength3,nmismatches,/*first_read_p*/false);
 	debug(printf("Found %d distant splices on 5' end\n",List_length(distantsplicing3)));
@@ -20984,27 +22640,26 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
       /* Clean up 3 */
       for (i = 0; i <= max_splice_mismatches_3; i++) {
-	substringlist_gc(&(donors_plus_3[i]));
-	substringlist_gc(&(antidonors_plus_3[i]));
-	substringlist_gc(&(acceptors_plus_3[i]));
-	substringlist_gc(&(antiacceptors_plus_3[i]));
-	substringlist_gc(&(donors_minus_3[i]));
-	substringlist_gc(&(antidonors_minus_3[i]));
-	substringlist_gc(&(acceptors_minus_3[i]));
-	substringlist_gc(&(antiacceptors_minus_3[i]));
-      }
-      FREEA(donors_plus_3);
-      FREEA(antidonors_plus_3);
-      FREEA(acceptors_plus_3);
-      FREEA(antiacceptors_plus_3);
-      FREEA(donors_minus_3);
-      FREEA(antidonors_minus_3);
-      FREEA(acceptors_minus_3);
-      FREEA(antiacceptors_minus_3);
+	substringlist_gc(&(donors_plus[i]));
+	substringlist_gc(&(antidonors_plus[i]));
+	substringlist_gc(&(acceptors_plus[i]));
+	substringlist_gc(&(antiacceptors_plus[i]));
+	substringlist_gc(&(donors_minus[i]));
+	substringlist_gc(&(antidonors_minus[i]));
+	substringlist_gc(&(acceptors_minus[i]));
+	substringlist_gc(&(antiacceptors_minus[i]));
+      }
+      FREEA(donors_plus);
+      FREEA(antidonors_plus);
+      FREEA(acceptors_plus);
+      FREEA(antiacceptors_plus);
+      FREEA(donors_minus);
+      FREEA(antidonors_minus);
+      FREEA(acceptors_minus);
+      FREEA(antiacceptors_minus);
     }
 
     /* 11.  Pairing after distant splicing using longsinglesplicing */
-
     if (longsinglesplicing5 != NULL || longsinglesplicing3 != NULL) {
 #if 0
       /* Note: cannot use hitarray after we have removed overlapping alignments.  Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
@@ -21025,9 +22680,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					   /*hitarray3*/&(*hits3),/*narray3*/1,
 					   *cutoff_level_5,*cutoff_level_3,
 					   querylength5,querylength3,maxpairedpaths,genestrand);
+	
       debug(printf("10> After pairing long single splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		   nconcordant,nsamechr,List_length(*terminals),*found_score));
-
+	
       if (*abort_pairing_p == false) {
 	opt_level = (*found_score < opt_level) ? *found_score : opt_level;
 	if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
@@ -21038,7 +22694,6 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 	}
 	debug(printf("10> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
       }
-
     }
 
     /* 11.  Pairing after distant splicing using distantsplicing */
@@ -21063,9 +22718,10 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					   /*hitarray3*/&(*hits3),/*narray3*/1,
 					   *cutoff_level_5,*cutoff_level_3,
 					   querylength5,querylength3,maxpairedpaths,genestrand);
+	
       debug(printf("11> After pairing distant splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		   nconcordant,nsamechr,List_length(*terminals),*found_score));
-
+	
       if (*abort_pairing_p == false) {
 	opt_level = (*found_score < opt_level) ? *found_score : opt_level;
 	if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
@@ -21079,7 +22735,296 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
     }
   }
 
-  /* Search 8: Terminals */
+
+  /* Search 8: Distant DNA splicing */
+  min_trim = querylength5 + querylength3;
+  for (p = hitpairs; p != NULL; p = List_next(p)) {
+    newpair = (Stage3pair_T) List_head(p);
+    if ((trim = Stage3pair_total_trim(newpair)) < min_trim) {
+      min_trim = trim;
+    }
+  }
+
+  debug(printf("? distant DNA splicing.  nconcordant is %d.  min_trim is %d.  done_level is %d + %d\n",
+	       nconcordant,min_trim,done_level_5,done_level_3));
+  if (nconcordant > 0 && min_trim < min_distantsplicing_end_matches) {
+    /* Skip search for distant splicing */
+    debug(printf("Skipping search for distant DNA splicing, because nconcordant > 0 && min_trim < min_distantsplicing_end_matches\n"));
+
+  } else if (*abort_pairing_p == true) {
+    /* Skip further searching */
+    debug(printf("Skipping search for distant DNA splicing, because abort pairing is true\n"));
+
+  } else if (find_dna_chimeras_p == false) {
+    debug(printf("Skipping search for distant DNA splicing, because find_dna_chimeras_p is false\n"));
+
+  } else {
+    max_splice_mismatches_5 = done_level_5 - distantsplicing_penalty;
+    max_splice_mismatches_3 = done_level_3 - distantsplicing_penalty;
+
+    if (max_splice_mismatches_5 >= 0 || max_splice_mismatches_3 >= 0) {
+      /* Find distant splicing for DNA */
+      if (distantsplicing5 != NULL) {
+	/* Skip: Already found distant splicing using RNA */
+	max_splice_mismatches_5 = -1;
+
+      } else if (max_splice_mismatches_5 >= 0) {
+	startfrags_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+	endfrags_plus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+	startfrags_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+	endfrags_minus_5 = (List_T *) CALLOCA(max_splice_mismatches_5+1,sizeof(List_T));
+
+	if (floors5_computed_p == false) {
+	  floors5 = compute_floors(&any_omitted_p_5,&alloc_floors_p_5,floors_array,this5,queryuc_ptr_5,querylength5,query5_lastpos,
+				   plus_indexdb_5,minus_indexdb_5,indexdb_size_threshold,max_end_insertions,
+				   /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+	  floors5_computed_p = true;
+	}
+
+	debug(printf("Starting find_spliceends_distant_dna_plus\n"));
+	find_spliceends_distant_dna_plus(&startfrags_plus_5,&endfrags_plus_5,plus_anchor_segments_5,n_plus_anchors_5,
+#ifdef DEBUG4E
+					 /*queryptr*/queryuc_ptr_5,
+#endif
+					 floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_fwd,
+					 max_splice_mismatches_5,genestrand);
+	debug(printf("Finished find_spliceends_distant_dna_plus\n"));
+
+	debug(printf("Starting find_spliceends_distant_dna_minus\n"));
+	find_spliceends_distant_dna_minus(&startfrags_minus_5,&endfrags_minus_5,minus_anchor_segments_5,n_minus_anchors_5,
+#ifdef DEBUG4E
+					  /*queryptr*/queryrc5,
+#endif
+					  floors5,querylength5,query5_lastpos,/*query_compress*/query5_compress_rev,
+					  max_splice_mismatches_5,genestrand);
+	debug(printf("Finished find_spliceends_distant_dna_minus\n"));
+      }
+
+      if (distantsplicing3 != NULL) {
+	/* Skip: Already found distant splicing using RNA */
+	max_splice_mismatches_3 = -1;
+
+      } else if (max_splice_mismatches_3 >= 0) {
+	startfrags_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+	endfrags_plus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+	startfrags_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+	endfrags_minus_3 = (List_T *) CALLOCA(max_splice_mismatches_3+1,sizeof(List_T));
+
+	if (floors3_computed_p == false) {
+	  floors3 = compute_floors(&any_omitted_p_3,&alloc_floors_p_3,floors_array,this3,queryuc_ptr_3,querylength3,query3_lastpos,
+				   plus_indexdb_3,minus_indexdb_3,indexdb_size_threshold,max_end_insertions,
+				   /*omit_frequent_p*/true,/*omit_repetitive_p*/true,keep_floors_p);
+	  floors3_computed_p = true;
+	}
+
+	debug(printf("Starting find_spliceends_distant_dna_plus\n"));
+	find_spliceends_distant_dna_plus(&startfrags_plus_3,&endfrags_plus_3,plus_anchor_segments_3,n_plus_anchors_3,
+#ifdef DEBUG4E
+					 /*queryptr*/queryuc_ptr_3,
+#endif
+					 floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_fwd,
+					 max_splice_mismatches_3,genestrand);
+	debug(printf("Finished find_spliceends_distant_dna_plus\n"));
+
+	debug(printf("Starting find_spliceends_distant_dna_minus\n"));
+	find_spliceends_distant_dna_minus(&startfrags_minus_3,&endfrags_minus_3,minus_anchor_segments_3,n_minus_anchors_3,
+#ifdef DEBUG4E
+					  /*queryptr*/queryrc3,
+#endif
+					  floors3,querylength3,query3_lastpos,/*query_compress*/query3_compress_rev,
+					  max_splice_mismatches_3,genestrand);
+	debug(printf("Finished find_spliceends_distant_dna_minus\n"));
+      }
+
+
+      nmismatches = 0;
+      while (nmismatches <= done_level_5 - distantsplicing_penalty ||
+	     nmismatches <= done_level_3 - distantsplicing_penalty) {
+	debug(printf("*** Stage 9 (DNA).  Distant splicing, allowing %d mismatches ***\n",nmismatches));
+	longsinglesplicing5 = (List_T) NULL;
+	longsinglesplicing3 = (List_T) NULL;
+	distantsplicing5 = (List_T) NULL;
+	distantsplicing3 = (List_T) NULL;
+	  
+	if (max_splice_mismatches_5 >= 0) {
+	  debug4e(printf("Sorting splice ends\n"));
+	  startfrags_plus_5[nmismatches] = Substring_sort_siteN_halves(startfrags_plus_5[nmismatches],/*ascendingp*/true);
+	  endfrags_plus_5[nmismatches] = Substring_sort_siteN_halves(endfrags_plus_5[nmismatches],/*ascendingp*/true);
+	  
+	  startfrags_minus_5[nmismatches] = Substring_sort_siteN_halves(startfrags_minus_5[nmismatches],/*ascendingp*/false);
+	  endfrags_minus_5[nmismatches] = Substring_sort_siteN_halves(endfrags_minus_5[nmismatches],/*ascendingp*/false);
+	  
+	  debug4e(printf("5' splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n",
+			 nmismatches,
+			 List_length(startfrags_plus_5[nmismatches]),List_length(endfrags_plus_5[nmismatches]),
+			 List_length(startfrags_minus_5[nmismatches]),List_length(endfrags_minus_5[nmismatches])));
+
+	  ignore_found_score = *found_score;
+	  distantsplicing5 = find_splicepairs_distant_dna(&ignore_found_score,&nsplicepairs5,&longsinglesplicing5,distantsplicing5,
+							  startfrags_plus_5,endfrags_plus_5,startfrags_minus_5,endfrags_minus_5,
+							  localsplicing_penalty,distantsplicing_penalty,
+							  querylength5,nmismatches,/*first_read_p*/true);
+
+	  if (longsinglesplicing5 != NULL) {
+	    debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing5)));
+	    longsinglesplicing5 = Stage3end_optimal_score(longsinglesplicing5,query5_compress_fwd,query5_compress_rev,querylength5,
+							  /*keep_gmap_p*/true,/*finalp*/false);
+	    debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing5)));
+	  }
+
+	  if (distantsplicing5 != NULL) {
+	    /* Excess distant splicing should be freed already in find_splicepairs_distant_dna */
+	    debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing5)));
+	    distantsplicing5 = Stage3end_optimal_score(distantsplicing5,query5_compress_fwd,query5_compress_rev,querylength5,
+						       /*keep_gmap_p*/true,/*finalp*/false);
+	    debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing5)));
+	  }
+	}
+
+	if (max_splice_mismatches_3 >= 0) {
+	  debug4e(printf("Sorting splice ends\n"));
+	  startfrags_plus_3[nmismatches] = Substring_sort_siteN_halves(startfrags_plus_3[nmismatches],/*ascendingp*/true);
+	  endfrags_plus_3[nmismatches] = Substring_sort_siteN_halves(endfrags_plus_3[nmismatches],/*ascendingp*/true);
+	  
+	  startfrags_minus_3[nmismatches] = Substring_sort_siteN_halves(startfrags_minus_3[nmismatches],/*ascendingp*/false);
+	  endfrags_minus_3[nmismatches] = Substring_sort_siteN_halves(endfrags_minus_3[nmismatches],/*ascendingp*/false);
+	  
+	  debug4e(printf("3' splice ends at %d nmismatches: +startfrags/endfrags %d/%d, -startfrags/endfrags %d/%d\n",
+			 nmismatches,
+			 List_length(startfrags_plus_3[nmismatches]),List_length(endfrags_plus_3[nmismatches]),
+			 List_length(startfrags_minus_3[nmismatches]),List_length(endfrags_minus_3[nmismatches])));
+
+	  ignore_found_score = *found_score;
+	  distantsplicing3 = find_splicepairs_distant_dna(&ignore_found_score,&nsplicepairs3,&longsinglesplicing3,distantsplicing3,
+							  startfrags_plus_3,endfrags_plus_3,startfrags_minus_3,endfrags_minus_3,
+							  localsplicing_penalty,distantsplicing_penalty,
+							  querylength3,nmismatches,/*first_read_p*/false);
+
+	  if (longsinglesplicing3 != NULL) {
+	    debug(printf("Entering Stage3end_optimal_score with %d longsinglesplicing hits\n",List_length(longsinglesplicing3)));
+	    longsinglesplicing3 = Stage3end_optimal_score(longsinglesplicing3,query3_compress_fwd,query3_compress_rev,querylength3,
+							  /*keep_gmap_p*/true,/*finalp*/false);
+	    debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(longsinglesplicing3)));
+	  }
+
+	  if (distantsplicing3 != NULL) {
+	    /* Excess distant splicing should be freed already in find_splicepairs_distant_dna */
+	    debug(printf("Entering Stage3end_optimal_score with %d hits\n",List_length(distantsplicing3)));
+	    distantsplicing3 = Stage3end_optimal_score(distantsplicing3,query3_compress_fwd,query3_compress_rev,querylength3,
+						       /*keep_gmap_p*/true,/*finalp*/false);
+	    debug(printf("Exiting Stage3end_optimal_score with %d hits\n",List_length(distantsplicing3)));
+	  }
+	}
+
+	/* 11.  Pairing after distant splicing using longsinglesplicing */
+	if (longsinglesplicing5 != NULL || longsinglesplicing3 != NULL) {
+#if 0
+	  /* Note: cannot use hitarray after we have removed overlapping alignments.  Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+	  hitarray5[HITARRAY_LONGSINGLESPLICING] = longsinglesplicing5;
+	  hitarray3[HITARRAY_LONGSINGLESPLICING] = longsinglesplicing3;
+#else
+	  if (longsinglesplicing5 != NULL) {
+	    *hits5 = List_append(*hits5,longsinglesplicing5);
+	  }
+	  if (longsinglesplicing3 != NULL) {
+	    *hits3 = List_append(*hits3,longsinglesplicing3);
+	  }
+#endif
+	  /* Note: cannot use hitarray after we have removed overlapping alignments.  Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+	  hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
+					       &(*samechr),&(*conc_transloc),&(*terminals),
+					       hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
+					       /*hitarray3*/&(*hits3),/*narray3*/1,
+					       *cutoff_level_5,*cutoff_level_3,
+					       querylength5,querylength3,maxpairedpaths,genestrand);
+	
+	  debug(printf("10> After pairing long single splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+		       nconcordant,nsamechr,List_length(*terminals),*found_score));
+	
+	  if (*abort_pairing_p == false) {
+	    opt_level = (*found_score < opt_level) ? *found_score : opt_level;
+	    if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
+	      done_level_5 = user_maxlevel_5;
+	    }
+	    if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
+	      done_level_3 = user_maxlevel_3;
+	    }
+	    debug(printf("10> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
+	  }
+	}
+
+	/* 11.  Pairing after distant splicing using distantsplicing */
+#if 0
+	/* Note: cannot use hitarray after we have removed overlapping alignments.  Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+	hitarray5[HITARRAY_DISTANTSPLICING] = distantsplicing5;
+	hitarray3[HITARRAY_DISTANTSPLICING] = distantsplicing3;
+#else
+	if (distantsplicing5 != NULL) {
+	  *hits5 = List_append(*hits5,distantsplicing5);
+	}
+	if (distantsplicing3 != NULL) {
+	  *hits3 = List_append(*hits3,distantsplicing3);
+	}
+#endif
+
+	/* Previously also checked for nconcordant == 0 */
+	if (distantsplicing5 != NULL || distantsplicing3 != NULL) {
+	  /* Note: cannot use hitarray after we have removed overlapping alignments.  Have to point to hits5 and hits3 and set narray5 = narray3 = 1 */
+	  hitpairs = Stage3_pair_up_concordant(&(*abort_pairing_p),&(*found_score),&nconcordant,&nsamechr,
+					       &(*samechr),&(*conc_transloc),&(*terminals),
+					       hitpairs,/*hitarray5*/&(*hits5),/*narray5*/1,
+					       /*hitarray3*/&(*hits3),/*narray3*/1,
+					       *cutoff_level_5,*cutoff_level_3,
+					       querylength5,querylength3,maxpairedpaths,genestrand);
+	
+	  debug(printf("11> After pairing distant splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
+		       nconcordant,nsamechr,List_length(*terminals),*found_score));
+	
+	  if (*abort_pairing_p == false) {
+	    opt_level = (*found_score < opt_level) ? *found_score : opt_level;
+	    if ((done_level_5 = opt_level + subopt_levels) > user_maxlevel_5) {
+	      done_level_5 = user_maxlevel_5;
+	    }
+	    if ((done_level_3 = opt_level + subopt_levels) > user_maxlevel_3) {
+	      done_level_3 = user_maxlevel_3;
+	    }
+	    debug(printf("10> found_score = %d, opt_level %d, done_level %d,%d\n",*found_score,opt_level,done_level_5,done_level_3));
+	  }
+	}
+
+	nmismatches++;
+      }
+
+      if (max_splice_mismatches_3 >= 0) {
+	for (i = 0; i <= max_splice_mismatches_3; i++) {
+	  substringlist_gc(&(startfrags_plus_3[i]));
+	  substringlist_gc(&(endfrags_plus_3[i]));
+	  substringlist_gc(&(startfrags_minus_3[i]));
+	  substringlist_gc(&(endfrags_minus_3[i]));
+	}
+	FREEA(startfrags_plus_3);
+	FREEA(endfrags_plus_3);
+	FREEA(startfrags_minus_3);
+	FREEA(endfrags_minus_3);
+      }
+
+      if (max_splice_mismatches_5 >= 0) {
+	for (i = 0; i <= max_splice_mismatches_5; i++) {
+	  substringlist_gc(&(startfrags_plus_5[i]));
+	  substringlist_gc(&(endfrags_plus_5[i]));
+	  substringlist_gc(&(startfrags_minus_5[i]));
+	  substringlist_gc(&(endfrags_minus_5[i]));
+	}
+	FREEA(startfrags_plus_5);
+	FREEA(endfrags_plus_5);
+	FREEA(startfrags_minus_5);
+	FREEA(endfrags_minus_5);
+      }
+    }
+  }
+
+
+  /* Search 9: Terminals */
   if (0) {
   if (nconcordant == 0 || *found_score > opt_level) {
     terminals5 = find_terminals(plus_anchor_segments_5,minus_anchor_segments_5,
@@ -21102,6 +23047,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					 /*hitarray3*/&(*hits3),/*narray3*/1,
 					 *cutoff_level_5,*cutoff_level_3,
 					 querylength5,querylength3,maxpairedpaths,genestrand);
+
     debug(printf("After pairing terminals, found %d concordant, %d nsamechr, %d terminals, found_score %d\n",
 		 nconcordant,nsamechr,List_length(*terminals),*found_score));
   }
@@ -21109,12 +23055,17 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
 
   /* Search 9: GMAP pairsearch/halfmapping */
+  debug(printf("Stage 9 (GMAP pairsearch/halfmapping)\n"));
   if (gmap_pairsearch_p == true) {
     if (*abort_pairing_p == true) {
       /* Don't do GMAP */
       gmap5p = gmap3p = false;
+#if 0
     } else if (nconcordant > 0) {
+      /* Cannot make this shortcut, especially if we disallow the shortcut above for GMAP */
+      /* Also, the purpose of this stage 9 is to improve the concordant alignments */
       gmap5p = gmap3p = false;
+#endif
     } else if (*found_score >= trigger_score_for_gmap) {
       debug(printf("Test for stage 9: true because found_score %d >= trigger_score_for_gmap %d\n",*found_score,trigger_score_for_gmap));
       gmap5p = gmap3p = true;
@@ -21155,6 +23106,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 		     best_score_paired));
       for (p = *hits5; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
 	hit5 = (Stage3end_T) List_head(p);
+	debug13(printf("hit5 has score %d.  Used in pair %d\n",Stage3end_score(hit5),Stage3end_paired_usedp(hit5)));
 	if (Stage3end_hittype(hit5) == TRANSLOC_SPLICE) {
 	  debug13(printf("No GMAP on transloc splice\n"));
 	} else if (Stage3end_paired_usedp(hit5) == false && Stage3end_score(hit5) <= best_score_paired) {
@@ -21213,6 +23165,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
 	    } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
 	      debug13(printf("  => not concordant, so eliminating\n"));
+	      Stage3pair_free(&newpair);
 
 	    } else {
 	      nconcordant += 1;
@@ -21239,6 +23192,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 		     best_score_paired));
       for (p = *hits3; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) {
 	hit3 = (Stage3end_T) List_head(p);
+	debug13(printf("hit3 has score %d.  Used in pair %d\n",Stage3end_score(hit3),Stage3end_paired_usedp(hit3)));
 	if (Stage3end_hittype(hit3) == TRANSLOC_SPLICE) {
 	  debug13(printf("Not GMAP on transloc splice\n"));
 	} else if (Stage3end_paired_usedp(hit3) == false && Stage3end_score(hit3) <= best_score_paired) {
@@ -21297,6 +23251,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 
 	    } else if (Stage3pair_determine_pairtype(newpair) != CONCORDANT) {
 	      debug13(printf("  => not concordant, so eliminating\n"));
+	      Stage3pair_free(&newpair);
 
 	    } else {
 	      nconcordant += 1;
@@ -21428,6 +23383,7 @@ align_pair (bool *abort_pairing_p, int *found_score, int *cutoff_level_5, int *c
 					 hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1,
 					 *cutoff_level_5,*cutoff_level_3,
 					 querylength5,querylength3,maxpairedpaths,genestrand);
+
     debug(printf("After pairing short-overlap splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n",
 		 nconcordant,nsamechr,List_length(*terminals),*found_score));
     if (*abort_pairing_p == false) {
@@ -21648,37 +23604,35 @@ choose_among_paired (int *best_nmatches_paired, int *best_nmatches_5, int *best_
     }
   }
 
-  if (hitpairs == NULL) {
-    for (p = terminals; p != NULL; p = p->rest) {
-      hitpair = (Stage3pair_T) p->first;
-      if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
-	final_pairtype = PAIRED_TERMINALS;
-	*best_nmatches_paired = nmatches;
-	*best_nmatches_5 = nmatches5;
-	*best_nmatches_3 = nmatches3;
-      }
+  for (p = terminals; p != NULL; p = p->rest) {
+    hitpair = (Stage3pair_T) p->first;
+    if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
+      final_pairtype = PAIRED_TERMINALS;
+      *best_nmatches_paired = nmatches;
+      *best_nmatches_5 = nmatches5;
+      *best_nmatches_3 = nmatches3;
     }
+  }
 
-    *best_nmatches_paired += 1; /* penalty for choosing translocation over others */
+  *best_nmatches_paired += 1; /* penalty for choosing translocation over others */
 
-    for (p = conc_transloc; p != NULL; p = p->rest) {
-      hitpair = (Stage3pair_T) p->first;
-      if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
-	final_pairtype = CONCORDANT_TRANSLOCATIONS;
-	*best_nmatches_paired = nmatches;
-	*best_nmatches_5 = nmatches5;
-	*best_nmatches_3 = nmatches3;
-      }
+  for (p = conc_transloc; p != NULL; p = p->rest) {
+    hitpair = (Stage3pair_T) p->first;
+    if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
+      final_pairtype = CONCORDANT_TRANSLOCATIONS;
+      *best_nmatches_paired = nmatches;
+      *best_nmatches_5 = nmatches5;
+      *best_nmatches_3 = nmatches3;
     }
+  }
 
-    for (p = samechr; p != NULL; p = p->rest) {
-      hitpair = (Stage3pair_T) p->first;
-      if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
-	final_pairtype = PAIRED_UNSPECIFIED;
-	*best_nmatches_paired = nmatches;
-	*best_nmatches_5 = nmatches5;
-	*best_nmatches_3 = nmatches3;
-      }
+  for (p = samechr; p != NULL; p = p->rest) {
+    hitpair = (Stage3pair_T) p->first;
+    if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) {
+      final_pairtype = PAIRED_UNSPECIFIED;
+      *best_nmatches_paired = nmatches;
+      *best_nmatches_5 = nmatches5;
+      *best_nmatches_3 = nmatches3;
     }
   }
 
@@ -21856,9 +23810,10 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
   int best_nmatches_paired, best_nmatches_paired_5, best_nmatches_paired_3, best_nmatches_5, best_nmatches_3;
 
   
-  debug16(printf("Entered consolidate_paired_results.  Passing pointer %p\n",&best_nmatches_paired));
   *final_pairtype = choose_among_paired(&best_nmatches_paired,&best_nmatches_paired_5,&best_nmatches_paired_3,
 					hitpairs,samechr,conc_transloc,terminals);
+  debug16(printf("Entered consolidate_paired_results with final_pairtype %d\n",*final_pairtype));
+
 
   if (*final_pairtype == CONCORDANT) {
     /* Have concordant results */
@@ -21891,6 +23846,13 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					query5_compress_fwd,query5_compress_rev,
 					query3_compress_fwd,query3_compress_rev,
 					querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+
+      /* result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+	                                     query5_compress_fwd,query5_compress_rev,
+					     query3_compress_fwd,query3_compress_rev,
+					     pairpool,dynprogL,dynprogM,dynprogR,
+					     oligoindices_minor,diagpool,cellpool); */
+
       result = Stage3pair_resolve_multimapping(result);
       /* result = Stage3pair_sort_distance(result); */
       debug16(printf("After removing overlaps, %d results\n",List_length(result)));
@@ -21932,6 +23894,12 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					query3_compress_fwd,query3_compress_rev,
 					querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
 
+      result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+	                                     query5_compress_fwd,query5_compress_rev,
+					     query3_compress_fwd,query3_compress_rev,
+					     pairpool,dynprogL,dynprogM,dynprogR,
+					     oligoindices_minor,diagpool,cellpool);
+
       /* TODO: Resolve terminals by doing full GMAP, and then redo optimal_score */
 
       result = Stage3pair_resolve_multimapping(result);
@@ -21996,16 +23964,24 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					query5_compress_fwd,query5_compress_rev,
 					query3_compress_fwd,query3_compress_rev,
 					querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
-      result = Stage3pair_resolve_multimapping(result);
 
       if (Stage3pair_concordantp(result) == true) {
 	debug16(printf("Found remaining concordant solution, so removing non-concordant ones\n"));
 	*final_pairtype = CONCORDANT;
 	result = Stage3pair_filter_nonconcordant(result);
+
+	result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+					    query5_compress_fwd,query5_compress_rev,
+					    query3_compress_fwd,query3_compress_rev,
+					    pairpool,dynprogL,dynprogM,dynprogR,
+					    oligoindices_minor,diagpool,cellpool);
+
 	debug16(printf("Concordant results: %d\n",List_length(result)));
       } else {
 	*final_pairtype = PAIRED_UNSPECIFIED;
       }
+
+      result = Stage3pair_resolve_multimapping(result);
     }
 
   } else if (*final_pairtype == PAIRED_TERMINALS) {
@@ -22068,6 +24044,13 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					query5_compress_fwd,query5_compress_rev,
 					query3_compress_fwd,query3_compress_rev,
 					querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+
+      result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+					  query5_compress_fwd,query5_compress_rev,
+					  query3_compress_fwd,query3_compress_rev,
+					  pairpool,dynprogL,dynprogM,dynprogR,
+					  oligoindices_minor,diagpool,cellpool);
+
       result = Stage3pair_resolve_multimapping(result);
 
 #if 0
@@ -22119,6 +24102,13 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 				      query5_compress_fwd,query5_compress_rev,
 				      query3_compress_fwd,query3_compress_rev,
 				      querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+
+    result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+					query5_compress_fwd,query5_compress_rev,
+					query3_compress_fwd,query3_compress_rev,
+					pairpool,dynprogL,dynprogM,dynprogR,
+					oligoindices_minor,diagpool,cellpool);
+
     result = Stage3pair_resolve_multimapping(result);
     debug16(printf("Finally, have %d concordant translocation results\n",List_length(result)));
 
@@ -22206,6 +24196,13 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					  query5_compress_fwd,query5_compress_rev,
 					  query3_compress_fwd,query3_compress_rev,
 					  querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+
+	/* result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+					    query5_compress_fwd,query5_compress_rev,
+					    query3_compress_fwd,query3_compress_rev,
+					    pairpool,dynprogL,dynprogM,dynprogR,
+					    oligoindices_minor,diagpool,cellpool); */
+
 	result = Stage3pair_resolve_multimapping(result);
 	/* result = Stage3pair_sort_distance(result); */
 	debug16(printf("After removing overlaps, %d results\n",List_length(result)));
@@ -22217,6 +24214,9 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					  query5_compress_fwd,query5_compress_rev,
 					  query3_compress_fwd,query3_compress_rev,
 					  querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/false);
+
+	/* Do align_pair_with_gmap before trying resolve_insides */
+
 	result = Stage3pair_resolve_multimapping(result);
 	/* result = Stage3pair_sort_distance(result); */
 	debug16(printf("After removing overlaps, %d results\n",List_length(result)));
@@ -22242,6 +24242,13 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 					  query5_compress_fwd,query5_compress_rev,
 					  query3_compress_fwd,query3_compress_rev,
 					  querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true);
+
+	result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+					    query5_compress_fwd,query5_compress_rev,
+					    query3_compress_fwd,query3_compress_rev,
+					    pairpool,dynprogL,dynprogM,dynprogR,
+					    oligoindices_minor,diagpool,cellpool);
+
 	result = Stage3pair_resolve_multimapping(result);
       }
     }
@@ -22288,8 +24295,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
       if ((newpair = Stage3pair_new(hit5,hit3,/*genestrand*/0,pairtype,
 				    /*private5p*/false,/*private3p*/false,
 				    /*expect_concordant_p*/pairtype == CONCORDANT ? true : false)) != NULL) {
-	stage3pairarray = (Stage3pair_T *) CALLOC_OUT(1,sizeof(Stage3pair_T));
-	stage3pairarray[0] = newpair;
+	result = List_push(NULL,(void *) newpair);
 	    
 	*nhits5_primary = *nhits5_altloc = 0;
 	*nhits3_primary = *nhits3_altloc = 0;
@@ -22305,10 +24311,22 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
 	if (pairtype == CONCORDANT) {
 	  debug16(printf("final pairtype is CONCORDANT\n"));
 	  *final_pairtype = CONCORDANT;
+
+	  result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3,
+					      query5_compress_fwd,query5_compress_rev,
+					      query3_compress_fwd,query3_compress_rev,
+					      pairpool,dynprogL,dynprogM,dynprogR,
+					      oligoindices_minor,diagpool,cellpool);
+
 	} else {
 	  debug16(printf("final pairtype is PAIRED_UNSPECIFIED\n"));
 	  *final_pairtype = PAIRED_UNSPECIFIED;
 	}
+
+	stage3pairarray = (Stage3pair_T *) CALLOC_OUT(1,sizeof(Stage3pair_T));
+	stage3pairarray[0] = (Stage3pair_T) List_head(result);
+	List_free(&result);
+
 	Stage3pair_privatize(stage3pairarray,/*npairs*/1);
 	Stage3pair_eval_and_sort(/*npaths*/(*npaths_primary) + (*npaths_altloc),
 				 &(*first_absmq),&(*second_absmq),stage3pairarray,maxpaths_search,queryseq5,queryseq3,
@@ -22395,6 +24413,7 @@ consolidate_paired_results (int *npaths_primary, int *npaths_altloc, int *first_
     return (Stage3pair_T *) NULL;
 
   } else {
+    debug16(printf("final pairtype is %d\n",*final_pairtype));
     debug16(printf("Result is not NULL (%d paths), and we fall through to concordant, paired, or transloc pairs\n",
 		   List_length(result)));
 
@@ -22999,7 +25018,8 @@ Stage1hr_cleanup () {
 
 
 void
-Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_in, int index1interval_in,
+Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int require_completeset_p_in,
+		int index1part_in, int index1interval_in,
 		int spansize_in, int max_anchors_in, Univ_IIT_T chromosome_iit_in, int nchromosomes_in,
 		Genome_T genome_in, Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
 
@@ -23010,7 +25030,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
 		bool distances_observed_p_in, int subopt_levels_in, int min_indel_end_matches_in,
 		int max_middle_insertions_in, int max_middle_deletions_in,
 		Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_known_in, Chrpos_T shortsplicedist_novelend_in,
-		Chrpos_T min_intronlength_in,
+		Chrpos_T min_intronlength_in, Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in,
 
 		int min_distantsplicing_end_matches_in, int min_distantsplicing_identity_in,
 
@@ -23024,6 +25044,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
 
   use_sarray_p = use_sarray_p_in;
   use_only_sarray_p = use_only_sarray_p_in;
+  require_completeset_p = require_completeset_p_in;
 
   index1part = index1part_in;
   index1interval = index1interval_in;
@@ -23088,6 +25109,8 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
   }
 
   min_intronlength = min_intronlength_in;
+  expected_pairlength = expected_pairlength_in;
+  pairlength_deviation = pairlength_deviation_in;
   min_distantsplicing_end_matches = min_distantsplicing_end_matches_in;
   min_distantsplicing_identity = min_distantsplicing_identity_in;
 
@@ -23099,7 +25122,6 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
 
   gmap_segments_p = false;
   gmap_pairsearch_p = false;
-  gmap_indel_knownsplice_p = false;
   gmap_improvement_p = false;
 
   fprintf(stderr,"GMAP modes:");
@@ -23112,15 +25134,6 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
     fprintf(stderr," pairsearch");
     gmap_pairsearch_p = true;
   }
-  if ((gmap_mode & GMAP_INDEL_KNOWNSPLICE) != 0) {
-    if (gmapp == true) {
-      fprintf(stderr,",");
-    } else {
-      gmapp = true;
-    }
-    fprintf(stderr," indel_knownsplice");
-    gmap_indel_knownsplice_p = true;
-  }
   if ((gmap_mode & GMAP_TERMINAL) != 0) {
     if (gmapp == true) {
       fprintf(stderr,",");
diff --git a/src/stage1hr.h b/src/stage1hr.h
index 144490c..36d2c32 100644
--- a/src/stage1hr.h
+++ b/src/stage1hr.h
@@ -1,4 +1,4 @@
-/* $Id: stage1hr.h 196434 2016-08-16 20:21:03Z twu $ */
+/* $Id: stage1hr.h 206053 2017-05-08 17:37:04Z twu $ */
 #ifndef STAGE1HR_INCLUDED
 #define STAGE1HR_INCLUDED
 
@@ -34,8 +34,7 @@ typedef enum {MASK_NONE, MASK_FREQUENT, MASK_REPETITIVE, MASK_GREEDY_FREQUENT, M
 
 #define GMAP_IMPROVEMENT 1
 #define GMAP_TERMINAL 2
-#define GMAP_INDEL_KNOWNSPLICE 4
-#define GMAP_PAIRSEARCH 8
+#define GMAP_PAIRSEARCH 4
 
 
 typedef struct Floors_T *Floors_T;
@@ -88,7 +87,8 @@ extern void
 Stage1hr_cleanup ();
 
 extern void
-Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_in, int index1interval_in,
+Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int speed_level_in,
+		int index1part_in, int index1interval_in,
 		int spansize_in, int max_anchors_in, Univ_IIT_T chromosome_iit_in, int nchromosomes_in,
 		Genome_T genome_in, Genome_T genomealt, Mode_T mode_in, int maxpaths_search_in,
 
@@ -99,7 +99,7 @@ Stage1hr_setup (bool use_sarray_p_in, bool use_only_sarray_p_in, int index1part_
 		bool distances_observed_p_in, int subopt_levels_in, int min_indel_end_matches_in,
 		int max_middle_insertions_in, int max_middle_deletions_in,
 		Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_known_in, Chrpos_T shortsplicedist_novelend_in,
-		Chrpos_T min_intronlength_in,
+		Chrpos_T min_intronlength_in, Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in,
 
 		int min_distantsplicing_end_matches_in, int min_distantsplicing_identity_in,
 
diff --git a/src/stage2.c b/src/stage2.c
index d8f3eb8..65d4f23 100644
--- a/src/stage2.c
+++ b/src/stage2.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage2.c 198275 2016-09-24 00:44:53Z twu $";
+static char rcsid[] = "$Id: stage2.c 207199 2017-06-12 18:31:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -36,6 +36,7 @@ static char rcsid[] = "$Id: stage2.c 198275 2016-09-24 00:44:53Z twu $";
 /* #define SUFF_PCTCOVERAGE_STAGE2 0.10 */
 #define SUFF_NCOVERED 200
 #define SUFF_MAXNCONSECUTIVE 20
+#define GREEDY_NCONSECUTIVE 100
 
 #define MAX_NACTIVE 100	/* 100 previously considered too low, but may
 			   be okay in conjunction with
@@ -102,8 +103,8 @@ static char rcsid[] = "$Id: stage2.c 198275 2016-09-24 00:44:53Z twu $";
 
 #define SHIFT_EXTRA 15
 
-#define ONE 1.0
-#define TEN_THOUSAND 10000.0
+#define ONE 1
+#define TEN_THOUSAND 8192	/* Not 10000: nearest power of 2 (2^13), so dividing by it equals a right shift of 13 bits */
 #define HUNDRED_THOUSAND 100000.0
 #define ONE_MILLION 1000000.0
 
@@ -207,7 +208,7 @@ Stage2_setup (bool splicingp_in, bool cross_species_p,
 #endif
 
 /* Dynamic programming */
-/* Can also define debug9(x) as: if (querypos == XX) {x;} */
+/* Can also define debug9(x) as: if (curr_querypos == XX) {x;} */
 #ifdef DEBUG9
 #define debug9(x) x
 #else 
@@ -345,13 +346,13 @@ struct Link_T {
   int fwd_consecutive;
   int fwd_rootposition;
   /*int fwd_rootnlinks;*/		/* Number of links in last branch */
-  int fwd_score;
+  /* int fwd_score; */                  /* Now kept in a separate fwd_scores[querypos][hit] array */
 
   int fwd_pos;
   int fwd_hit;
+  int fwd_tracei;		/* Corresponds to a distinct set of branches */
 
 #ifdef DEBUG9
-  int fwd_tracei;		/* Corresponds to a distinct set of branches */
   int fwd_intronnfwd;
   int fwd_intronnrev;
   int fwd_intronnunk;
@@ -469,11 +470,11 @@ Linkmatrix_print_both (struct Link_T **links, Chrpos_T **mappings, int length1,
 
 /* For PMAP, indexsize is in aa */
 static void
-Linkmatrix_print_fwd (struct Link_T **links, Chrpos_T **mappings, int length1,
-		      int *npositions, char *queryseq_ptr, int indexsize) {
+print_fwd (struct Link_T **links, int **fwd_scores,
+	   Chrpos_T **mappings, int length1,
+	   int *npositions, char *queryseq_ptr, int indexsize) {
   int i, j, lastpos;
   char *oligo;
-  Intlist_T p, q;
 
   oligo = (char *) MALLOCA((indexsize+1) * sizeof(char));
   lastpos = length1 - indexsize;
@@ -483,7 +484,7 @@ Linkmatrix_print_fwd (struct Link_T **links, Chrpos_T **mappings, int length1,
     printf("Querypos %d (%s, %d positions):",i,oligo,npositions[i]);
     for (j = 0; j < npositions[i]; j++) {
       printf(" %d.%u:%d(%d,%d)[%u]",
-	     j,mappings[i][j],links[i][j].fwd_score,
+	     j,mappings[i][j],fwd_scores[i][j],
 	     links[i][j].fwd_pos,links[i][j].fwd_hit,links[i][j].fwd_tracei);
     }
     printf("\n");
@@ -540,6 +541,7 @@ mappings_dump_R (Chrpos_T **mappings, int *npositions, int length1,
 }
     
 
+#if 0
 static void
 best_path_dump_R (struct Link_T **links, Chrpos_T **mappings,
 		  int querypos, int hit, bool fwdp, char *varname) {
@@ -614,6 +616,7 @@ best_path_dump_R (struct Link_T **links, Chrpos_T **mappings,
 
   return;
 }
+#endif
 
 static void
 active_bounds_dump_R (Chrpos_T *minactive, Chrpos_T *maxactive,
@@ -912,14 +915,141 @@ while (prevhit != -1 && (prevposition = mappings[prev_querypos][prevhit]) + inde
 #endif
 
 
-static void
-score_querypos_lookback_one (
+#if 0
+/* SIMD version */
+_positions = _mm_set1_epi32(position - indexsize_nt);
+_querydistance = _mm_set1_epi32(querydistance);
+_splicing_querydist_penalty = _mm_set1_epi32(querydist_penalty+1+NINTRON_PENALTY_MISMATCH);
+_max_scores = _mm_set1_epi32(-1000);
+
+prevhit = low_hit;
+while (prevhit + 4 < high_hit) {
+  /* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */
+  _prevpositions = _mm_loadu_epi32(&(mappings[prev_querypos][prevhit]));
+  _gendistance = _mm_sub_epi32(_positions,_prevpositions);
+  if (_mm_cmpgt_epi32(_gendistance,_zeroes) == 0) {
+    break;
+  } else {
+    _diffdistance = _mm_abs_epi32(_mm_sub_epi32(_gendistance,_querydistance));
+  
+    _prev_scores = _mm_loadu_epi32(&(fwd_scores[prev_querypos][prevhit]));
+  
+    _scores_close = _mm_add_epi32(_prev_scores,_mm_set1_epi32(CONSEC_POINTS_PER_MATCH));
+    /* Right shift of 13 bits gives division by 8192 */
+    _scores_splice = _mm_add_epi32(_prev_scores,_mm_sub_epi32(_mm_srli_epi32(_diffdistance,13),_splicing_querydist_penalty));
+
+    _scores = _mm_blendv_ps(_scores_close,_scores_splice,_mm_cmpgt_epi32(_diffdistance,_mm_set1_epi32(EQUAL_DISTANCE_NOT_SPLICING)));
+    
+    _mm_storeu_epi32(_scores);
+
+    _max_scores = _mm_max_epi32(_max_scores,_scores);
+    prevhit += 4;
+  }
+}
+
+/* Take care of serial cases */
+
+
+
+
+/* Compute overall max and return.  Caller can find prev_querypos with
+   largest max and store in fwd_pos[curr_querypos][currhit] and max in
+   fwd_max[curr_querypos][currhit].  During traceback, recompute at
+   prev_querypos and find prevhit that gives the max.  */
+
+  if (diffdistance < maxintronlen) {
+    if (diffdistance <= EQUAL_DISTANCE_NOT_SPLICING) {
+      debug9(canonicalsgn = 9);
+      fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
+#ifdef PMAP
+      if (diffdistance % 3 != 0) {
+	fwd_score -= NONCODON_INDEL_PENALTY;
+      }
+#endif
+    } else if (near_end_p == false && prevlink->fwd_consecutive < EXON_DEFN) {
+      debug9(canonicalsgn = 0);
+      if (splicingp == true) {
+	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
+      } else {
+	fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
+      }
+
+    } else if (splicingp == false) {
+      debug9(canonicalsgn = 0);
+      fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty;
+
+    } else if (use_shifted_canonical_p == true) {
+      leftpos = prevposition + querydistance - 1;
+      /* printf("leftpos %d, last_leftpos %d, rightpos %d\n",leftpos,last_leftpos,rightpos); */
+      if (leftpos == last_leftpos) {
+	canonicalp = last_canonicalp;
+      } else {
+	debug7(printf("Calling find_shift_canonical fwd\n"));
+	canonicalp = find_shifted_canonical(leftpos,rightpos,querydistance-indexsize_nt,
+					    /* &lastGT,&lastAG, */
+					    Genome_prev_donor_position,Genome_prev_acceptor_position,
+					    chroffset,chrhigh,plusp,skip_repetitive_p);
+	/* And need to check for shift_canonical_rev */
+
+	last_leftpos = leftpos;
+	last_canonicalp = canonicalp;
+      }
+      if (canonicalp == true) {
+	debug9(canonicalsgn = +1);
+	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty;
+      } else {
+	debug9(canonicalsgn = 0);
+	fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH;
+      }
+
+    } else {
+      debug9(canonicalsgn = +1);
+      fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty;
+    }
+
+    debug9(printf("\tD. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d, intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
+		  prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
+		  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,
+		  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
+		  gendistance,querydistance,canonicalsgn));
+	    
+    /* Allow ties, which should favor shorter intron */
+    if (fwd_score >= best_fwd_score) {
+      if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
+	best_fwd_consecutive = prevlink->fwd_consecutive + (querydistance + indexsize_nt);
+	/* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
+      } else {
+	best_fwd_consecutive = 0;
+	/* best_fwd_rootnlinks = 1; */
+      }
+      best_fwd_score = fwd_score;
+      best_fwd_prevpos = prev_querypos;
+      best_fwd_prevhit = prevhit;
 #ifdef DEBUG9
-			     int *fwd_tracei,
+      best_fwd_tracei = ++*fwd_tracei;
+      best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
+      best_fwd_intronnrev = prevlink->fwd_intronnrev;
+      best_fwd_intronnunk = prevlink->fwd_intronnunk;
+      switch (canonicalsgn) {
+      case 1: best_fwd_intronnfwd++; break;
+      case 0: best_fwd_intronnunk++; break;
+      }
+#endif
+      debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
+    } else {
+      debug9(printf(" => Loses to %d\n",best_fwd_score));
+    }
+  }
+
+  prevhit = active[prev_querypos][prevhit];
+ }
 #endif
-			     Link_T currlink, int querypos,
+
+
+static void
+score_querypos_lookback_one (int *fwd_tracei, Link_T currlink, int curr_querypos, int currhit,
 			     int querystart, int queryend, unsigned int position,
-			     struct Link_T **links, Chrpos_T **mappings,
+			     struct Link_T **links, int **fwd_scores, Chrpos_T **mappings,
 			     int **active, int *firstactive,
 			     Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
 			     int indexsize, Intlist_T processed,
@@ -935,8 +1065,8 @@ score_querypos_lookback_one (
   /* int best_fwd_rootnlinks = 1; */
   int best_fwd_score = 0, fwd_score;
   int best_fwd_prevpos = -1, best_fwd_prevhit = -1;
+  int best_fwd_tracei, last_tracei;
 #ifdef DEBUG9
-  int best_fwd_tracei;
   int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0;
   int canonicalsgn = 0;
 #endif
@@ -975,9 +1105,9 @@ score_querypos_lookback_one (
     prev_active = active[prev_querypos];
 
 #ifdef PMAP
-    querydistance = (querypos - prev_querypos)*3;
+    querydistance = (curr_querypos - prev_querypos)*3;
 #else
-    querydistance = querypos - prev_querypos;
+    querydistance = curr_querypos - prev_querypos;
 #endif
     prevhit = firstactive[prev_querypos];
     prevposition = position;	/* Prevents prevposition + querydistance == position */
@@ -989,12 +1119,12 @@ score_querypos_lookback_one (
       best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
       best_fwd_rootposition = prevlink->fwd_rootposition;
       /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
-      best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*querydistance;
+      best_fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*querydistance;
 
       best_fwd_prevpos = prev_querypos;
       best_fwd_prevhit = prevhit;
-#ifdef DEBUG9
       best_fwd_tracei = prevlink->fwd_tracei;
+#ifdef DEBUG9
       best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
       best_fwd_intronnrev = prevlink->fwd_intronnrev;
       best_fwd_intronnunk = prevlink->fwd_intronnunk;
@@ -1007,7 +1137,7 @@ score_querypos_lookback_one (
 
 
       debug9(printf("\tA. Adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
-		    prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],prevlink->fwd_score,
+		    prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],fwd_scores[prev_querypos][prevhit],
 		    best_fwd_score,best_fwd_consecutive,best_fwd_tracei,
 		    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
     }
@@ -1015,13 +1145,13 @@ score_querypos_lookback_one (
 
 
   /* Check work list */
-  if (anchoredp && querypos - indexsize_query <= querystart) {
+  if (anchoredp && curr_querypos - indexsize_query <= querystart) {
     /* Allow close prevpositions that overlap with anchor */
     /* Can give rise to false positives, and increases amount of dynamic programming work */
-  } else if (0 && anchoredp && querypos == queryend) {
+  } else if (0 && anchoredp && curr_querypos == queryend) {
     /* Test first position */
   } else {
-    while (processed != NULL && (prev_querypos = Intlist_head(processed)) > querypos - indexsize_query) {
+    while (processed != NULL && (prev_querypos = Intlist_head(processed)) > curr_querypos - indexsize_query) {
       debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos));
       processed = Intlist_next(processed);
     }
@@ -1030,14 +1160,15 @@ score_querypos_lookback_one (
   /* D. Evaluate for mismatches (all other previous querypos) */
   donep = false;
   nseen = 0;
+  last_tracei = -1;
   for ( ; processed != NULL && best_fwd_consecutive < enough_consecutive && donep == false;
 	processed = Intlist_next(processed), nseen++) {
     prev_querypos = Intlist_head(processed);
 
 #ifdef PMAP
-    querydistance = (querypos - prev_querypos)*3;
+    querydistance = (curr_querypos - prev_querypos)*3;
 #else
-    querydistance = querypos - prev_querypos;
+    querydistance = curr_querypos - prev_querypos;
 #endif
 
     if (nseen > nlookback && querydistance - indexsize_nt > lookback) {
@@ -1053,6 +1184,15 @@ score_querypos_lookback_one (
       prev_links = links[prev_querypos];
       prev_active = active[prev_querypos];
 
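+      /* Range 0: Skip leading prevhits whose fwd_tracei equals last_tracei (recorded from an earlier prev_querypos) */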
+      while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
+	debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
+	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+      }
+      if (prevhit != -1) {
+	last_tracei = prev_links[prevhit].fwd_tracei;
+      }
+
       /* Range 1: From Infinity to maxintronlen */
       if (splicingp == true) {
 	/* This is equivalent to diffdistance >= maxintronlen, where
@@ -1075,7 +1215,7 @@ score_querypos_lookback_one (
 	assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
 	diffdistance = gendistance - querydistance; /* No need for abs() */
 
-	fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
+	fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
 	if (splicingp == true) {
 	  fwd_score -= (diffdistance/TEN_THOUSAND + 1);
 	} else {
@@ -1151,7 +1291,7 @@ score_querypos_lookback_one (
 
 	debug9(printf("\tD2. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-		      prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
@@ -1168,8 +1308,8 @@ score_querypos_lookback_one (
 	  best_fwd_score = fwd_score;
 	  best_fwd_prevpos = prev_querypos;
 	  best_fwd_prevhit = prevhit;
-#ifdef DEBUG9
 	  best_fwd_tracei = ++*fwd_tracei;
+#ifdef DEBUG9
 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
@@ -1204,7 +1344,7 @@ score_querypos_lookback_one (
 #else
 	/* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
 	/* This is how version 2013-08-14 did it */
-	fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
+	fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
 #endif
 
 #if 0
@@ -1216,7 +1356,7 @@ score_querypos_lookback_one (
 
 	debug9(printf("\tD4. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-		      prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
@@ -1233,9 +1373,9 @@ score_querypos_lookback_one (
 	  best_fwd_score = fwd_score;
 	  best_fwd_prevpos = prev_querypos;
 	  best_fwd_prevhit = prevhit;
-#ifdef DEBUG9
 	  /* best_fwd_tracei = ++*fwd_tracei; */
 	  best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
+#ifdef DEBUG9
 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
@@ -1264,17 +1404,17 @@ score_querypos_lookback_one (
   currlink->fwd_pos = best_fwd_prevpos;
   currlink->fwd_hit = best_fwd_prevhit;
   if (currlink->fwd_pos >= 0) {
-    debug9(currlink->fwd_tracei = best_fwd_tracei);
-    currlink->fwd_score = best_fwd_score;
+    currlink->fwd_tracei = best_fwd_tracei;
+    fwd_scores[curr_querypos][currhit] = best_fwd_score;
   } else if (anchoredp == true) {
-    debug9(currlink->fwd_tracei = -1);
-    currlink->fwd_score = -100000;
+    currlink->fwd_tracei = -1;
+    fwd_scores[curr_querypos][currhit] = -100000;
   } else if (localp == true) {
-    debug9(currlink->fwd_tracei = ++*fwd_tracei);
-    currlink->fwd_score = indexsize_nt;
+    currlink->fwd_tracei = ++*fwd_tracei;
+    fwd_scores[curr_querypos][currhit] = indexsize_nt;
   } else {
-    debug9(currlink->fwd_tracei = ++*fwd_tracei);
-    currlink->fwd_score = best_fwd_score;
+    currlink->fwd_tracei = ++*fwd_tracei;
+    fwd_scores[curr_querypos][currhit] = best_fwd_score;
   }
 
 #ifdef DEBUG9
@@ -1284,7 +1424,7 @@ score_querypos_lookback_one (
 #endif
 
   debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
-		currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei));
+		currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][currhit],currlink->fwd_tracei));
   debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
 
   return;
@@ -1294,14 +1434,10 @@ score_querypos_lookback_one (
 
 
 static void
-score_querypos_lookback_mult (
-#ifdef DEBUG9
-			      int *fwd_tracei,
-#endif
-			      int low_hit, int high_hit,
-			      int querypos, int querystart, int queryend, unsigned int *positions,
-			      struct Link_T **links, Chrpos_T **mappings,
-			      int **active, int *firstactive,
+score_querypos_lookback_mult (int *fwd_tracei, int low_hit, int high_hit, int curr_querypos,
+			      int querystart, int queryend, unsigned int *positions,
+			      struct Link_T **links, int **fwd_scores,
+			      Chrpos_T **mappings, int **active, int *firstactive,
 			      Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
 			      int indexsize, Intlist_T processed,
 			      bool anchoredp, bool localp, bool splicingp,
@@ -1314,12 +1450,12 @@ score_querypos_lookback_mult (
   Chrpos_T *prev_mappings, *adj_mappings;
   int *prev_active, *adj_active;
 
-  int best_fwd_consecutive;
+  int overall_fwd_consecutive, best_fwd_consecutive;
   int best_fwd_rootposition;
   int best_fwd_score, fwd_score;
   int best_fwd_prevpos, best_fwd_prevhit;
+  int best_fwd_tracei, last_tracei;
 #ifdef DEBUG9
-  int best_fwd_tracei;
   int best_fwd_intronnfwd, best_fwd_intronnrev, best_fwd_intronnunk;
   int canonicalsgn = 0;
 #endif
@@ -1344,14 +1480,14 @@ score_querypos_lookback_mult (
   /* Determine work load */
   /* printf("Work load (lookback): %s\n",Intlist_to_string(processed)); */
   last_item = processed;
-  if (anchoredp && querypos - indexsize_query <= querystart) {
+  if (anchoredp && curr_querypos - indexsize_query <= querystart) {
     /* Allow close prevpositions that overlap with anchor */
     /* Can give rise to false positives, and increases amount of dynamic programming work */
     /* debug9(printf("No skipping because close to anchor\n")); */
-  } else if (0 && anchoredp && querypos == queryend) {
+  } else if (0 && anchoredp && curr_querypos == queryend) {
     /* Test first position */
   } else {
-    while (processed != NULL && (/*prev_querypos =*/ Intlist_head(processed)) > querypos - indexsize_query) {
+    while (processed != NULL && (/*prev_querypos =*/ Intlist_head(processed)) > curr_querypos - indexsize_query) {
       debug9(printf("Skipping prev_querypos %d, because too close\n",Intlist_head(processed)));
       processed = Intlist_next(processed);
     }
@@ -1359,7 +1495,7 @@ score_querypos_lookback_mult (
 
   if (last_item == NULL) {
     for (hiti = 0; hiti < nhits; hiti++) {
-      currlink = &(links[querypos][hiti + low_hit]);
+      currlink = &(links[curr_querypos][hiti + low_hit]);
 
       currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
       currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
@@ -1367,13 +1503,13 @@ score_querypos_lookback_mult (
       currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
 
       if (anchoredp == true) {
-	debug9(currlink->fwd_tracei = -1);
-	currlink->fwd_score = -100000;
+	currlink->fwd_tracei = -1;
+	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
       } else if (localp == true) {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = indexsize_nt;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
       } else {
-	currlink->fwd_score = /*best_fwd_score =*/ 0;
+	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
       }
     }
 
@@ -1386,20 +1522,20 @@ score_querypos_lookback_mult (
     adj_active = active[adj_querypos];
 
 #ifdef PMAP
-    adj_querydistance = (querypos - adj_querypos)*3;
+    adj_querydistance = (curr_querypos - adj_querypos)*3;
 #else
-    adj_querydistance = querypos - adj_querypos;
+    adj_querydistance = curr_querypos - adj_querypos;
 #endif
 
     /* Process prevhit and hiti in parallel.  Values are asscending along prevhit chain and from 0 to nhits-1. */
     prevhit = firstactive[adj_querypos];
     hiti = 0;
     while (prevhit != -1 && hiti < nhits) {
-      if ((prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < (position =  positions[hiti])) {
+      if ((prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < (position = positions[hiti])) {
 	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
 
       } else if (prevposition + adj_querydistance > position) {
-	currlink = &(links[querypos][hiti + low_hit]);
+	currlink = &(links[curr_querypos][hiti + low_hit]);
 
 	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
 	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
@@ -1407,32 +1543,32 @@ score_querypos_lookback_mult (
 	currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
 
 	if (anchoredp == true) {
-	  debug9(currlink->fwd_tracei = -1);
-	  currlink->fwd_score = -100000;
+	  currlink->fwd_tracei = -1;
+	  fwd_scores[curr_querypos][hiti + low_hit] = -100000;
 	} else if (localp == true) {
-	  debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	  currlink->fwd_score = indexsize_nt;
+	  currlink->fwd_tracei = ++*fwd_tracei;
+	  fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
 	} else {
-	  currlink->fwd_score = /*best_fwd_score =*/ 0;
+	  fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
 	}
 
 	hiti++;
 
       } else {
 	/* Adjacent position found for hiti */
-	currlink = &(links[querypos][hiti + low_hit]);
+	currlink = &(links[curr_querypos][hiti + low_hit]);
 	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
 
 	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ prevlink->fwd_consecutive + adj_querydistance;
 	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ prevlink->fwd_rootposition;
 	currlink->fwd_pos = /*best_fwd_prevpos =*/ adj_querypos;
 	currlink->fwd_hit = /*best_fwd_prevhit =*/ prevhit;
-	currlink->fwd_score = /*best_fwd_score =*/ prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance;
+	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
 
 #ifdef DEBUG9
-	printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
-	       hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score,
-	       currlink->fwd_score,currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
+	printf("\tA(1). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
+	       hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
+	       fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
 	       /*best_fwd_intronnfwd*/prevlink->fwd_intronnfwd,
 	       /*best_fwd_intronnrev*/prevlink->fwd_intronnrev,
 	       /*best_fwd_intronnunk*/prevlink->fwd_intronnunk);
@@ -1444,7 +1580,7 @@ score_querypos_lookback_mult (
     }
 
     while (hiti < nhits) {
-      currlink = &(links[querypos][hiti + low_hit]);
+      currlink = &(links[curr_querypos][hiti + low_hit]);
 
       currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
       currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
@@ -1452,13 +1588,13 @@ score_querypos_lookback_mult (
       currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
 
       if (anchoredp == true) {
-	debug9(currlink->fwd_tracei = -1);
-	currlink->fwd_score = -100000;
+	currlink->fwd_tracei = -1;
+	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
       } else if (localp == true) {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = indexsize_nt;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
       } else {
-	currlink->fwd_score = /*best_fwd_score =*/ 0;
+	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
       }
 
       hiti++;
@@ -1471,12 +1607,10 @@ score_querypos_lookback_mult (
     adj_active = active[adj_querypos];
 
 #ifdef PMAP
-    adj_querydistance = (querypos - adj_querypos)*3;
+    adj_querydistance = (curr_querypos - adj_querypos)*3;
 #else
-    adj_querydistance = querypos - adj_querypos;
+    adj_querydistance = curr_querypos - adj_querypos;
 #endif
-    adj_frontier = firstactive[adj_querypos];
-
     nprocessed = Intlist_length(processed);
     frontier = (int *) MALLOCA(nprocessed * sizeof(int));
 
@@ -1484,7 +1618,7 @@ score_querypos_lookback_mult (
     for (p = processed; p != NULL; p = Intlist_next(p)) {
       prev_querypos = Intlist_head(p);
 
-      querydistance = querypos - prev_querypos;
+      querydistance = curr_querypos - prev_querypos;
       if (nseen <= /*nlookback*/1 || querydistance - indexsize_nt <= /*lookback*/sufflookback/2) {
 	max_adjacent_nseen = nseen;
       }
@@ -1495,10 +1629,38 @@ score_querypos_lookback_mult (
       frontier[nseen++] = firstactive[prev_querypos];
     }
     
+
+    /* Look for overall_fwd_consecutive to see whether we can be greedy */
+    overall_fwd_consecutive = 0;
+    adj_frontier = firstactive[adj_querypos];
     for (hiti = 0; hiti < nhits; hiti++) {
       position = positions[hiti];
 
-      /* A. Evaluate adjacent position (at last one processed) */
+      /* A. Evaluate adjacent positions (at last one processed) */
+      prevhit = adj_frontier;	/* Get information from last hiti */
+      prevposition = position;	/* Prevents prevposition + adj_querydistance == position */
+      while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < position) {
+	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
+      }
+      adj_frontier = prevhit;	/* Save information for next hiti */
+
+      if (prevposition + adj_querydistance == position) {
+	/* Adjacent found */
+	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
+	if (prevlink->fwd_consecutive + adj_querydistance > overall_fwd_consecutive) {
+	  overall_fwd_consecutive = prevlink->fwd_consecutive + adj_querydistance;
+	}
+      }
+    }
+    debug(printf("Overall fwd consecutive is %d\n",overall_fwd_consecutive));
+
+
+    /* Now process */
+    adj_frontier = firstactive[adj_querypos];
+    for (hiti = 0; hiti < nhits; hiti++) {
+      position = positions[hiti];
+
+      /* A. Evaluate adjacent positions (at last one processed) */
       prevhit = adj_frontier;	/* Get information from last hiti */
       prevposition = position;	/* Prevents prevposition + adj_querydistance == position */
       while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < position) {
@@ -1514,17 +1676,17 @@ score_querypos_lookback_mult (
 	best_fwd_rootposition = prevlink->fwd_rootposition;
 	best_fwd_prevpos = adj_querypos;
 	best_fwd_prevhit = prevhit;
-	best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance;
+	best_fwd_score = fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
 	max_nseen = max_adjacent_nseen;	/* Look not so far back */
+	best_fwd_tracei = prevlink->fwd_tracei;
 
 #ifdef DEBUG9
-	best_fwd_tracei = prevlink->fwd_tracei;
 	best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
 	best_fwd_intronnrev = prevlink->fwd_intronnrev;
 	best_fwd_intronnunk = prevlink->fwd_intronnunk;
 #endif
-	debug9(printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
-		      hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score,
+	debug9(printf("\tA(2). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
+		      hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
 		      best_fwd_score,best_fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
 
@@ -1536,215 +1698,224 @@ score_querypos_lookback_mult (
 	best_fwd_prevhit = -1;
 	best_fwd_score = 0;
 	max_nseen = max_nonadjacent_nseen; /* Look farther back */
+	best_fwd_tracei = -1;
 
 #ifdef DEBUG9
-	best_fwd_tracei = -1;
 	best_fwd_intronnfwd = 0;
 	best_fwd_intronnrev = 0;
 	best_fwd_intronnunk = 0;
 #endif
       }
 
+      if (overall_fwd_consecutive < GREEDY_NCONSECUTIVE) {
+	/* D. Evaluate for mismatches (all other previous querypos) */
+	nseen = 0;
+	last_tracei = -1;
+	for (p = processed; p != NULL && best_fwd_consecutive < enough_consecutive && nseen <= max_nseen;
+	     p = Intlist_next(p), nseen++) {
 
-      /* D. Evaluate for mismatches (all other previous querypos) */
-      nseen = 0;
-      for (p = processed; p != NULL && best_fwd_consecutive < enough_consecutive && nseen <= max_nseen;
-	   p = Intlist_next(p), nseen++) {
-
-	/* Making this check helps with efficiency */
-	if ((prevhit = frontier[nseen]) != -1) { /* Retrieve starting point from last hiti */
-	  prev_querypos = Intlist_head(p);
+	  /* Making this check helps with efficiency */
+	  if ((prevhit = frontier[nseen]) != -1) { /* Retrieve starting point from last hiti */
+	    prev_querypos = Intlist_head(p);
 #ifdef PMAP
-	  querydistance = (querypos - prev_querypos)*3;
+	    querydistance = (curr_querypos - prev_querypos)*3;
 #else
-	  querydistance = querypos - prev_querypos;
-#endif
-	  /* Actually a querydist_penalty */
-	  querydist_credit = -querydistance/indexsize_nt;
-
-	  prev_mappings = mappings[prev_querypos];
-	  prev_links = links[prev_querypos];
-	  prev_active = active[prev_querypos];
-
-	  /* Range 1: From Infinity to maxintronlen.  To be skipped.
-	     This is equivalent to diffdistance >= maxintronlen, where
-	     diffdistance = abs(gendistance - querydistance) and
-	     gendistance = (position - prevposition - indexsize_nt) */
-	  while (prevhit != -1 && (/*prevposition =*/ /*mappings[prev_querypos]*/prev_mappings[prevhit]) + maxintronlen + querydistance <= position) {
-	    /* Accept within range 1 (ignore) */
-	    prevhit = /*active[prev_querypos]*/prev_active[prevhit];
-	  }
-	  frontier[nseen] = prevhit;	/* Store as starting point for next hiti */
-
+	    querydistance = curr_querypos - prev_querypos;
+#endif
+	    /* Actually a querydist_penalty */
+	    querydist_credit = -querydistance/indexsize_nt;
 
-	  /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
-	  /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
-	  while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + EQUAL_DISTANCE_NOT_SPLICING + querydistance < position) {
-	    prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
+	    prev_mappings = mappings[prev_querypos];
+	    prev_links = links[prev_querypos];
+	    prev_active = active[prev_querypos];
 
-	    gendistance = position - prevposition;
-	    assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
-	    diffdistance = gendistance - querydistance; /* No need for abs() */
+	    /* Range 0 */
+	    while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
+	      debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	    }
+	    if (prevhit != -1) {
+	      last_tracei = prev_links[prevhit].fwd_tracei;
+	    }
 
-	    fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
-	    if (splicingp == true) {
-	      fwd_score -= (diffdistance/TEN_THOUSAND + 1);
-	    } else {
-	      fwd_score -= (diffdistance/ONE + 1);
+	    /* Range 1: From Infinity to maxintronlen.  To be skipped.
+	       This is equivalent to diffdistance >= maxintronlen, where
+	       diffdistance = abs(gendistance - querydistance) and
+	       gendistance = (position - prevposition - indexsize_nt) */
+	    while (prevhit != -1 && (/*prevposition =*/ /*mappings[prev_querypos]*/prev_mappings[prevhit]) + maxintronlen + querydistance <= position) {
+	      /* Accept within range 1 (ignore) */
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
 	    }
+	    frontier[nseen] = prevhit;	/* Store as starting point for next hiti */
 
-	    if (use_canonical_p == true) {
-	      /* prevpos is lower genomic coordinate than currpos */
-	      /* need to subtract from position and prevposition to compensate for greedy matches */
-	      /* need to add to position and prevposition to compensate for missed matches */
-	      if (plusp == true) {
-		prevpos = chroffset + prevposition + indexsize_nt;
-		currpos = chroffset + position - querydistance + indexsize_nt;
-		if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
-		  canonicalp = false;
-		} else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
-						   /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
-						   /*acceptor_rightbound*/currpos + MISS_BEHIND,
-						   /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
-						   chroffset) == true) {
-		  debug9(printf("lookback plus: sense canonical\n"));
-		  canonicalp = true;
-		} else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
-						       /*donor_leftbound*/currpos - GREEDY_ADVANCE,
-						       /*acceptor_rightbound*/prevpos + MISS_BEHIND,
-						       /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
-						       chroffset) == true) {
-		  debug9(printf("lookback plus: antisense canonical\n"));
-		  canonicalp = true;
-		} else {
-		  debug9(printf("lookback plus: not canonical\n"));
-		  canonicalp = false;
-		}
+	    /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
+	    /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
+	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + EQUAL_DISTANCE_NOT_SPLICING + querydistance < position) {
+	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
+
+	      gendistance = position - prevposition;
+	      assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
+	      diffdistance = gendistance - querydistance; /* No need for abs() */
 
+	      fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
+	      if (splicingp == true) {
+		fwd_score -= (diffdistance/TEN_THOUSAND + 1);
 	      } else {
-		prevpos = chrhigh + 1 - prevposition - indexsize_nt;
-		currpos = chrhigh + 1 - position + querydistance - indexsize_nt;
-		if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
-		  canonicalp = false;
-		} else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
-						   /*donor_leftbound*/currpos - MISS_BEHIND,
-						   /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
-						   /*acceptor_leftbound*/prevpos - MISS_BEHIND,
-						   chroffset) == true) {
-		  debug9(printf("lookback minus: sense canonical\n"));
-		  canonicalp = true;
-		} else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
-						       /*donor_leftbound*/prevpos - MISS_BEHIND,
-						       /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
-						       /*acceptor_leftbound*/currpos - MISS_BEHIND,
-						       chroffset) == true) {
-		  debug9(printf("lookback minus: antisense canonical\n"));
-		  canonicalp = true;
+		fwd_score -= (diffdistance/ONE + 1);
+	      }
+
+	      if (use_canonical_p == true) {
+		/* prevpos is lower genomic coordinate than currpos */
+		/* need to subtract from position and prevposition to compensate for greedy matches */
+		/* need to add to position and prevposition to compensate for missed matches */
+		if (plusp == true) {
+		  prevpos = chroffset + prevposition + indexsize_nt;
+		  currpos = chroffset + position - querydistance + indexsize_nt;
+		  if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
+		    canonicalp = false;
+		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
+						     /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
+						     /*acceptor_rightbound*/currpos + MISS_BEHIND,
+						     /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
+						     chroffset) == true) {
+		    debug9(printf("lookback plus: sense canonical\n"));
+		    canonicalp = true;
+		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
+							 /*donor_leftbound*/currpos - GREEDY_ADVANCE,
+							 /*acceptor_rightbound*/prevpos + MISS_BEHIND,
+							 /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
+							 chroffset) == true) {
+		    debug9(printf("lookback plus: antisense canonical\n"));
+		    canonicalp = true;
+		  } else {
+		    debug9(printf("lookback plus: not canonical\n"));
+		    canonicalp = false;
+		  }
+
 		} else {
-		  debug9(printf("lookback minus: not canonical\n"));
-		  canonicalp = false;
+		  prevpos = chrhigh + 1 - prevposition - indexsize_nt;
+		  currpos = chrhigh + 1 - position + querydistance - indexsize_nt;
+		  if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
+		    canonicalp = false;
+		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
+						     /*donor_leftbound*/currpos - MISS_BEHIND,
+						     /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
+						     /*acceptor_leftbound*/prevpos - MISS_BEHIND,
+						     chroffset) == true) {
+		    debug9(printf("lookback minus: sense canonical\n"));
+		    canonicalp = true;
+		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
+							 /*donor_leftbound*/prevpos - MISS_BEHIND,
+							 /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
+							 /*acceptor_leftbound*/currpos - MISS_BEHIND,
+							 chroffset) == true) {
+		    debug9(printf("lookback minus: antisense canonical\n"));
+		    canonicalp = true;
+		  } else {
+		    debug9(printf("lookback minus: not canonical\n"));
+		    canonicalp = false;
+		  }
 		}
-	      }
 
-	      if (canonicalp == true) {
-		debug9(canonicalsgn = +1);
-	      } else {
-		debug9(canonicalsgn = 0);
-		fwd_score -= non_canonical_penalty;
+		if (canonicalp == true) {
+		  debug9(canonicalsgn = +1);
+		} else {
+		  debug9(canonicalsgn = 0);
+		  fwd_score -= non_canonical_penalty;
+		}
 	      }
-	    }
 
-	    debug9(printf("\tD2, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
-			  hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-			  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
-			  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
-			  gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
+	      debug9(printf("\tD2, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
+			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
+			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
+			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
-	    /* Disallow ties, which should favor adjacent */
-	    if (fwd_score > best_fwd_score) {
-	      if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
-		best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
-	      } else {
-		best_fwd_consecutive = 0;
-	      }
-	      best_fwd_rootposition = prevlink->fwd_rootposition;
-	      best_fwd_score = fwd_score;
-	      best_fwd_prevpos = prev_querypos;
-	      best_fwd_prevhit = prevhit;
+	      /* Disallow ties, which should favor adjacent */
+	      if (fwd_score > best_fwd_score) {
+		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
+		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
+		} else {
+		  best_fwd_consecutive = 0;
+		}
+		best_fwd_rootposition = prevlink->fwd_rootposition;
+		best_fwd_score = fwd_score;
+		best_fwd_prevpos = prev_querypos;
+		best_fwd_prevhit = prevhit;
+		best_fwd_tracei = ++*fwd_tracei;
 #ifdef DEBUG9
-	      best_fwd_tracei = ++*fwd_tracei;
-	      best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
-	      best_fwd_intronnrev = prevlink->fwd_intronnrev;
-	      best_fwd_intronnunk = prevlink->fwd_intronnunk;
-	      switch (canonicalsgn) {
-	      case 1: best_fwd_intronnfwd++; break;
-	      case 0: best_fwd_intronnunk++; break;
-	      }
+		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
+		best_fwd_intronnrev = prevlink->fwd_intronnrev;
+		best_fwd_intronnunk = prevlink->fwd_intronnunk;
+		switch (canonicalsgn) {
+		case 1: best_fwd_intronnfwd++; break;
+		case 0: best_fwd_intronnunk++; break;
+		}
 #endif
-	      debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
-	    } else {
-	      debug9(printf(" => Loses to %d\n",best_fwd_score));
+		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
+	      } else {
+		debug9(printf(" => Loses to %d\n",best_fwd_score));
+	      }
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
 	    }
 
-	    prevhit = /*active[prev_querypos]*/prev_active[prevhit];
-	  }
 
+	    /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
+	    /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
+	    /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
 
-	  /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
-	  /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
-	  /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
 
+	    /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
+	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + indexsize_nt <= position) {
+	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
 
-	  /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
-	  while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + indexsize_nt <= position) {
-	    prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
-
-	    gendistance = position - prevposition;
-	    /* was abs(gendistance - querydistance) */
-	    diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
+	      gendistance = position - prevposition;
+	      /* was abs(gendistance - querydistance) */
+	      diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
 
 #ifdef BAD_GMAX
-	    fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
+	      fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
 #else
-	    /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
-	    /* This is how version 2013-08-14 did it */
-	    fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
+	      /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
+	      /* This is how version 2013-08-14 did it */
+	      fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
 #endif
 	  
-	    debug9(printf("\tD4, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
-			  hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-			  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
-			  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
-			  gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
+	      debug9(printf("\tD4, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
+			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
+			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
+			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
-	    /* Disallow ties, which should favor adjacent */
-	    if (fwd_score > best_fwd_score) {
-	      if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
-		best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
-	      } else {
-		best_fwd_consecutive = 0;
-	      }
-	      best_fwd_rootposition = prevlink->fwd_rootposition;
-	      best_fwd_score = fwd_score;
-	      best_fwd_prevpos = prev_querypos;
-	      best_fwd_prevhit = prevhit;
+	      /* Disallow ties, which should favor adjacent */
+	      if (fwd_score > best_fwd_score) {
+		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
+		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
+		} else {
+		  best_fwd_consecutive = 0;
+		}
+		best_fwd_rootposition = prevlink->fwd_rootposition;
+		best_fwd_score = fwd_score;
+		best_fwd_prevpos = prev_querypos;
+		best_fwd_prevhit = prevhit;
+		/* best_fwd_tracei = ++*fwd_tracei; */
+		best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
 #ifdef DEBUG9
-	      /* best_fwd_tracei = ++*fwd_tracei; */
-	      best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
-	      best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
-	      best_fwd_intronnrev = prevlink->fwd_intronnrev;
-	      best_fwd_intronnunk = prevlink->fwd_intronnunk;
-	      switch (canonicalsgn) {
-	      case 1: best_fwd_intronnfwd++; break;
-	      case 0: best_fwd_intronnunk++; break;
-	      }
+		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
+		best_fwd_intronnrev = prevlink->fwd_intronnrev;
+		best_fwd_intronnunk = prevlink->fwd_intronnunk;
+		switch (canonicalsgn) {
+		case 1: best_fwd_intronnfwd++; break;
+		case 0: best_fwd_intronnunk++; break;
+		}
 #endif
-	      debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
-	    } else {
-	      debug9(printf(" => Loses to %d\n",best_fwd_score));
-	    }
+		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
+	      } else {
+		debug9(printf(" => Loses to %d\n",best_fwd_score));
+	      }
 
-	    prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	    }
 	  }
 	}
       }
@@ -1753,23 +1924,23 @@ score_querypos_lookback_mult (
 	 small local extension from beating a good canonical intron.
 	 If querypos is too small, don't insert an intron.  */
       /* linksconsecutive already assigned above */
-      currlink = &(links[querypos][hiti + low_hit]);
+      currlink = &(links[curr_querypos][hiti + low_hit]);
       currlink->fwd_consecutive = best_fwd_consecutive;
       currlink->fwd_rootposition = best_fwd_rootposition;
       currlink->fwd_pos = best_fwd_prevpos;
       currlink->fwd_hit = best_fwd_prevhit;
       if (currlink->fwd_pos >= 0) {
-	debug9(currlink->fwd_tracei = best_fwd_tracei);
-	currlink->fwd_score = best_fwd_score;
+	currlink->fwd_tracei = best_fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
       } else if (anchoredp == true) {
-	debug9(currlink->fwd_tracei = -1);
-	currlink->fwd_score = -100000;
+	currlink->fwd_tracei = -1;
+	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
       } else if (localp == true) {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = indexsize_nt;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
       } else {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = best_fwd_score;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
       }
 
 #ifdef DEBUG9
@@ -1779,7 +1950,7 @@ score_querypos_lookback_mult (
 #endif
 
       debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
-		    currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei));
+		    currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_tracei));
       debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
     }
 
@@ -1791,14 +1962,10 @@ score_querypos_lookback_mult (
 
 
 static void
-score_querypos_lookforward_one (
-#ifdef DEBUG9
-				int *fwd_tracei,
-#endif
-				Link_T currlink, int querypos,
+score_querypos_lookforward_one (int *fwd_tracei, Link_T currlink, int curr_querypos, int currhit,
 				int querystart, int queryend, unsigned int position,
-				struct Link_T **links, Chrpos_T **mappings,
-				int **active, int *firstactive,
+				struct Link_T **links, int **fwd_scores,
+				Chrpos_T **mappings, int **active, int *firstactive,
 				Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
 				int indexsize, Intlist_T processed,
 				bool anchoredp, bool localp, bool splicingp,
@@ -1812,8 +1979,8 @@ score_querypos_lookforward_one (
   int best_fwd_rootposition = position;
   int best_fwd_score = 0, fwd_score;
   int best_fwd_prevpos = -1, best_fwd_prevhit = -1;
+  int best_fwd_tracei, last_tracei;
 #ifdef DEBUG9
-  int best_fwd_tracei;
   int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0;
   int canonicalsgn = 0;
 #endif
@@ -1852,9 +2019,9 @@ score_querypos_lookforward_one (
     prev_active = active[prev_querypos];
 
 #ifdef PMAP
-    querydistance = (prev_querypos - querypos)*3;
+    querydistance = (prev_querypos - curr_querypos)*3;
 #else
-    querydistance = prev_querypos - querypos;
+    querydistance = prev_querypos - curr_querypos;
 #endif
     prevhit = firstactive[prev_querypos];
     prevposition = position;	/* Prevents prevposition == position + querydistance */
@@ -1866,12 +2033,12 @@ score_querypos_lookforward_one (
       best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
       /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */
       best_fwd_rootposition = prevlink->fwd_rootposition;
-      best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*querydistance;
+      best_fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*querydistance;
       
       best_fwd_prevpos = prev_querypos;
       best_fwd_prevhit = prevhit;
-#ifdef DEBUG9
       best_fwd_tracei = prevlink->fwd_tracei;
+#ifdef DEBUG9
       best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
       best_fwd_intronnrev = prevlink->fwd_intronnrev;
       best_fwd_intronnunk = prevlink->fwd_intronnunk;
@@ -1882,21 +2049,21 @@ score_querypos_lookforward_one (
       lookback = sufflookback/2;
 
       debug9(printf("\tA. Adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
-		    prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],prevlink->fwd_score,
+		    prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],fwd_scores[prev_querypos][prevhit],
 		    best_fwd_score,best_fwd_consecutive,best_fwd_tracei,
 		    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
     }
   }
 
   /* Check work list */
-  if (anchoredp && querypos + indexsize_query >= queryend) {
+  if (anchoredp && curr_querypos + indexsize_query >= queryend) {
     /* Allow close prevpositions that overlap with anchor */
     /* Can give rise to false positives, and increases amount of dynamic programming work */
     debug9(printf("No skipping because close to anchor\n"));
-  } else if (0 && anchoredp && querypos == querystart) {
+  } else if (0 && anchoredp && curr_querypos == querystart) {
     /* Test end position */
   } else {
-    while (processed != NULL && (prev_querypos = Intlist_head(processed)) < querypos + indexsize_query) {
+    while (processed != NULL && (prev_querypos = Intlist_head(processed)) < curr_querypos + indexsize_query) {
       debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos));
       processed = Intlist_next(processed);
     }
@@ -1905,14 +2072,15 @@ score_querypos_lookforward_one (
   /* D. Evaluate for mismatches (all other previous querypos) */
   donep = false;
   nseen = 0; 
+  last_tracei = -1;
   for ( ; processed != NULL && best_fwd_consecutive < enough_consecutive && donep == false;
 	processed = Intlist_next(processed), nseen++) {
     prev_querypos = Intlist_head(processed);
 
 #ifdef PMAP
-    querydistance = (prev_querypos - querypos)*3;
+    querydistance = (prev_querypos - curr_querypos)*3;
 #else
-    querydistance = prev_querypos - querypos;
+    querydistance = prev_querypos - curr_querypos;
 #endif
 
     if (nseen > nlookback && querydistance - indexsize_nt > lookback) {
@@ -1928,6 +2096,15 @@ score_querypos_lookforward_one (
       prev_links = links[prev_querypos];
       prev_active = active[prev_querypos];
 
+      /* Range 0 */
+      while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
+	debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
+	prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+      }
+      if (prevhit != -1) {
+	last_tracei = prev_links[prevhit].fwd_tracei;
+      }
+
       /* Range 1: From Infinity to maxintronlen */
       if (splicingp == true) {
 	/* This is equivalent to diffdistance >= maxintronlen, where
@@ -1950,7 +2127,7 @@ score_querypos_lookforward_one (
 	assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
 	diffdistance = gendistance - querydistance; /* No need for abs() */
 
-	fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
+	fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
 	if (splicingp == true) {
 	  fwd_score -= (diffdistance/TEN_THOUSAND + 1);
 	} else {
@@ -2021,7 +2198,7 @@ score_querypos_lookforward_one (
 
 	debug9(printf("\tD2. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-		      prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
@@ -2038,8 +2215,8 @@ score_querypos_lookforward_one (
 	  best_fwd_score = fwd_score;
 	  best_fwd_prevpos = prev_querypos;
 	  best_fwd_prevhit = prevhit;
-#ifdef DEBUG9
 	  best_fwd_tracei = ++*fwd_tracei;
+#ifdef DEBUG9
 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
@@ -2074,7 +2251,7 @@ score_querypos_lookforward_one (
 #else
 	/* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
 	/* This is how version 2013-08-14 did it */
-	fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
+	fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
 #endif
 #if 0
 	if (/*near_end_p == false &&*/ prevlink->fwd_consecutive < EXON_DEFN) {
@@ -2084,7 +2261,7 @@ score_querypos_lookforward_one (
 
 	debug9(printf("\tD4. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
 		      prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-		      prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+		      fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
 		      gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
@@ -2101,9 +2278,9 @@ score_querypos_lookforward_one (
 	  best_fwd_score = fwd_score;
 	  best_fwd_prevpos = prev_querypos;
 	  best_fwd_prevhit = prevhit;
-#ifdef DEBUG9
 	  /* best_fwd_tracei = ++*fwd_tracei; */
 	  best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
+#ifdef DEBUG9
 	  best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
 	  best_fwd_intronnrev = prevlink->fwd_intronnrev;
 	  best_fwd_intronnunk = prevlink->fwd_intronnunk;
@@ -2132,17 +2309,17 @@ score_querypos_lookforward_one (
   currlink->fwd_pos = best_fwd_prevpos;
   currlink->fwd_hit = best_fwd_prevhit;
   if (currlink->fwd_pos >= 0) {
-    debug9(currlink->fwd_tracei = best_fwd_tracei);
-    currlink->fwd_score = best_fwd_score;
+    currlink->fwd_tracei = best_fwd_tracei;
+    fwd_scores[curr_querypos][currhit] = best_fwd_score;
   } else if (anchoredp == true) {
-    debug9(currlink->fwd_tracei = -1);
-    currlink->fwd_score = -100000;
+    currlink->fwd_tracei = -1;
+    fwd_scores[curr_querypos][currhit] = -100000;
   } else if (localp == true) {
-    debug9(currlink->fwd_tracei = ++*fwd_tracei);
-    currlink->fwd_score = indexsize_nt;
+    currlink->fwd_tracei = ++*fwd_tracei;
+    fwd_scores[curr_querypos][currhit] = indexsize_nt;
   } else {
-    debug9(currlink->fwd_tracei = ++*fwd_tracei);
-    currlink->fwd_score = best_fwd_score;
+    currlink->fwd_tracei = ++*fwd_tracei;
+    fwd_scores[curr_querypos][currhit] = best_fwd_score;
   }
 
 #ifdef DEBUG9
@@ -2152,7 +2329,7 @@ score_querypos_lookforward_one (
 #endif
 
   debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
-		currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei));
+		currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][currhit],currlink->fwd_tracei));
   debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
 
   return;
@@ -2160,14 +2337,10 @@ score_querypos_lookforward_one (
 
 
 static void
-score_querypos_lookforward_mult (
-#ifdef DEBUG9
-				 int *fwd_tracei,
-#endif
-				 int low_hit, int high_hit,
-				 int querypos, int querystart, int queryend, unsigned int *positions,
-				 struct Link_T **links, Chrpos_T **mappings,
-				 int **active, int *firstactive,
+score_querypos_lookforward_mult (int *fwd_tracei, int low_hit, int high_hit, int curr_querypos,
+				 int querystart, int queryend, unsigned int *positions,
+				 struct Link_T **links, int **fwd_scores,
+				 Chrpos_T **mappings, int **active, int *firstactive,
 				 Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp,
 				 int indexsize, Intlist_T processed,
 				 bool anchoredp, bool localp, bool splicingp,
@@ -2180,12 +2353,12 @@ score_querypos_lookforward_mult (
   Chrpos_T *prev_mappings, *adj_mappings;
   int *prev_active, *adj_active;
 
-  int best_fwd_consecutive;
+  int overall_fwd_consecutive, best_fwd_consecutive;
   int best_fwd_rootposition;
   int best_fwd_score, fwd_score;
   int best_fwd_prevpos, best_fwd_prevhit;
+  int best_fwd_tracei, last_tracei;
 #ifdef DEBUG9
-  int best_fwd_tracei;
   int best_fwd_intronnfwd, best_fwd_intronnrev, best_fwd_intronnunk;
   int canonicalsgn = 0;
 #endif
@@ -2210,14 +2383,14 @@ score_querypos_lookforward_mult (
   /* Determine work load */
   /* printf("Work load (lookforward): %s\n",Intlist_to_string(processed)); */
   last_item = processed;
-  if (anchoredp && querypos + indexsize_query >= queryend) {
+  if (anchoredp && curr_querypos + indexsize_query >= queryend) {
     /* Allow close prevpositions that overlap with anchor */
     /* Can give rise to false positives, and increases amount of dynamic programming work */
     /* debug9(printf("No skipping because close to anchor\n")); */
-  } else if (0 && anchoredp && querypos == querystart) {
+  } else if (0 && anchoredp && curr_querypos == querystart) {
     /* Test end position */
   } else {
-    while (processed != NULL && (prev_querypos = Intlist_head(processed)) < querypos + indexsize_query) {
+    while (processed != NULL && (prev_querypos = Intlist_head(processed)) < curr_querypos + indexsize_query) {
       debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos));
       processed = Intlist_next(processed);
     }
@@ -2225,7 +2398,7 @@ score_querypos_lookforward_mult (
 
   if (last_item == NULL) {
     for (hiti = nhits - 1; hiti >= 0; hiti--) {
-      currlink = &(links[querypos][hiti + low_hit]);
+      currlink = &(links[curr_querypos][hiti + low_hit]);
 
       currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
       currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
@@ -2233,13 +2406,13 @@ score_querypos_lookforward_mult (
       currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
 
       if (anchoredp == true) {
-	debug9(currlink->fwd_tracei = -1);
-	currlink->fwd_score = -100000;
+	currlink->fwd_tracei = -1;
+	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
       } else if (localp == true) {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = indexsize_nt;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
       } else {
-	currlink->fwd_score = /*best_fwd_score =*/ 0;
+	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
       }
     }
 
@@ -2251,9 +2424,9 @@ score_querypos_lookforward_mult (
     adj_active = active[adj_querypos];
 
 #ifdef PMAP
-    adj_querydistance = (adj_querypos - querypos)*3;
+    adj_querydistance = (adj_querypos - curr_querypos)*3;
 #else
-    adj_querydistance = adj_querypos - querypos;
+    adj_querydistance = adj_querypos - curr_querypos;
 #endif
 
     /* Process prevhit and hiti in parallel.  Values are descending along prevhit chain and from nhits-1 to 0. */
@@ -2265,7 +2438,7 @@ score_querypos_lookforward_mult (
 
       } else if (prevposition < position + adj_querydistance) {
 	/* Adjacent position not found for hiti */
-	currlink = &(links[querypos][hiti + low_hit]);
+	currlink = &(links[curr_querypos][hiti + low_hit]);
 
 	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
 	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
@@ -2273,32 +2446,32 @@ score_querypos_lookforward_mult (
 	currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
 
 	if (anchoredp == true) {
-	  debug9(currlink->fwd_tracei = -1);
-	  currlink->fwd_score = -100000;
+	  currlink->fwd_tracei = -1;
+	  fwd_scores[curr_querypos][hiti + low_hit] = -100000;
 	} else if (localp == true) {
-	  debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	  currlink->fwd_score = indexsize_nt;
+	  currlink->fwd_tracei = ++*fwd_tracei;
+	  fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
 	} else {
-	  currlink->fwd_score = /*best_fwd_score =*/ 0;
+	  fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
 	}
 
 	hiti--;
 	
       } else {
 	/* Adjacent position found for hiti */
-	currlink = &(links[querypos][hiti + low_hit]);
+	currlink = &(links[curr_querypos][hiti + low_hit]);
 	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
 
 	currlink->fwd_consecutive = /*best_fwd_consecutive =*/ prevlink->fwd_consecutive + adj_querydistance;
 	currlink->fwd_rootposition = /*best_fwd_rootposition =*/ prevlink->fwd_rootposition;
 	currlink->fwd_pos = /*best_fwd_prevpos =*/ adj_querypos;
 	currlink->fwd_hit = /*best_fwd_prevhit =*/ prevhit;
-	currlink->fwd_score = /*best_fwd_score =*/ prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance;
+	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
 
 #ifdef DEBUG9
-	printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
-	       hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score,
-	       currlink->fwd_score,currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
+	printf("\tA(3). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
+	       hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
+	       fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
 	       /*best_fwd_intronnfwd*/prevlink->fwd_intronnfwd,
 	       /*best_fwd_intronnrev*/prevlink->fwd_intronnrev,
 	       /*best_fwd_intronnunk*/prevlink->fwd_intronnunk);
@@ -2311,7 +2484,7 @@ score_querypos_lookforward_mult (
 
     while (hiti >= 0) {
       /* Adjacent position not found for hiti */
-      currlink = &(links[querypos][hiti + low_hit]);
+      currlink = &(links[curr_querypos][hiti + low_hit]);
 
       currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH;
       currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti];
@@ -2319,13 +2492,13 @@ score_querypos_lookforward_mult (
       currlink->fwd_hit = /*best_fwd_prevhit =*/ -1;
 
       if (anchoredp == true) {
-	debug9(currlink->fwd_tracei = -1);
-	currlink->fwd_score = -100000;
+	currlink->fwd_tracei = -1;
+	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
       } else if (localp == true) {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = indexsize_nt;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
       } else {
-	currlink->fwd_score = /*best_fwd_score =*/ 0;
+	fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0;
       }
 
       hiti--;
@@ -2338,11 +2511,10 @@ score_querypos_lookforward_mult (
     adj_active = active[adj_querypos];
 
 #ifdef PMAP
-    adj_querydistance = (adj_querypos - querypos)*3;
+    adj_querydistance = (adj_querypos - curr_querypos)*3;
 #else
-    adj_querydistance = adj_querypos - querypos;
+    adj_querydistance = adj_querypos - curr_querypos;
 #endif
-    adj_frontier = firstactive[adj_querypos];
 
     nprocessed = Intlist_length(processed);
     frontier = (int *) MALLOCA(nprocessed * sizeof(int));
@@ -2351,7 +2523,7 @@ score_querypos_lookforward_mult (
     for (p = processed; p != NULL; p = Intlist_next(p)) {
       prev_querypos = Intlist_head(p);
 
-      querydistance = prev_querypos - querypos;
+      querydistance = prev_querypos - curr_querypos;
       if (nseen <= /*nlookback*/1 || querydistance - indexsize_nt <= /*lookback*/sufflookback/2) {
 	max_adjacent_nseen = nseen;
       }
@@ -2362,6 +2534,34 @@ score_querypos_lookforward_mult (
       frontier[nseen++] = firstactive[prev_querypos];
     }
 
+
+    /* Look for overall_fwd_consecutive to see whether we can be greedy */
+    overall_fwd_consecutive = 0;
+    adj_frontier = firstactive[adj_querypos];
+    for (hiti = nhits - 1; hiti >= 0; hiti--) {
+      position = positions[hiti];
+
+      /* A. Evaluate adjacent position (at last one processed) */
+      prevhit = adj_frontier;	/* Get information from last hiti */
+      prevposition = position;	/* Prevents prevposition == position + adj_querydistance */
+      while (prevhit != -1 && (prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) > position + adj_querydistance) {
+	prevhit = /*active[adj_querypos]*/adj_active[prevhit];
+      }
+      adj_frontier = prevhit;	/* Save information for next hiti */
+
+      if (prevposition == position + adj_querydistance) {
+	/* Adjacent found */
+	prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]);
+	if (prevlink->fwd_consecutive + adj_querydistance > overall_fwd_consecutive) {
+	  overall_fwd_consecutive = prevlink->fwd_consecutive + adj_querydistance;
+	}
+      }
+    }
+    debug(printf("Overall fwd consecutive is %d\n",overall_fwd_consecutive));
+
+
+    /* Now process */
+    adj_frontier = firstactive[adj_querypos];
     for (hiti = nhits - 1; hiti >= 0; hiti--) {
       position = positions[hiti];
 
@@ -2381,17 +2581,17 @@ score_querypos_lookforward_mult (
 	best_fwd_rootposition = prevlink->fwd_rootposition;
 	best_fwd_prevpos = adj_querypos;
 	best_fwd_prevhit = prevhit;
-	best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance;
+	best_fwd_score = fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance;
 	max_nseen = max_adjacent_nseen;	/* Look not so far back */
+	best_fwd_tracei = prevlink->fwd_tracei;
 
 #ifdef DEBUG9
-	best_fwd_tracei = prevlink->fwd_tracei;
 	best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
 	best_fwd_intronnrev = prevlink->fwd_intronnrev;
 	best_fwd_intronnunk = prevlink->fwd_intronnunk;
 #endif
-	debug9(printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
-		      hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score,
+	debug9(printf("\tA(4). For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n",
+		      hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit],
 		      best_fwd_score,best_fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei,
 		      best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk));
       } else {
@@ -2402,214 +2602,227 @@ score_querypos_lookforward_mult (
 	best_fwd_prevhit = -1;
 	best_fwd_score = 0;
 	max_nseen = max_nonadjacent_nseen; /* Look farther back */
+	best_fwd_tracei = -1;
 
 #ifdef DEBUG9
-	best_fwd_tracei = -1;
 	best_fwd_intronnfwd = 0;
 	best_fwd_intronnrev = 0;
 	best_fwd_intronnunk = 0;
 #endif
       }
 
+      if (overall_fwd_consecutive < GREEDY_NCONSECUTIVE) {
+	/* D. Evaluate for mismatches (all other previous querypos) */
+	nseen = 0;
+	last_tracei = -1;
+	for (p = processed; p != NULL && best_fwd_consecutive < enough_consecutive && nseen <= max_nseen;
+	     p = Intlist_next(p), nseen++) {
 
-      /* D. Evaluate for mismatches (all other previous querypos) */
-      nseen = 0;
-      for (p = processed; p != NULL && best_fwd_consecutive < enough_consecutive && nseen <= max_nseen;
-	   p = Intlist_next(p), nseen++) {
-	/* Making this check helps with efficiency */
-	if ((prevhit = frontier[nseen]) != -1) {	/* Retrieve starting point from last hiti */
-	  prev_querypos = Intlist_head(p);
+	  /* Making this check helps with efficiency */
+	  if ((prevhit = frontier[nseen]) != -1) {	/* Retrieve starting point from last hiti */
+	    prev_querypos = Intlist_head(p);
 #ifdef PMAP
-	  querydistance = (prev_querypos - querypos)*3;
+	    querydistance = (prev_querypos - curr_querypos)*3;
 #else
-	  querydistance = prev_querypos - querypos;
-#endif
-	  /* Actually a querydist_penalty */
-	  querydist_credit = -querydistance/indexsize_nt;
-
-	  prev_mappings = mappings[prev_querypos];
-	  prev_links = links[prev_querypos];
-	  prev_active = active[prev_querypos];
-
-	  /* Range 1: From Infinity to maxintronlen.  To be skipped.
-	     This is equivalent to diffdistance >= maxintronlen, where
-	     diffdistance = abs(gendistance - querydistance) and
-	     gendistance = (position - prevposition - indexsize_nt) */
-	  while (prevhit != -1 && (/*prevposition =*/ /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + maxintronlen + querydistance) {
-	    /* Accept within range 1 (ignore) */
-	    prevhit = /*active[prev_querypos]*/prev_active[prevhit];
-	  }
-	  frontier[nseen] = prevhit;	/* Store as starting point for next hiti */
-    
-
-	  /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
-	  /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
-	  while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) > position + EQUAL_DISTANCE_NOT_SPLICING + querydistance) {
-	    prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
-
-	    gendistance = prevposition - position;
-	    assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
-	    diffdistance = gendistance - querydistance; /* No need for abs() */
-
-	    fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/;
-	    if (splicingp == true) {
-	      fwd_score -= (diffdistance/TEN_THOUSAND + 1);
-	    } else {
-	      fwd_score -= (diffdistance/ONE + 1);
+	    querydistance = prev_querypos - curr_querypos;
+#endif
+	    /* Actually a querydist_penalty */
+	    querydist_credit = -querydistance/indexsize_nt;
+	    
+	    prev_mappings = mappings[prev_querypos];
+	    prev_links = links[prev_querypos];
+	    prev_active = active[prev_querypos];
+
+	    /* Range 0 */
+	    while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) {
+	      debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei));
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	    }
+	    if (prevhit != -1) {
+	      last_tracei = prev_links[prevhit].fwd_tracei;
 	    }
 
-	    if (use_canonical_p == true) {
-	      /* prevpos is higher genomic coordinate than currpos */
-	      /* need to add to position and prevposition to compensate for greedy matches */
-	      /* need to subtract from position and prevposition to compensate for missed matches */
-	      if (plusp == true) {
-		prevpos = chroffset + prevposition;
-		currpos = chroffset + position + querydistance;
-		if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
-		  canonicalp = false;
-		} else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
-						   /*donor_leftbound*/currpos - MISS_BEHIND,
-						   /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
-						   /*acceptor_leftbound*/prevpos - MISS_BEHIND,
-						   chroffset) == true) {
-		  debug9(printf("lookforward plus: sense canonical\n"));
-		  canonicalp = true;
-		} else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
-						       /*donor_leftbound*/prevpos - MISS_BEHIND,
-						       /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
-						       /*acceptor_leftbound*/currpos - MISS_BEHIND,
-						       chroffset) == true) {
-		  debug9(printf("lookforward plus: antisense canonical\n"));
-		  canonicalp = true;
-		} else {
-		  debug9(printf("lookforward plus: not canonical\n"));
-		  canonicalp = false;
-		}
+	    /* Range 1: From Infinity to maxintronlen.  To be skipped.
+	       This is equivalent to diffdistance >= maxintronlen, where
+	       diffdistance = abs(gendistance - querydistance) and
+	       gendistance = (position - prevposition - indexsize_nt) */
+	    while (prevhit != -1 && (/*prevposition =*/ /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + maxintronlen + querydistance) {
+	      /* Accept within range 1 (ignore) */
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	    }
+	    frontier[nseen] = prevhit;	/* Store as starting point for next hiti */
+	    
+	    
+	    /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */
+	    /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */
+	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) > position + EQUAL_DISTANCE_NOT_SPLICING + querydistance) {
+	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
 	      
+	      gendistance = prevposition - position;
+	      assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */
+	      diffdistance = gendistance - querydistance; /* No need for abs() */
+	      
+	      fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/;
+	      if (splicingp == true) {
+		fwd_score -= (diffdistance/TEN_THOUSAND + 1);
 	      } else {
-		prevpos = chrhigh + 1 - prevposition;
-		currpos = chrhigh + 1 - position - querydistance;
-		if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
-		  canonicalp = false;
-		} else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
-						   /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
-						   /*acceptor_rightbound*/currpos + MISS_BEHIND,
-						   /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
-						   chroffset) == true) {
-		  debug9(printf("lookforward minus: sense canonical\n"));
-		  canonicalp = true;
-		} else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
-						       /*donor_leftbound*/currpos - GREEDY_ADVANCE,
-						       /*acceptor_rightbound*/prevpos + MISS_BEHIND,
-						       /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
-						       chroffset) == true) {
-		  debug9(printf("lookforward minus: antisense canonical\n"));
-		  canonicalp = true;
+		fwd_score -= (diffdistance/ONE + 1);
+	      }
+	      
+	      if (use_canonical_p == true) {
+		/* prevpos is higher genomic coordinate than currpos */
+		/* need to add to position and prevposition to compensate for greedy matches */
+		/* need to subtract from position and prevposition to compensate for missed matches */
+		if (plusp == true) {
+		  prevpos = chroffset + prevposition;
+		  currpos = chroffset + position + querydistance;
+		  if (currpos < MISS_BEHIND || prevpos < MISS_BEHIND) {
+		    canonicalp = false;
+		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/currpos + GREEDY_ADVANCE,
+						     /*donor_leftbound*/currpos - MISS_BEHIND,
+						     /*acceptor_rightbound*/prevpos + GREEDY_ADVANCE,
+						     /*acceptor_leftbound*/prevpos - MISS_BEHIND,
+						     chroffset) == true) {
+		    debug9(printf("lookforward plus: sense canonical\n"));
+		    canonicalp = true;
+		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/prevpos + GREEDY_ADVANCE,
+							 /*donor_leftbound*/prevpos - MISS_BEHIND,
+							 /*acceptor_rightbound*/currpos + GREEDY_ADVANCE,
+							 /*acceptor_leftbound*/currpos - MISS_BEHIND,
+							 chroffset) == true) {
+		    debug9(printf("lookforward plus: antisense canonical\n"));
+		    canonicalp = true;
+		  } else {
+		    debug9(printf("lookforward plus: not canonical\n"));
+		    canonicalp = false;
+		  }
+	      
 		} else {
-		  debug9(printf("lookforward minus: not canonical\n"));
-		  canonicalp = false;
+		  prevpos = chrhigh + 1 - prevposition;
+		  currpos = chrhigh + 1 - position - querydistance;
+		  if (prevpos < GREEDY_ADVANCE || currpos < GREEDY_ADVANCE) {
+		    canonicalp = false;
+		  } else if (Genome_sense_canonicalp(/*donor_rightbound*/prevpos + MISS_BEHIND,
+						     /*donor_leftbound*/prevpos - GREEDY_ADVANCE,
+						     /*acceptor_rightbound*/currpos + MISS_BEHIND,
+						     /*acceptor_leftbound*/currpos - GREEDY_ADVANCE,
+						     chroffset) == true) {
+		    debug9(printf("lookforward minus: sense canonical\n"));
+		    canonicalp = true;
+		  } else if (Genome_antisense_canonicalp(/*donor_rightbound*/currpos + MISS_BEHIND,
+							 /*donor_leftbound*/currpos - GREEDY_ADVANCE,
+							 /*acceptor_rightbound*/prevpos + MISS_BEHIND,
+							 /*acceptor_leftbound*/prevpos - GREEDY_ADVANCE,
+							 chroffset) == true) {
+		    debug9(printf("lookforward minus: antisense canonical\n"));
+		    canonicalp = true;
+		  } else {
+		    debug9(printf("lookforward minus: not canonical\n"));
+		    canonicalp = false;
+		  }
 		}
-	      }
 
-	      if (canonicalp == true) {
-		debug9(canonicalsgn = +1);
-	      } else {
-		debug9(canonicalsgn = 0);
-		fwd_score -= non_canonical_penalty;
+		if (canonicalp == true) {
+		  debug9(canonicalsgn = +1);
+		} else {
+		  debug9(canonicalsgn = 0);
+		  fwd_score -= non_canonical_penalty;
+		}
 	      }
-	    }
 
-	    debug9(printf("\tD2, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
-			  hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-			  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
-			  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
-			  gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
+	      debug9(printf("\tD2, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
+			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
+			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
+			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
-	    /* Disallow ties, which should favor adjacent */
-	    if (fwd_score > best_fwd_score) {
-	      if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
-		best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
-	      } else {
-		best_fwd_consecutive = 0;
-	      }
-	      best_fwd_rootposition = prevlink->fwd_rootposition;
-	      best_fwd_score = fwd_score;
-	      best_fwd_prevpos = prev_querypos;
-	      best_fwd_prevhit = prevhit;
+	      /* Disallow ties, which should favor adjacent */
+	      if (fwd_score > best_fwd_score) {
+		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
+		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
+		} else {
+		  best_fwd_consecutive = 0;
+		}
+		best_fwd_rootposition = prevlink->fwd_rootposition;
+		best_fwd_score = fwd_score;
+		best_fwd_prevpos = prev_querypos;
+		best_fwd_prevhit = prevhit;
+		best_fwd_tracei = ++*fwd_tracei;
 #ifdef DEBUG9
-	      best_fwd_tracei = ++*fwd_tracei;
-	      best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
-	      best_fwd_intronnrev = prevlink->fwd_intronnrev;
-	      best_fwd_intronnunk = prevlink->fwd_intronnunk;
-	      switch (canonicalsgn) {
-	      case 1: best_fwd_intronnfwd++; break;
-	      case 0: best_fwd_intronnunk++; break;
-	      }
+		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
+		best_fwd_intronnrev = prevlink->fwd_intronnrev;
+		best_fwd_intronnunk = prevlink->fwd_intronnunk;
+		switch (canonicalsgn) {
+		case 1: best_fwd_intronnfwd++; break;
+		case 0: best_fwd_intronnunk++; break;
+		}
 #endif
-	      debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
-	    } else {
-	      debug9(printf(" => Loses to %d\n",best_fwd_score));
-	    }
+		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
+	      } else {
+		debug9(printf(" => Loses to %d\n",best_fwd_score));
+	      }
 
-	    prevhit = /*active[prev_querypos]*/prev_active[prevhit];
-	  }
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	    }
 
 
-	  /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
-	  /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
-	  /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
+	    /* Scoring appears to be the same as for range 4, which is rarely called, so including in range 4 */
+	    /* Range 3: From (querypos + EQUAL_DISTANCE_NOT_SPLICING) to (querypos - EQUAL_DISTANCE_NOT_SPLICING) */
+	    /* This is equivalent to -diffdistance > EQUAL_DISTANCE_NOT_SPLICING && prevposition + indexsize_nt <= position */
 
 
-	  /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
-	  while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + indexsize_nt) {
-	    prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
+	    /* Range 4: From (prev_querypos - EQUAL_DISTANCE_NOT_SPLICING) to indexsize_nt */
+	    while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) >= position + indexsize_nt) {
+	      prevlink = &(/*links[prev_querypos]*/prev_links[prevhit]);
 
-	    gendistance = prevposition - position;
-	    /* was abs(gendistance - querydistance) */
-	    diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
+	      gendistance = prevposition - position;
+	      /* was abs(gendistance - querydistance) */
+	      diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance);
 
 #ifdef BAD_GMAX
-	    fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
+	      fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/;
 #else
-	    /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
-	    /* This is how version 2013-08-14 did it */
-	    fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH;
+	      /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */
+	      /* This is how version 2013-08-14 did it */
+	      fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH;
 #endif
 
-	    debug9(printf("\tD4, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
-			  hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
-			  prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
-			  best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
-			  gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
+	      debug9(printf("\tD4, hit %d. Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)",
+			    hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],
+			    fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei,
+			    best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk,
+			    gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn));
 	    
-	    /* Disallow ties, which should favor adjacent */
-	    if (fwd_score > best_fwd_score) {
-	      if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
-		best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
-	      } else {
-		best_fwd_consecutive = 0;
-	      }
-	      best_fwd_rootposition = prevlink->fwd_rootposition;
-	      best_fwd_score = fwd_score;
-	      best_fwd_prevpos = prev_querypos;
-	      best_fwd_prevhit = prevhit;
+	      /* Disallow ties, which should favor adjacent */
+	      if (fwd_score > best_fwd_score) {
+		if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) {
+		  best_fwd_consecutive = prevlink->fwd_consecutive + querydistance;
+		} else {
+		  best_fwd_consecutive = 0;
+		}
+		best_fwd_rootposition = prevlink->fwd_rootposition;
+		best_fwd_score = fwd_score;
+		best_fwd_prevpos = prev_querypos;
+		best_fwd_prevhit = prevhit;
+		/* best_fwd_tracei = ++*fwd_tracei; */
+		best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
+
 #ifdef DEBUG9
-	      /* best_fwd_tracei = ++*fwd_tracei; */
-	      best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */
-	      best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
-	      best_fwd_intronnrev = prevlink->fwd_intronnrev;
-	      best_fwd_intronnunk = prevlink->fwd_intronnunk;
-	      switch (canonicalsgn) {
-	      case 1: best_fwd_intronnfwd++; break;
-	      case 0: best_fwd_intronnunk++; break;
-	      }
+		best_fwd_intronnfwd = prevlink->fwd_intronnfwd;
+		best_fwd_intronnrev = prevlink->fwd_intronnrev;
+		best_fwd_intronnunk = prevlink->fwd_intronnunk;
+		switch (canonicalsgn) {
+		case 1: best_fwd_intronnfwd++; break;
+		case 0: best_fwd_intronnunk++; break;
+		}
 #endif
-	      debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
-	    } else {
-	      debug9(printf(" => Loses to %d\n",best_fwd_score));
-	    }
+		debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive));
+	      } else {
+		debug9(printf(" => Loses to %d\n",best_fwd_score));
+	      }
 
-	    prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	      prevhit = /*active[prev_querypos]*/prev_active[prevhit];
+	    }
 	  }
 	}
       }
@@ -2618,23 +2831,23 @@ score_querypos_lookforward_mult (
 	 small local extension from beating a good canonical intron.
 	 If querypos is too small, don't insert an intron.  */
       /* linksconsecutive already assigned above */
-      currlink = &(links[querypos][hiti + low_hit]);
+      currlink = &(links[curr_querypos][hiti + low_hit]);
       currlink->fwd_consecutive = best_fwd_consecutive;
       currlink->fwd_rootposition = best_fwd_rootposition;
       currlink->fwd_pos = best_fwd_prevpos;
       currlink->fwd_hit = best_fwd_prevhit;
       if (currlink->fwd_pos >= 0) {
-	debug9(currlink->fwd_tracei = best_fwd_tracei);
-	currlink->fwd_score = best_fwd_score;
+	currlink->fwd_tracei = best_fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
       } else if (anchoredp == true) {
-	debug9(currlink->fwd_tracei = -1);
-	currlink->fwd_score = -100000;
+	currlink->fwd_tracei = -1;
+	fwd_scores[curr_querypos][hiti + low_hit] = -100000;
       } else if (localp == true) {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = indexsize_nt;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt;
       } else {
-	debug9(currlink->fwd_tracei = ++*fwd_tracei);
-	currlink->fwd_score = best_fwd_score;
+	currlink->fwd_tracei = ++*fwd_tracei;
+	fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score;
       }
 
 #ifdef DEBUG9
@@ -2644,7 +2857,7 @@ score_querypos_lookforward_mult (
 #endif
 
       debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n",
-		    currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei));
+		    currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_tracei));
       debug3(printf("%d %d  %d %d  1\n",querypos,hit,best_prevpos,best_prevhit));
     }
 
@@ -2657,7 +2870,7 @@ score_querypos_lookforward_mult (
 
 static void
 revise_active_lookback (int **active, int *firstactive, int *nactive, 
-			int low_hit, int high_hit, struct Link_T **links, int querypos) {
+			int low_hit, int high_hit, int **fwd_scores, int querypos) {
   int best_score, threshold, score;
   int hit, *ptr;
 
@@ -2668,24 +2881,24 @@ revise_active_lookback (int **active, int *firstactive, int *nactive,
     nactive[querypos] = 0;
 
   } else {
-    debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score));
-    best_score = links[querypos][hit].fwd_score;
+    debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
+    best_score = fwd_scores[querypos][hit];
 #ifdef SEPARATE_FWD_REV
-    debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score));
-    if ((score = links[querypos][hit].rev_score) > best_score) {
+    debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
+    if ((score = rev_scores[querypos][hit]) > best_score) {
       best_score = score;
     }
 #endif
     debug6(printf("\n"));
 
     for (hit++; hit < high_hit; hit++) {
-      debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score));
-      if ((score = links[querypos][hit].fwd_score) > best_score) {
+      debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
+      if ((score = fwd_scores[querypos][hit]) > best_score) {
 	best_score = score;
       }
 #ifdef SEPARATE_FWD_REV
-      debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score));
-      if ((score = links[querypos][hit].rev_score) > best_score) {
+      debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
+      if ((score = rev_scores[querypos][hit]) > best_score) {
 	best_score = score;
       }
 #endif
@@ -2702,9 +2915,9 @@ revise_active_lookback (int **active, int *firstactive, int *nactive,
     ptr = &(firstactive[querypos]);
     hit = low_hit;
     while (hit < high_hit) {
-      while (hit < high_hit && links[querypos][hit].fwd_score <= threshold
+      while (hit < high_hit && fwd_scores[querypos][hit] <= threshold
 #ifdef SEPARATE_FWD_REV
-	     && links[querypos][hit].rev_score <= threshold
+	     && rev_scores[querypos][hit] <= threshold
 #endif
 	     ) {
 	hit++;
@@ -2735,7 +2948,7 @@ revise_active_lookback (int **active, int *firstactive, int *nactive,
 
 static void
 revise_active_lookforward (int **active, int *firstactive, int *nactive, 
-			   int low_hit, int high_hit, struct Link_T **links, int querypos) {
+			   int low_hit, int high_hit, int **fwd_scores, int querypos) {
   int best_score, threshold, score;
   int hit, *ptr;
 
@@ -2745,24 +2958,24 @@ revise_active_lookforward (int **active, int *firstactive, int *nactive,
     firstactive[querypos] = -1;
     nactive[querypos] = 0;
   } else {
-    debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score));
-    best_score = links[querypos][hit].fwd_score;
+    debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
+    best_score = fwd_scores[querypos][hit];
 #ifdef SEPARATE_FWD_REV
-    debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score));
-    if ((score = links[querypos][hit].rev_score) > best_score) {
+    debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
+    if ((score = rev_scores[querypos][hit]) > best_score) {
       best_score = score;
     }
 #endif
     debug6(printf("\n"));
 
     for (--hit; hit >= low_hit; --hit) {
-      debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score));
-      if ((score = links[querypos][hit].fwd_score) > best_score) {
+      debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit]));
+      if ((score = fwd_scores[querypos][hit]) > best_score) {
 	best_score = score;
       }
 #ifdef SEPARATE_FWD_REV
-      debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score));
-      if ((score = links[querypos][hit].rev_score) > best_score) {
+      debug6(printf(" and rev_score is %d",rev_scores[querypos][hit]));
+      if ((score = rev_scores[querypos][hit]) > best_score) {
 	best_score = score;
       }
 #endif
@@ -2779,9 +2992,9 @@ revise_active_lookforward (int **active, int *firstactive, int *nactive,
     ptr = &(firstactive[querypos]);
     hit = high_hit - 1;
     while (hit >= low_hit) {
-      while (hit >= low_hit && links[querypos][hit].fwd_score <= threshold
+      while (hit >= low_hit && fwd_scores[querypos][hit] <= threshold
 #ifdef SEPARATE_FWD_REV
-	     && links[querypos][hit].rev_score <= threshold
+	     && rev_scores[querypos][hit] <= threshold
 #endif
 	     ) {
 	--hit;
@@ -3088,8 +3301,9 @@ Linkmatrix_get_cells_fwd (int *nunique, struct Link_T **links, int querystart, i
 #else
 
 static Cell_T *
-Linkmatrix_get_cells_fwd (int *nunique, struct Link_T **links, int querystart, int queryend, int *npositions,
-			  bool favor_right_p, Cellpool_T cellpool) {
+get_cells_fwd (int *nunique, struct Link_T **links, int **fwd_scores,
+	       int querystart, int queryend, int *npositions,
+	       bool favor_right_p, Cellpool_T cellpool) {
   Cell_T *sorted, *cells;
   List_T celllist = NULL;
   int querypos, hit;
@@ -3100,11 +3314,11 @@ Linkmatrix_get_cells_fwd (int *nunique, struct Link_T **links, int querystart, i
   ncells = 0;
   for (querypos = querystart; querypos <= queryend; querypos++) {
     for (hit = 0; hit < npositions[querypos]; hit++) {
-      if (links[querypos][hit].fwd_score > 0) {
+      if (fwd_scores[querypos][hit] > 0) {
 	rootposition = links[querypos][hit].fwd_rootposition;
 	/* tracei = links[querypos][hit].fwd_tracei; */
 	celllist = Cellpool_push(celllist,cellpool,rootposition,querypos,hit,/*fwdp*/true,
-				 links[querypos][hit].fwd_score);
+				 fwd_scores[querypos][hit]);
 	ncells++;
       }
     }
@@ -3311,7 +3525,8 @@ binary_search (int lowi, int highi, Chrpos_T *mappings, Chrpos_T goal) {
 /* Returns celllist */
 /* For PMAP, indexsize is in aa. */
 static Cell_T *
-align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **mappings, int *npositions, int totalpositions,
+align_compute_scores_lookback (int *ncells, struct Link_T **links, int **fwd_scores,
+			       Chrpos_T **mappings, int *npositions, int totalpositions,
 			       bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive,
 			       int *firstactive, int *nactive, Cellpool_T cellpool,
 			       int querystart, int queryend, int querylength,
@@ -3327,17 +3542,20 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 			       bool use_canonical_p, int non_canonical_penalty, bool debug_graphic_p, bool favor_right_p) {
   Cell_T *cells;
   Link_T currlink, prevlink;
-  int querypos, indexsize_nt, indexsize_query, hit, nhits, low_hit, high_hit;
+  int curr_querypos, indexsize_nt, indexsize_query, hit, nhits, low_hit, high_hit;
   int nskipped, min_hits, specific_querypos, specific_low_hit, specific_high_hit, next_querypos;
   Intlist_T processed = NULL;
   int best_overall_score = 0;
   int grand_fwd_score, grand_fwd_querypos, grand_fwd_hit, best_fwd_hit, best_fwd_score;
 #ifdef SEPARATE_FWD_REV
   int grand_rev_score, grand_rev_querypos, grand_rev_hit, best_rev_hit, best_rev_score;
-  debug9(int rev_tracei = 0);
+#ifdef DEBUG9
+  int rev_tracei = 0;
+#endif
 #endif
   int **active;
   Chrpos_T position, prevposition;
+  int fwd_tracei = 0;
 #if 0
   int *lastGT, *lastAG;
 #ifndef PMAP
@@ -3346,7 +3564,6 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 #endif
 #ifdef DEBUG9
   char *oligo;
-  int fwd_tracei = 0;
 #endif
 #ifdef DEBUG12
   Link_T termlink = NULL;
@@ -3365,6 +3582,7 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 #endif
   debug0(printf("Lookback: querystart = %d, queryend = %d, indexsize = %d\n",querystart,queryend,indexsize));
 
+  assert(oned_matrix_p == true);
   if (oned_matrix_p == true) {
     active = intmatrix_1d_new(querylength,npositions,totalpositions);
   } else {
@@ -3377,17 +3595,17 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 #endif
 
   /* Initialize */
-  for (querypos = 0; querypos < querystart; querypos++) {
-    debug6(printf("3.  Initializing firstactive for querypos %d to be -1\n",querypos));
-    firstactive[querypos] = -1;
-    nactive[querypos] = 0;
+  for (curr_querypos = 0; curr_querypos < querystart; curr_querypos++) {
+    debug6(printf("3.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
+    firstactive[curr_querypos] = -1;
+    nactive[curr_querypos] = 0;
   }
-  while (querypos <= queryend && npositions[querypos] <= 0) {
-    debug6(printf("4.  Initializing firstactive for querypos %d to be -1\n",querypos));
-    debug9(printf("Skipping querypos %d which has no positions\n",querypos));
-    firstactive[querypos] = -1;
-    nactive[querypos] = 0;
-    querypos++;
+  while (curr_querypos <= queryend && npositions[curr_querypos] <= 0) {
+    debug6(printf("4.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
+    debug9(printf("Skipping querypos %d which has no positions\n",curr_querypos));
+    firstactive[curr_querypos] = -1;
+    nactive[curr_querypos] = 0;
+    curr_querypos++;
   }
 
   if (anchoredp == true) {
@@ -3401,9 +3619,9 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
     currlink = &(links[anchor_querypos][hit]);	
 #ifndef SEPARATE_FWD_REV
     currlink->fwd_pos = currlink->fwd_hit = -1;
-    currlink->fwd_score = indexsize_nt;
     currlink->fwd_consecutive = EXON_DEFN;
-    debug9(currlink->fwd_tracei = 0);
+    currlink->fwd_tracei = 0;
+    fwd_scores[anchor_querypos][hit] = indexsize_nt;
 #else
     fprintf(stderr,"Not implemented yet\n");
     abort();
@@ -3417,31 +3635,31 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
     debug6(printf("Pushing anchorpos %d as processed\n",anchor_querypos));
     processed = Intlist_push(processed,anchor_querypos);
 
-  } else if (querypos <= queryend) {
-    for (hit = 0; hit < npositions[querypos]; hit++) {
-      currlink = &(links[querypos][hit]);
+  } else if (curr_querypos <= queryend) {
+    for (hit = 0; hit < npositions[curr_querypos]; hit++) {
+      currlink = &(links[curr_querypos][hit]);
 #ifndef SEPARATE_FWD_REV
       currlink->fwd_pos = currlink->fwd_hit = -1;
-      currlink->fwd_score = indexsize_nt;
       currlink->fwd_consecutive = indexsize_nt;
-      debug9(currlink->fwd_tracei = -1);
+      currlink->fwd_tracei = -1;
       /* currlink->fwd_rootnlinks = 1; */
+      fwd_scores[curr_querypos][hit] = indexsize_nt;
 #else
       currlink->fwd_pos = currlink->fwd_hit = -1;
-      currlink->fwd_score = indexsize_nt;
       currlink->fwd_consecutive = indexsize_nt;
-      debug9(currlink->fwd_tracei = -1);
+      currlink->fwd_tracei = -1;
       /* currlink->fwd_rootnlinks = 1; */
+      fwd_scores[curr_querypos][hit] = indexsize_nt;
       if (splicingp == true) {
 	currlink->rev_pos = currlink->rev_hit = -1;
-	currlink->rev_score = indexsize_nt;
 	currlink->rev_consecutive = indexsize_nt;
-	debug9(currlink->rev_tracei = -1);
+	currlink->rev_tracei = -1;
 	/* currlink->rev_rootnlinks = 1; */
+	rev_scores[curr_querypos][hit] = indexsize_nt;
       }
 #endif
     }
-    revise_active_lookback(active,firstactive,nactive,0,npositions[querypos],links,querypos);
+    revise_active_lookback(active,firstactive,nactive,0,npositions[curr_querypos],fwd_scores,curr_querypos);
   }
 
   grand_fwd_score = 0;
@@ -3459,8 +3677,8 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
   min_hits = 1000000;
   specific_querypos = -1;
 
-  /* querypos += 1; -- this causes querypos at querystart to be ignored */
-  while (querypos <= queryend) {
+  /* curr_querypos += 1; -- this causes curr_querypos at querystart to be ignored */
+  while (curr_querypos <= queryend) {
     best_fwd_score = 0;
     best_fwd_hit = -1;
 #ifdef SEPARATE_FWD_REV
@@ -3468,101 +3686,92 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
     best_rev_hit = -1;
 #endif
     
-    debug9(printf("Positions at querypos %d (forward order):",querypos);
-	   for (hit = 0; hit < npositions[querypos]; hit++) {
-	     printf(" %u",mappings[querypos][hit]);
+    debug9(printf("Positions at querypos %d (forward order):",curr_querypos);
+	   for (hit = 0; hit < npositions[curr_querypos]; hit++) {
+	     printf(" %u",mappings[curr_querypos][hit]);
 	   }
 	   printf("\n");
 	   );
 
     hit = 0;
-    while (hit < npositions[querypos] && mappings[querypos][hit] < minactive[querypos]) {
+    while (hit < npositions[curr_querypos] && mappings[curr_querypos][hit] < minactive[curr_querypos]) {
       hit++;
     }
     low_hit = hit;
-    while (hit < npositions[querypos] && mappings[querypos][hit] <= maxactive[querypos]) {
+    while (hit < npositions[curr_querypos] && mappings[curr_querypos][hit] <= maxactive[curr_querypos]) {
       hit++;
     }
     high_hit = hit;
     debug9(printf("Querypos %d has hit %d..%d out of %d (minactive = %u, maxactive = %u)\n",
-		  querypos,low_hit,high_hit-1,npositions[querypos],minactive[querypos],maxactive[querypos]));
+		  curr_querypos,low_hit,high_hit-1,npositions[curr_querypos],minactive[curr_querypos],maxactive[curr_querypos]));
 
     /* Can't use nactive yet, so use high_hit - low_hit */
     if (skip_repetitive_p && high_hit - low_hit >= MAX_NACTIVE && nskipped <= MAX_SKIPPED) { /* Previously turned off */
-      debug6(printf("Too many active (%d - %d) at querypos %d.  Setting firstactive to be -1\n",high_hit,low_hit,querypos));
-      firstactive[querypos] = -1;
-      nactive[querypos] = 0;
+      debug6(printf("Too many active (%d - %d) at querypos %d.  Setting firstactive to be -1\n",high_hit,low_hit,curr_querypos));
+      firstactive[curr_querypos] = -1;
+      nactive[curr_querypos] = 0;
       nskipped++;
       debug9(printf("  %d skipped because of %d hits\n",nskipped,high_hit - low_hit + 1));
 
       /* Store most specific querypos in section of skipped */
       if (high_hit - low_hit < min_hits) {
 	min_hits = high_hit - low_hit;
-	specific_querypos = querypos;
+	specific_querypos = curr_querypos;
 	specific_low_hit = low_hit;
 	specific_high_hit = high_hit;
       }
-      querypos++;
+      curr_querypos++;
 
     } else {
       if (nskipped > MAX_SKIPPED) {
 	debug9(printf("Too many skipped.  Going back to specific querypos %d\n",specific_querypos));
-	next_querypos = querypos;
-	querypos = specific_querypos;
+	next_querypos = curr_querypos;
+	curr_querypos = specific_querypos;
 	low_hit = specific_low_hit;
 	high_hit = specific_high_hit;
       } else {
-	next_querypos = querypos + 1;
+	next_querypos = curr_querypos + 1;
       }
 
       if ((nhits = high_hit - low_hit) > 0) {
 	if (nhits == 1) {
-	  currlink = &(links[querypos][low_hit]);
-	  position = mappings[querypos][low_hit];
+	  currlink = &(links[curr_querypos][low_hit]);
+	  position = mappings[curr_querypos][low_hit];
 
-	  debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
+	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
 	  debug9(printf("Finding link looking back from querypos %d,%d at %ux%d (%s).  prev_querypos was %d\n",
-			querypos,low_hit,position,active[querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
+			curr_querypos,low_hit,position,active[curr_querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
 	  
-	  score_querypos_lookback_one(
-#ifdef DEBUG9
-				      &fwd_tracei,
-#endif
-				      currlink,querypos,querystart,queryend,position,
-				      links,mappings,active,firstactive,chroffset,chrhigh,plusp,
+	  score_querypos_lookback_one(&fwd_tracei,currlink,curr_querypos,low_hit,querystart,queryend,position,
+				      links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp,
 				      indexsize,processed,
 				      anchoredp,localp,splicingp,use_canonical_p,
 				      non_canonical_penalty);
 
-	  if (currlink->fwd_score > 0) {
-	    debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,currlink->fwd_score));
-	    best_fwd_score = currlink->fwd_score;
+	  if (fwd_scores[curr_querypos][low_hit] > 0) {
+	    debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,fwd_scores[curr_querypos][low_hit]));
+	    best_fwd_score = fwd_scores[curr_querypos][low_hit];
 	    best_fwd_hit = low_hit;
 	  }
 
 	} else {
-	  debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
+	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
 	  debug9(printf("Finding links looking back from querypos %d,%d..%d at (%u..%u) (%s).  prev_querypos was %d\n",
-			querypos,low_hit,high_hit-1,mappings[querypos][low_hit],mappings[querypos][high_hit-1],
+			curr_querypos,low_hit,high_hit-1,mappings[curr_querypos][low_hit],mappings[curr_querypos][high_hit-1],
 			oligo,processed ? Intlist_head(processed) : -1));
 
-	  score_querypos_lookback_mult(
-#ifdef DEBUG9
-				       &fwd_tracei,
-#endif
-				       low_hit,high_hit,querypos,querystart,queryend,
-				       /*positions*/&(mappings[querypos][low_hit]),
-				       links,mappings,active,firstactive,chroffset,chrhigh,plusp,
+	  score_querypos_lookback_mult(&fwd_tracei,low_hit,high_hit,curr_querypos,querystart,queryend,
+				       /*positions*/&(mappings[curr_querypos][low_hit]),
+				       links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp,
 				       indexsize,processed,
 				       anchoredp,localp,splicingp,use_canonical_p,
 				       non_canonical_penalty);
 
 	  debug9(printf("Checking hits from low_hit %d to high_hit %d\n",low_hit,high_hit));
 	  for (hit = low_hit; hit < high_hit; hit++) {
-	    currlink = &(links[querypos][hit]);
-	    debug9(printf("Hit %d has score %d\n",hit,currlink->fwd_score));
-	    if (currlink->fwd_score > best_fwd_score) {
-	      best_fwd_score = currlink->fwd_score;
+	    debug9(printf("Hit %d has score %d\n",hit,fwd_scores[curr_querypos][hit]));
+	    if (fwd_scores[curr_querypos][hit] > best_fwd_score) {
+	      best_fwd_score = fwd_scores[curr_querypos][hit];
 	      best_fwd_hit = hit;
 	    }
 	  }
@@ -3578,30 +3787,29 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
       
 #ifndef SEPARATE_FWD_REV
 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d\n",
-		      querypos,best_fwd_hit));
+		      curr_querypos,best_fwd_hit));
 #else
 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d and best_rev_hit %d\n",
-		      querypos,best_fwd_hit,best_rev_hit));
+		      curr_querypos,best_fwd_hit,best_rev_hit));
 #endif
 
-	if (splicingp == true && best_fwd_hit >= 0 && links[querypos][best_fwd_hit].fwd_hit < 0 && 
-	    grand_fwd_querypos >= 0 && querypos >= grand_fwd_querypos + indexsize_query) {
-	  prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
-	  if ((best_fwd_score = prevlink->fwd_score - (querypos - grand_fwd_querypos)) > 0) {
+	if (splicingp == true && best_fwd_hit >= 0 && links[curr_querypos][best_fwd_hit].fwd_hit < 0 && 
+	    grand_fwd_querypos >= 0 && curr_querypos >= grand_fwd_querypos + indexsize_query) {
+	  if ((best_fwd_score = fwd_scores[grand_fwd_querypos][grand_fwd_hit] - (curr_querypos - grand_fwd_querypos)) > 0) {
 	    prevposition = mappings[grand_fwd_querypos][grand_fwd_hit];
 	    debug12(printf("Considering prevposition %u to position %u as a grand fwd lookback\n",prevposition,position));
 	    for (hit = low_hit; hit < high_hit; hit++) {
-	      if ((position = mappings[querypos][hit]) > prevposition + maxintronlen) {
+	      if ((position = mappings[curr_querypos][hit]) > prevposition + maxintronlen) {
 		debug12(printf("  => Too long\n"));
 	      } else if (position >= prevposition + indexsize_nt) {
-		currlink = &(links[querypos][hit]);
+		currlink = &(links[curr_querypos][hit]);
 		currlink->fwd_consecutive = indexsize_nt;
-		/* currlink->fwd_rootnlinks = 1; */
 		currlink->fwd_pos = grand_fwd_querypos;
 		currlink->fwd_hit = grand_fwd_hit;
-		currlink->fwd_score = best_fwd_score;
-#ifdef DEBUG9
 		currlink->fwd_tracei = ++fwd_tracei;
+		fwd_scores[curr_querypos][hit] = best_fwd_score;
+#ifdef DEBUG9
+		prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
 		currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
 		currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
 		currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
@@ -3609,19 +3817,19 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 	      }
 	    }
 	    debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
-			   querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score));
+			   curr_querypos,grand_fwd_querypos,grand_fwd_hit,fwd_scores[grand_fwd_querypos][grand_fwd_hit]));
 	  }
 	}
 
 	/* Use >= to favor longer path in case of ties */
 	if (best_fwd_hit >= 0 && best_fwd_score >= grand_fwd_score && 
-	    links[querypos][best_fwd_hit].fwd_consecutive > EXON_DEFN) {
+	    links[curr_querypos][best_fwd_hit].fwd_consecutive > EXON_DEFN) {
 	  grand_fwd_score = best_fwd_score;
-	  grand_fwd_querypos = querypos;
+	  grand_fwd_querypos = curr_querypos;
 	  grand_fwd_hit = best_fwd_hit;
-	  debug12(termlink = &(links[querypos][best_fwd_hit]));
+	  debug12(termlink = &(links[curr_querypos][best_fwd_hit]));
 	  debug12(printf("At querypos %d, revising grand fwd to be hit %d with score of %d (pointing back to %d,%d)\n",
-			 querypos,best_fwd_hit,best_fwd_score,termlink->fwd_pos,termlink->fwd_hit));
+			 curr_querypos,best_fwd_hit,best_fwd_score,termlink->fwd_pos,termlink->fwd_hit));
 	}
 
 #ifdef SEPARATE_FWD_REV
@@ -3632,17 +3840,17 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 	if (splicingp == false || use_canonical_p == false) {
 	  /* rev scores should be the same as the fwd scores */
 	} else {
-	  if (best_rev_hit >= 0 && links[querypos][best_rev_hit].rev_hit < 0 && 
-	      grand_rev_querypos >= 0 && querypos >= grand_rev_querypos + indexsize_query) {
+	  if (best_rev_hit >= 0 && links[curr_querypos][best_rev_hit].rev_hit < 0 && 
+	      grand_rev_querypos >= 0 && curr_querypos >= grand_rev_querypos + indexsize_query) {
 	    prevlink = &(links[grand_rev_querypos][grand_rev_hit]);
-	    if ((best_rev_score = prevlink->rev_score - (querypos - grand_rev_querypos)) > 0) {
+	    if ((best_rev_score = prevlink->rev_score - (curr_querypos - grand_rev_querypos)) > 0) {
 	      prevposition = mappings[grand_rev_querypos][grand_rev_hit];
 	      debug12(printf("Considering prevposition %u to position %u as a grand rev lookback\n",prevposition,position));
 	      for (hit = low_hit; hit < high_hit; hit++) {
-		if ((position = mappings[querypos][hit]) > prevposition + maxintronlen) {
+		if ((position = mappings[curr_querypos][hit]) > prevposition + maxintronlen) {
 		  debug12(printf("  => Too long\n"));
 		} else if (position >= prevposition + indexsize_nt) {
-		  currlink = &(links[querypos][hit]);
+		  currlink = &(links[curr_querypos][hit]);
 		  currlink->rev_consecutive = indexsize_nt;
 		  /* currlink->rev_rootnlinks = 1; */
 		  currlink->rev_pos = grand_rev_querypos;
@@ -3657,30 +3865,30 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 		}
 	      }
 	      debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
-			     querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
+			     curr_querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
 	    }
 	  }
 
 	  /* Use >= to favor longer path in case of ties */
 	  if (best_rev_hit >= 0 && best_rev_score >= grand_rev_score &&
-	      links[querypos][best_rev_hit].rev_consecutive > EXON_DEFN) {
+	      links[curr_querypos][best_rev_hit].rev_consecutive > EXON_DEFN) {
 	    grand_rev_score = best_rev_score;
-	    grand_rev_querypos = querypos;
+	    grand_rev_querypos = curr_querypos;
 	    grand_rev_hit = best_rev_hit;
 	  }
 	}
 #endif
       }
 
-      revise_active_lookback(active,firstactive,nactive,low_hit,high_hit,links,querypos);
+      revise_active_lookback(active,firstactive,nactive,low_hit,high_hit,fwd_scores,curr_querypos);
 
-      /* Need to push querypos, even if firstactive[querypos] == -1 */
-      /* Want to skip npositions[querypos] == 0, so we can find adjacent despite mismatch or overabundance */
-      if (npositions[querypos] > 0) {
-	debug6(printf("Pushing querypos %d onto processed\n",querypos));
-	processed = Intlist_push(processed,querypos);
+      /* Need to push querypos, even if firstactive[curr_querypos] == -1 */
+      /* Want to skip npositions[curr_querypos] == 0, so we can find adjacent despite mismatch or overabundance */
+      if (npositions[curr_querypos] > 0) {
+	debug6(printf("Pushing querypos %d onto processed\n",curr_querypos));
+	processed = Intlist_push(processed,curr_querypos);
       }
-      querypos = next_querypos;
+      curr_querypos = next_querypos;
     }
   }
   debug9(printf("End of loop lookback\n"));
@@ -3715,8 +3923,8 @@ align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **ma
 				      indexsize,best_overall_score,favor_right_p,cellpool);
   }
 #else
-  cells = Linkmatrix_get_cells_fwd(&(*ncells),links,querystart,queryend,npositions,
-				   favor_right_p,cellpool);
+  cells = get_cells_fwd(&(*ncells),links,fwd_scores,querystart,queryend,npositions,
+			favor_right_p,cellpool);
 #endif
 
   debug9(FREE(oligo));
@@ -3747,13 +3955,13 @@ get_genomic_nt (char *g_alt, Chrpos_T chrpos, Univcoord_T chroffset,
 
 
 static List_T
-traceback_one (int querypos, int hit, struct Link_T **links, Chrpos_T **mappings,
+traceback_one (int curr_querypos, int hit, struct Link_T **links, Chrpos_T **mappings,
 	       char *queryseq_ptr, char *queryuc_ptr, 
 #ifdef PMAP
 	       Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool lookbackp,
 #endif
 #ifdef DEBUG0
-	       int indexsize,
+	       int **fwd_scores, int indexsize,
 #endif	       
 	       Pairpool_T pairpool, bool fwdp) {
   List_T path = NULL;
@@ -3769,60 +3977,60 @@ traceback_one (int querypos, int hit, struct Link_T **links, Chrpos_T **mappings
 #endif
 
 
-  while (querypos >= 0) {
-    position = mappings[querypos][hit];
+  while (curr_querypos >= 0) {
+    position = mappings[curr_querypos][hit];
 
 #ifdef PMAP
     /* Change querypos positions from protein to nucleotide */
     if (lookbackp == true) {
       c2 = get_genomic_nt(&c2_alt,position+2,chroffset,chrhigh,watsonp);
-      path = Pairpool_push(path,pairpool,querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+      path = Pairpool_push(path,pairpool,curr_querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			   /*dynprogindex*/0);
       c2 = get_genomic_nt(&c2_alt,position+1,chroffset,chrhigh,watsonp);
-      path = Pairpool_push(path,pairpool,querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+      path = Pairpool_push(path,pairpool,curr_querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			   /*dynprogindex*/0);
       c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
-      path = Pairpool_push(path,pairpool,querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+      path = Pairpool_push(path,pairpool,curr_querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			   /*dynprogindex*/0);
     } else {
       c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
-      path = Pairpool_push(path,pairpool,querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+      path = Pairpool_push(path,pairpool,curr_querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			   /*dynprogindex*/0);
       c2 = get_genomic_nt(&c2_alt,position+1,chroffset,chrhigh,watsonp);
-      path = Pairpool_push(path,pairpool,querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+      path = Pairpool_push(path,pairpool,curr_querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			   /*dynprogindex*/0);
       c2 = get_genomic_nt(&c2_alt,position+2,chroffset,chrhigh,watsonp);
-      path = Pairpool_push(path,pairpool,querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+      path = Pairpool_push(path,pairpool,curr_querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			   /*dynprogindex*/0);
     }
 #else
     /* genomic nucleotide same as queryseq */
-    c2 = queryuc_ptr[querypos];
-    path = Pairpool_push(path,pairpool,querypos,position,queryseq_ptr[querypos],MATCH_COMP,
+    c2 = queryuc_ptr[curr_querypos];
+    path = Pairpool_push(path,pairpool,curr_querypos,position,queryseq_ptr[curr_querypos],MATCH_COMP,
 			 c2,/*genomealt*/c2,/*dynprogindex*/0);
 #endif
 
 
 #ifdef DEBUG0
     debug0(oligo = (char *) CALLOC(indexsize+1,sizeof(char)));
-    debug0(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
+    debug0(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
     if (fwdp == true) {
       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
-		    querypos,hit,oligo,position,
-		    links[querypos][hit].fwd_score,links[querypos][hit].fwd_consecutive));
+		    curr_querypos,hit,oligo,position,
+		    fwd_scores[curr_querypos][hit],links[curr_querypos][hit].fwd_consecutive));
       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
-		    links[querypos][hit].fwd_tracei,links[querypos][hit].fwd_intronnfwd,links[querypos][hit].fwd_intronnrev,
-		    links[querypos][hit].fwd_intronnunk));
+		    links[curr_querypos][hit].fwd_tracei,links[curr_querypos][hit].fwd_intronnfwd,links[curr_querypos][hit].fwd_intronnrev,
+		    links[curr_querypos][hit].fwd_intronnunk));
       debug0(printf("\n"));
 
 #ifdef SEPARATE_FWD_REV
     } else {
       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
-		    querypos,hit,oligo,position,
-		    links[querypos][hit].rev_score,links[querypos][hit].rev_consecutive));
+		    curr_querypos,hit,oligo,position,
+		    links[curr_querypos][hit].rev_score,links[curr_querypos][hit].rev_consecutive));
       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
-		    links[querypos][hit].rev_tracei,links[querypos][hit].rev_intronnfwd,links[querypos][hit].rev_intronnrev,
-		    links[querypos][hit].rev_intronnunk));
+		    links[curr_querypos][hit].rev_tracei,links[curr_querypos][hit].rev_intronnfwd,links[curr_querypos][hit].rev_intronnrev,
+		    links[curr_querypos][hit].rev_intronnunk));
       debug0(printf("\n"));
 
 #endif
@@ -3831,18 +4039,18 @@ traceback_one (int querypos, int hit, struct Link_T **links, Chrpos_T **mappings
     debug0(FREE(oligo));
 
     /* prevposition = position; */
-    prev_querypos = querypos;
+    prev_querypos = curr_querypos;
     prevhit = hit;
     if (fwdp == true) {
-      querypos = links[prev_querypos][prevhit].fwd_pos;
+      curr_querypos = links[prev_querypos][prevhit].fwd_pos;
       hit = links[prev_querypos][prevhit].fwd_hit;
 #ifdef SEPARATE_FWD_REV
     } else {
-      querypos = links[prev_querypos][prevhit].rev_pos;
+      curr_querypos = links[prev_querypos][prevhit].rev_pos;
       hit = links[prev_querypos][prevhit].rev_hit;
 #endif
     }
-    debug3(printf("%d %d  %d %d  3\n",prev_querypos,prevhit,querypos,hit));
+    debug3(printf("%d %d  %d %d  3\n",prev_querypos,prevhit,curr_querypos,hit));
   }
   debug0(printf("Done\n\n"));
 
@@ -3851,12 +4059,12 @@ traceback_one (int querypos, int hit, struct Link_T **links, Chrpos_T **mappings
 
 
 static List_T
-traceback_one_snps (int querypos, int hit, struct Link_T **links, Chrpos_T **mappings,
+traceback_one_snps (int curr_querypos, int hit, struct Link_T **links, Chrpos_T **mappings,
 		    char *queryseq_ptr,
 
 		    Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
 #ifdef DEBUG0
-		    int indexsize,
+		    int **fwd_scores, int indexsize,
 #endif
 		    Pairpool_T pairpool, bool fwdp) {
   List_T path = NULL;
@@ -3869,48 +4077,48 @@ traceback_one_snps (int querypos, int hit, struct Link_T **links, Chrpos_T **map
 #endif
 
 
-  while (querypos >= 0) {
-    position = mappings[querypos][hit];
+  while (curr_querypos >= 0) {
+    position = mappings[curr_querypos][hit];
 
 #ifdef PMAP
     /* Change querypos positions from protein to nucleotide */
     c2 = get_genomic_nt(&c2_alt,position+2,chroffset,chrhigh,watsonp);
-    path = Pairpool_push(path,pairpool,querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+    path = Pairpool_push(path,pairpool,curr_querypos*3+2,position+2,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			 /*dynprogindex*/0);
     c2 = get_genomic_nt(&c2_alt,position+1,chroffset,chrhigh,watsonp);
-    path = Pairpool_push(path,pairpool,querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+    path = Pairpool_push(path,pairpool,curr_querypos*3+1,position+1,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			 /*dynprogindex*/0);
     c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
-    path = Pairpool_push(path,pairpool,querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
+    path = Pairpool_push(path,pairpool,curr_querypos*3,position,/*cdna*/c2,MATCH_COMP,c2,c2_alt,
 			 /*dynprogindex*/0);
 #else
     /* genomic nucleotide or SNP same as queryseq */
     c2 = get_genomic_nt(&c2_alt,position,chroffset,chrhigh,watsonp);
-    path = Pairpool_push(path,pairpool,querypos,position,queryseq_ptr[querypos],MATCH_COMP,c2,c2_alt,
+    path = Pairpool_push(path,pairpool,curr_querypos,position,queryseq_ptr[curr_querypos],MATCH_COMP,c2,c2_alt,
 			 /*dynprogindex*/0);
 #endif
 
 
 #ifdef DEBUG0
     debug0(oligo = (char *) CALLOC(indexsize+1,sizeof(char)));
-    debug0(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
+    debug0(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
     if (fwdp == true) {
       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
-		    querypos,hit,oligo,position,
-		    links[querypos][hit].fwd_score,links[querypos][hit].fwd_consecutive));
+		    curr_querypos,hit,oligo,position,
+		    fwd_scores[curr_querypos][hit],links[curr_querypos][hit].fwd_consecutive));
       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
-		    links[querypos][hit].fwd_tracei,links[querypos][hit].fwd_intronnfwd,links[querypos][hit].fwd_intronnrev,
-		    links[querypos][hit].fwd_intronnunk));
+		    links[curr_querypos][hit].fwd_tracei,links[curr_querypos][hit].fwd_intronnfwd,links[curr_querypos][hit].fwd_intronnrev,
+		    links[curr_querypos][hit].fwd_intronnunk));
       debug0(printf("\n"));
 
 #ifdef SEPARATE_FWD_REV
     } else {
       debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d",
-		    querypos,hit,oligo,position,
-		    links[querypos][hit].rev_score,links[querypos][hit].rev_consecutive));
+		    curr_querypos,hit,oligo,position,
+		    links[curr_querypos][hit].rev_score,links[curr_querypos][hit].rev_consecutive));
       debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)",
-		    links[querypos][hit].rev_tracei,links[querypos][hit].rev_intronnfwd,links[querypos][hit].rev_intronnrev,
-		    links[querypos][hit].rev_intronnunk));
+		    links[curr_querypos][hit].rev_tracei,links[curr_querypos][hit].rev_intronnfwd,links[curr_querypos][hit].rev_intronnrev,
+		    links[curr_querypos][hit].rev_intronnunk));
       debug0(printf("\n"));
 #endif
     }
@@ -3918,18 +4126,18 @@ traceback_one_snps (int querypos, int hit, struct Link_T **links, Chrpos_T **map
     debug0(FREE(oligo));
 
     /* prevposition = position; */
-    prev_querypos = querypos;
+    prev_querypos = curr_querypos;
     prevhit = hit;
     if (fwdp == true) {
-      querypos = links[prev_querypos][prevhit].fwd_pos;
+      curr_querypos = links[prev_querypos][prevhit].fwd_pos;
       hit = links[prev_querypos][prevhit].fwd_hit;
 #ifdef SEPARATE_FWD_REV
     } else {
-      querypos = links[prev_querypos][prevhit].rev_pos;
+      curr_querypos = links[prev_querypos][prevhit].rev_pos;
       hit = links[prev_querypos][prevhit].rev_hit;
 #endif
     }
-    debug3(printf("%d %d  %d %d  3\n",prev_querypos,prevhit,querypos,hit));
+    debug3(printf("%d %d  %d %d  3\n",prev_querypos,prevhit,curr_querypos,hit));
   }
   debug0(printf("Done\n\n"));
 
@@ -3950,6 +4158,7 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 			bool favor_right_p, int max_nalignments, bool debug_graphic_p) {
   List_T all_paths = NULL;
   struct Link_T **links;
+  int **fwd_scores;
 
   Cell_T *cells, cell;
   int ncells, i;
@@ -3961,8 +4170,10 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 
   if (oned_matrix_p == true) {
     links = Linkmatrix_1d_new(querylength,npositions,totalpositions);
+    fwd_scores = intmatrix_1d_new(querylength,npositions,totalpositions);
   } else {
     links = Linkmatrix_2d_new(querylength,npositions);
+    fwd_scores = intmatrix_2d_new(querylength,npositions);
   }
 
   /* These are all oligomers */
@@ -3970,7 +4181,8 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
     mappings_dump_R(mappings,npositions,querylength,/*active*/NULL,/*firstactive*/NULL,indexsize,"all.mers");
   }
   
-  cells = align_compute_scores_lookback(&ncells,links,mappings,npositions,totalpositions,
+  cells = align_compute_scores_lookback(&ncells,links,fwd_scores,
+					mappings,npositions,totalpositions,
 					oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
 					querystart,queryend,querylength,
 			       
@@ -3987,7 +4199,7 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 #ifdef SEPARATE_FWD_REV
   debug1(Linkmatrix_print_both(links,mappings,querylength,npositions,queryseq_ptr,indexsize));
 #else
-  debug1(Linkmatrix_print_fwd(links,mappings,querylength,npositions,queryseq_ptr,indexsize));
+  debug1(print_fwd(links,fwd_scores,mappings,querylength,npositions,queryseq_ptr,indexsize));
 #endif
 
   if (ncells == 0) {
@@ -4012,7 +4224,7 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 	all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr,
 								    chroffset,chrhigh,/*watsonp*/plusp,
 #ifdef DEBUG0
-								    indexsize,
+								    fwd_scores,indexsize,
 #endif
 								    pairpool,fwdp));
       }
@@ -4044,7 +4256,7 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 							       chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/true,
 #endif
 #ifdef DEBUG0
-							       indexsize,
+							       fwd_scores,indexsize,
 #endif
 							       pairpool,fwdp));
       }
@@ -4065,8 +4277,10 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 
   if (oned_matrix_p == true) {
     Linkmatrix_1d_free(&links);
+    intmatrix_1d_free(&fwd_scores);
   } else {
     Linkmatrix_2d_free(&links,querylength);
+    intmatrix_2d_free(&fwd_scores,querylength);
   }
 
 #if 0
@@ -4084,7 +4298,8 @@ align_compute_lookback (Chrpos_T **mappings, int *npositions, int totalpositions
 /* Returns celllist */
 /* For PMAP, indexsize is in aa. */
 static Cell_T *
-align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T **mappings, int *npositions, int totalpositions,
+align_compute_scores_lookforward (int *ncells, struct Link_T **links, int **fwd_scores,
+				  Chrpos_T **mappings, int *npositions, int totalpositions,
 				  bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive,
 				  int *firstactive, int *nactive, Cellpool_T cellpool,
 				  int querystart, int queryend, int querylength,
@@ -4099,17 +4314,20 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 				  bool debug_graphic_p, bool favor_right_p) {
   Cell_T *cells;
   Link_T currlink, prevlink;
-  int querypos, indexsize_nt, indexsize_query, hit, nhits, low_hit, high_hit;
+  int curr_querypos, indexsize_nt, indexsize_query, hit, nhits, low_hit, high_hit;
   int nskipped, min_hits, specific_querypos, specific_low_hit, specific_high_hit, next_querypos;
   Intlist_T processed = NULL;
   int best_overall_score = 0;
   int grand_fwd_score, grand_fwd_querypos, grand_fwd_hit, best_fwd_hit, best_fwd_score;
 #ifdef SEPARATE_FWD_REV
   int grand_rev_score, grand_rev_querypos, grand_rev_hit, best_rev_hit, best_rev_score;
-  debug9(int rev_tracei = 0);
+#ifdef DEBUG9
+  int rev_tracei = 0;
+#endif
 #endif
   int **active;
   Chrpos_T position, prevposition;
+  int fwd_tracei = 0;
 #if 0
   int *lastGT, *lastAG;
 #ifndef PMAP
@@ -4118,7 +4336,6 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 #endif
 #ifdef DEBUG9
   char *oligo;
-  int fwd_tracei = 0;
 #endif
 #ifdef DEBUG12
   Link_T termlink = NULL;
@@ -4149,17 +4366,17 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 #endif
 
   /* Initialize */
-  for (querypos = querylength - 1; querypos > queryend; querypos--) {
-    debug6(printf("5.  Initializing firstactive for querypos %d to be -1\n",querypos));
-    firstactive[querypos] = -1;
-    nactive[querypos] = 0;
+  for (curr_querypos = querylength - 1; curr_querypos > queryend; curr_querypos--) {
+    debug6(printf("5.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
+    firstactive[curr_querypos] = -1;
+    nactive[curr_querypos] = 0;
   }
-  while (querypos >= querystart && npositions[querypos] <= 0) {
-    debug6(printf("6.  Initializing firstactive for querypos %d to be -1\n",querypos));
-    debug9(printf("Skipping querypos %d which has no positions\n",querypos));
-    firstactive[querypos] = -1;
-    nactive[querypos] = 0;
-    querypos--;
+  while (curr_querypos >= querystart && npositions[curr_querypos] <= 0) {
+    debug6(printf("6.  Initializing firstactive for querypos %d to be -1\n",curr_querypos));
+    debug9(printf("Skipping querypos %d which has no positions\n",curr_querypos));
+    firstactive[curr_querypos] = -1;
+    nactive[curr_querypos] = 0;
+    curr_querypos--;
   }
   if (anchoredp == true) {
     /* Guaranteed to find a hit */
@@ -4172,9 +4389,9 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
     currlink = &(links[anchor_querypos][hit]);	
 #ifndef SEPARATE_FWD_REV
     currlink->fwd_pos = currlink->fwd_hit = -1;
-    currlink->fwd_score = indexsize_nt;
     currlink->fwd_consecutive = EXON_DEFN;
-    debug9(currlink->fwd_tracei = 0);
+    currlink->fwd_tracei = 0;
+    fwd_scores[anchor_querypos][hit] = indexsize_nt;
 #else
     fprintf(stderr,"Not implemented yet\n");
     abort();
@@ -4188,31 +4405,31 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
     debug6(printf("Pushing anchorpos %d as processed\n",anchor_querypos));
     processed = Intlist_push(processed,anchor_querypos);
 
-  } else if (querypos >= querystart) {
-    for (hit = npositions[querypos] - 1; hit >= 0; --hit) {
-      currlink = &(links[querypos][hit]);
+  } else if (curr_querypos >= querystart) {
+    for (hit = npositions[curr_querypos] - 1; hit >= 0; --hit) {
+      currlink = &(links[curr_querypos][hit]);
 #ifndef SEPARATE_FWD_REV
       currlink->fwd_pos = currlink->fwd_hit = -1;
-      currlink->fwd_score = indexsize_nt;
       currlink->fwd_consecutive = indexsize_nt;
-      debug9(currlink->fwd_tracei = -1);
+      currlink->fwd_tracei = -1;
       /* currlink->fwd_rootnlinks = 1; */
+      fwd_scores[curr_querypos][hit] = indexsize_nt;
 #else
       currlink->fwd_pos = currlink->fwd_hit = -1;
       currlink->fwd_score = indexsize_nt;
       currlink->fwd_consecutive = indexsize_nt;
-      debug9(currlink->fwd_tracei = -1);
+      currlink->fwd_tracei = -1;
       /* currlink->fwd_rootnlinks = 1; */
       if (splicingp == true) {
 	currlink->rev_pos = currlink->rev_hit = -1;
-	currlink->rev_score = indexsize_nt;
 	currlink->rev_consecutive = indexsize_nt;
 	currlink->rev_tracei = -1;
 	/* currlink->rev_rootnlinks = 1; */
+	rev_scores[curr_querypos][hit] = indexsize_nt;
       }
 #endif
     }
-    revise_active_lookforward(active,firstactive,nactive,0,npositions[querypos],links,querypos);
+    revise_active_lookforward(active,firstactive,nactive,0,npositions[curr_querypos],fwd_scores,curr_querypos);
   }
 
 
@@ -4231,8 +4448,8 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
   min_hits = 1000000;
   specific_querypos = -1;
 
-  /* querypos -= 1; -- this causes querypos at queryend to be ignored */
-  while (querypos >= querystart) {
+  /* curr_querypos -= 1; -- this causes curr_querypos at queryend to be ignored */
+  while (curr_querypos >= querystart) {
     best_fwd_score = 0;
     best_fwd_hit = -1;
 #ifdef SEPARATE_FWD_REV
@@ -4240,101 +4457,91 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
     best_rev_hit = -1;
 #endif
     
-    debug9(printf("Positions at querypos %d (reverse order):",querypos);
-	   for (hit = npositions[querypos] - 1; hit >= 0; --hit) {
-	     printf(" %u",mappings[querypos][hit]);
+    debug9(printf("Positions at querypos %d (reverse order):",curr_querypos);
+	   for (hit = npositions[curr_querypos] - 1; hit >= 0; --hit) {
+	     printf(" %u",mappings[curr_querypos][hit]);
 	   }
 	   printf("\n");
 	   );
 
-    hit = npositions[querypos] - 1;
-    while (hit >= 0 && mappings[querypos][hit] > maxactive[querypos]) {
+    hit = npositions[curr_querypos] - 1;
+    while (hit >= 0 && mappings[curr_querypos][hit] > maxactive[curr_querypos]) {
       --hit;
     }
     high_hit = hit + 1;
-    while (hit >= 0 && mappings[querypos][hit] >= minactive[querypos]) {
+    while (hit >= 0 && mappings[curr_querypos][hit] >= minactive[curr_querypos]) {
       --hit;
     }
     low_hit = hit + 1;
     debug9(printf("Querypos %d has hit %d..%d out of %d (minactive = %u, maxactive = %u)\n",
-		  querypos,high_hit-1,low_hit,npositions[querypos],minactive[querypos],maxactive[querypos]));
+		  curr_querypos,high_hit-1,low_hit,npositions[curr_querypos],minactive[curr_querypos],maxactive[curr_querypos]));
 
     /* Can't use nactive yet, so use high_hit - low_hit */
     if (skip_repetitive_p && high_hit - low_hit >= MAX_NACTIVE && nskipped <= MAX_SKIPPED) { /* Previously turned off */
-      debug6(printf("Too many active (%d - %d) at querypos %d.  Setting firstactive to be -1\n",high_hit,low_hit,querypos));
-      firstactive[querypos] = -1;
-      nactive[querypos] = 0;
+      debug6(printf("Too many active (%d - %d) at querypos %d.  Setting firstactive to be -1\n",high_hit,low_hit,curr_querypos));
+      firstactive[curr_querypos] = -1;
+      nactive[curr_querypos] = 0;
       nskipped++;
       debug9(printf("  %d skipped because of %d hits\n",nskipped,high_hit - low_hit + 1));
 
       /* Store most specific querypos in section of skipped */
       if (high_hit - low_hit < min_hits) {
 	min_hits = high_hit - low_hit;
-	specific_querypos = querypos;
+	specific_querypos = curr_querypos;
 	specific_low_hit = low_hit;
 	specific_high_hit = high_hit;
       }
-      querypos--;
+      curr_querypos--;
 
     } else {
       if (nskipped > MAX_SKIPPED) {
 	debug9(printf("Too many skipped.  Going back to specific querypos %d\n",specific_querypos));
-	next_querypos = querypos;
-	querypos = specific_querypos;
+	next_querypos = curr_querypos;
+	curr_querypos = specific_querypos;
 	low_hit = specific_low_hit;
 	high_hit = specific_high_hit;
       } else {
-	next_querypos = querypos - 1;
+	next_querypos = curr_querypos - 1;
       }
 
       if ((nhits = high_hit - low_hit) > 0) {
 	if (nhits == 1) {
-	  currlink = &(links[querypos][low_hit]);
-	  position = mappings[querypos][low_hit];
+	  currlink = &(links[curr_querypos][low_hit]);
+	  position = mappings[curr_querypos][low_hit];
 
-	  debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
+	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
 	  debug9(printf("Finding link looking forward from querypos %d,%d at %ux%d (%s).  prev_querypos was %d\n",
-			querypos,low_hit,position,active[querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
-	  score_querypos_lookforward_one(
-#ifdef DEBUG9
-					 &fwd_tracei,
-#endif
-					 currlink,querypos,querystart,queryend,position,
-					 links,mappings,active,firstactive,
+			curr_querypos,low_hit,position,active[curr_querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1));
+	  score_querypos_lookforward_one(&fwd_tracei,currlink,curr_querypos,low_hit,querystart,queryend,position,
+					 links,fwd_scores,mappings,active,firstactive,
 					 chroffset,chrhigh,plusp,
 					 indexsize,processed,
 					 anchoredp,localp,splicingp,use_canonical_p,
 					 non_canonical_penalty);
-
-	  if (currlink->fwd_score > 0) {
-	    debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,currlink->fwd_score));
-	    best_fwd_score = currlink->fwd_score;
+	  if (fwd_scores[curr_querypos][low_hit] > 0) {
+	    debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,fwd_scores[curr_querypos][low_hit]));
+	    best_fwd_score = fwd_scores[curr_querypos][low_hit];
 	    best_fwd_hit = low_hit;
 	  }
 
 	} else {
-	  debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize));
+	  debug9(strncpy(oligo,&(queryseq_ptr[curr_querypos]),indexsize));
 	  debug9(printf("Finding links looking forward from querypos %d,%d..%d at (%u..%u) (%s).  prev_querypos was %d\n",
-			querypos,high_hit-1,low_hit,mappings[querypos][high_hit-1],mappings[querypos][low_hit],
+			curr_querypos,high_hit-1,low_hit,mappings[curr_querypos][high_hit-1],mappings[curr_querypos][low_hit],
 			oligo,processed ? Intlist_head(processed) : -1));
 	
-	  score_querypos_lookforward_mult(
-#ifdef DEBUG9
-					  &fwd_tracei,
-#endif
-					  low_hit,high_hit,querypos,querystart,queryend,
-					  /*positions*/&(mappings[querypos][low_hit]),
-					  links,mappings,active,firstactive,chroffset,chrhigh,plusp,
+	  score_querypos_lookforward_mult(&fwd_tracei,low_hit,high_hit,curr_querypos,querystart,queryend,
+					  /*positions*/&(mappings[curr_querypos][low_hit]),
+					  links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp,
 					  indexsize,processed,
 					  anchoredp,localp,splicingp,use_canonical_p,
 					  non_canonical_penalty);
 
 	  debug9(printf("Checking hits from high_hit %d to low_hit %d\n",high_hit,low_hit));
 	  for (hit = high_hit - 1; hit >= low_hit; hit--) {
-	    currlink = &(links[querypos][hit]);
-	    debug9(printf("Hit %d has score %d\n",hit,currlink->fwd_score));
-	    if (currlink->fwd_score > best_fwd_score) {
-	      best_fwd_score = currlink->fwd_score;
+	    debug9(printf("Hit %d has score %d\n",hit,fwd_scores[curr_querypos][hit]));
+	    if (fwd_scores[curr_querypos][hit] > best_fwd_score) {
+	      best_fwd_score = fwd_scores[curr_querypos][hit];
 	      best_fwd_hit = hit;
 	    }
 	  }
@@ -4350,30 +4557,30 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
       
 #ifndef SEPARATE_FWD_REV
 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d\n",
-		      querypos,best_fwd_hit));
+		      curr_querypos,best_fwd_hit));
 #else
 	debug9(printf("Overall result at querypos %d yields best_fwd_hit %d and best_rev_hit %d\n",
-		      querypos,best_fwd_hit,best_rev_hit));
+		      curr_querypos,best_fwd_hit,best_rev_hit));
 #endif
 
-	if (splicingp == true && best_fwd_hit >= 0 && links[querypos][best_fwd_hit].fwd_hit < 0 && 
-	    grand_fwd_querypos <= querylength - indexsize_query && querypos + indexsize_query <= grand_fwd_querypos) {
-	  prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
-	  if ((best_fwd_score = prevlink->fwd_score - (grand_fwd_querypos - querypos)) > 0) {
+	if (splicingp == true && best_fwd_hit >= 0 && links[curr_querypos][best_fwd_hit].fwd_hit < 0 && 
+	    grand_fwd_querypos <= querylength - indexsize_query && curr_querypos + indexsize_query <= grand_fwd_querypos) {
+	  if ((best_fwd_score = fwd_scores[grand_fwd_querypos][grand_fwd_hit] - (grand_fwd_querypos - curr_querypos)) > 0) {
 	    prevposition = mappings[grand_fwd_querypos][grand_fwd_hit];
 	    debug12(printf("Considering prevposition %u to position %u as a grand fwd lookforward\n",prevposition,position));
 	    for (hit = high_hit - 1; hit >= low_hit; --hit) {
-	      if ((position = mappings[querypos][hit]) + maxintronlen < prevposition) {
+	      if ((position = mappings[curr_querypos][hit]) + maxintronlen < prevposition) {
 		debug12(printf("  => Too long\n"));
 	      } else if (position + indexsize_nt <= prevposition) {
-		currlink = &(links[querypos][hit]);
+		currlink = &(links[curr_querypos][hit]);
 		currlink->fwd_consecutive = indexsize_nt;
-		/* currlink->fwd_rootnlinks = 1; */
 		currlink->fwd_pos = grand_fwd_querypos;
 		currlink->fwd_hit = grand_fwd_hit;
-		currlink->fwd_score = best_fwd_score;
-#ifdef DEBUG9
 		currlink->fwd_tracei = ++fwd_tracei;
+		/* currlink->fwd_rootnlinks = 1; */
+		fwd_scores[curr_querypos][hit] = best_fwd_score;
+#ifdef DEBUG9
+		prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]);
 		currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd;
 		currlink->fwd_intronnrev = prevlink->fwd_intronnrev;
 		currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1;
@@ -4381,19 +4588,19 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 	      }
 	    }
 	    debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n",
-			   querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score));
+			   curr_querypos,grand_fwd_querypos,grand_fwd_hit,fwd_scores[grand_fwd_querypos][grand_fwd_hit]));
 	  }
 	}
 
 	/* Use >= to favor longer path in case of ties */
 	if (best_fwd_hit >= 0 && best_fwd_score >= grand_fwd_score && 
-	    links[querypos][best_fwd_hit].fwd_consecutive > EXON_DEFN) {
+	    links[curr_querypos][best_fwd_hit].fwd_consecutive > EXON_DEFN) {
 	  grand_fwd_score = best_fwd_score;
-	  grand_fwd_querypos = querypos;
+	  grand_fwd_querypos = curr_querypos;
 	  grand_fwd_hit = best_fwd_hit;
-	  debug12(termlink = &(links[querypos][best_fwd_hit]));
+	  debug12(termlink = &(links[curr_querypos][best_fwd_hit]));
 	  debug12(printf("At querypos %d, revising grand fwd to be hit %d with score of %d (pointing back to %d,%d)\n",
-			 querypos,best_fwd_hit,best_fwd_score,termlink->fwd_pos,termlink->fwd_hit));
+			 curr_querypos,best_fwd_hit,best_fwd_score,termlink->fwd_pos,termlink->fwd_hit));
 	}
 
 #ifdef SEPARATE_FWD_REV
@@ -4404,17 +4611,17 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 	if (splicingp == false || use_canonical_p == false) {
 	  /* rev scores should be the same as the fwd scores */
 	} else {
-	  if (best_rev_hit >= 0 && links[querypos][best_rev_hit].rev_hit < 0 && 
-	      grand_rev_querypos <= querylength - indexsize_query && querypos + indexsize_query <= grand_rev_querypos) {
+	  if (best_rev_hit >= 0 && links[curr_querypos][best_rev_hit].rev_hit < 0 && 
+	      grand_rev_querypos <= querylength - indexsize_query && curr_querypos + indexsize_query <= grand_rev_querypos) {
 	    prevlink = &(links[grand_rev_querypos][grand_rev_hit]);
-	    if ((best_rev_score = prevlink->rev_score - (grand_rev_querypos - querypos)) > 0) {
+	    if ((best_rev_score = prevlink->rev_score - (grand_rev_querypos - curr_querypos)) > 0) {
 	      prevposition = mappings[grand_rev_querypos][grand_rev_hit];
 	      debug12(printf("Considering prevposition %u to position %u as a grand rev lookforward\n",prevposition,position));
 	      for (hit = high_hit - 1; hit >= low_hit; --hit) {
-		if ((position = mappings[querypos][hit]) + maxintronlen < prevposition) {
+		if ((position = mappings[curr_querypos][hit]) + maxintronlen < prevposition) {
 		  debug12(printf("  => Too long\n"));
 		} else if (position + indexsize_nt <= prevposition) {
-		  currlink = &(links[querypos][hit]);
+		  currlink = &(links[curr_querypos][hit]);
 		  currlink->rev_consecutive = indexsize_nt;
 		  /* currlink->rev_rootnlinks = 1; */
 		  currlink->rev_pos = grand_rev_querypos;
@@ -4429,30 +4636,30 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 		}
 	      }
 	      debug12(printf("At querypos %d, setting all rev hits to point back to grand_rev %d,%d with a score of %d\n",
-			     querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
+			     curr_querypos,grand_rev_querypos,grand_rev_hit,prevlink->rev_score));
 	    }
 	  }
 
 	  /* Use >= to favor longer path in case of ties */
 	  if (best_rev_hit >= 0 && best_rev_score >= grand_rev_score &&
-	      links[querypos][best_rev_hit].rev_consecutive > EXON_DEFN) {
+	      links[curr_querypos][best_rev_hit].rev_consecutive > EXON_DEFN) {
 	    grand_rev_score = best_rev_score;
-	    grand_rev_querypos = querypos;
+	    grand_rev_querypos = curr_querypos;
 	    grand_rev_hit = best_rev_hit;
 	  }
 	}
 #endif
       }
 
-      revise_active_lookforward(active,firstactive,nactive,low_hit,high_hit,links,querypos);
+      revise_active_lookforward(active,firstactive,nactive,low_hit,high_hit,fwd_scores,curr_querypos);
 
-      /* Need to push querypos, even if firstactive[querypos] == -1 */
-      /* Want to skip npositions[querypos] == 0, so we can find adjacent despite mismatch or overabundance */
-      if (npositions[querypos] > 0) {
-	debug6(printf("Pushing querypos %d onto processed\n",querypos));
-	processed = Intlist_push(processed,querypos);
+      /* Need to push curr_querypos, even if firstactive[curr_querypos] == -1 */
+      /* Want to skip npositions[curr_querypos] == 0, so we can find adjacent despite mismatch or overabundance */
+      if (npositions[curr_querypos] > 0) {
+	debug6(printf("Pushing querypos %d onto processed\n",curr_querypos));
+	processed = Intlist_push(processed,curr_querypos);
       }
-      querypos = next_querypos;
+      curr_querypos = next_querypos;
     }
   }
   debug9(printf("End of loop lookforward\n"));
@@ -4488,8 +4695,8 @@ align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T *
 				      indexsize,best_overall_score,favor_right_p,cellpool);
   }
 #else
-  cells = Linkmatrix_get_cells_fwd(&(*ncells),links,querystart,queryend,npositions,
-				   favor_right_p,cellpool);
+  cells = get_cells_fwd(&(*ncells),links,fwd_scores,querystart,queryend,npositions,
+			favor_right_p,cellpool);
 #endif
 
   debug9(FREE(oligo));
@@ -4512,6 +4719,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 			   bool favor_right_p, int max_nalignments, bool debug_graphic_p) {
   List_T all_paths = NULL;
   struct Link_T **links;
+  int **fwd_scores;
 
   Cell_T *cells, cell;
   int ncells, i;
@@ -4522,8 +4730,10 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 
   if (oned_matrix_p == true) {
     links = Linkmatrix_1d_new(querylength,npositions,totalpositions);
+    fwd_scores = intmatrix_1d_new(querylength,npositions,totalpositions);
   } else {
     links = Linkmatrix_2d_new(querylength,npositions);
+    fwd_scores = intmatrix_2d_new(querylength,npositions);
   }
 
   /* These are all oligomers */
@@ -4531,7 +4741,8 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
     mappings_dump_R(mappings,npositions,querylength,/*active*/NULL,/*firstactive*/NULL,indexsize,"all.mers");
   }
   
-  cells = align_compute_scores_lookforward(&ncells,links,mappings,npositions,totalpositions,
+  cells = align_compute_scores_lookforward(&ncells,links,fwd_scores,
+					   mappings,npositions,totalpositions,
 					   oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool,
 					   querystart,queryend,querylength,
 					   
@@ -4548,7 +4759,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 #ifdef SEPARATE_FWD_REV
   debug1(Linkmatrix_print_both(links,mappings,querylength,npositions,queryseq_ptr,indexsize));
 #else
-  debug1(Linkmatrix_print_fwd(links,mappings,querylength,npositions,queryseq_ptr,indexsize));
+  debug1(print_fwd(links,fwd_scores,mappings,querylength,npositions,queryseq_ptr,indexsize));
 #endif
 
   if (ncells == 0) {
@@ -4571,7 +4782,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 
 
       if (debug_graphic_p == true) {
-	best_path_dump_R(links,mappings,querypos,hit,fwdp,"best.path");
+	/* best_path_dump_R(links,mappings,querypos,hit,fwdp,"best.path"); */
 	printf("plot(all.mers,col=\"black\",pch=\".\",xlab=\"Query\",ylab=\"Genomic\")\n");
 	printf("points(active.mers,col=\"red\",pch=\".\")\n");
 	printf("points(best.path,col=\"green\",pch=\".\")\n");
@@ -4583,7 +4794,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 	all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr,
 								    chroffset,chrhigh,/*watsonp*/plusp,
 #ifdef DEBUG0
-								    indexsize,
+								    fwd_scores,indexsize,
 #endif
 								    pairpool,fwdp));
       } else {
@@ -4592,7 +4803,7 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 							       chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/false,
 #endif
 #ifdef DEBUG0
-							       indexsize,
+							       fwd_scores,indexsize,
 #endif
 							       pairpool,fwdp));
       }
@@ -4613,8 +4824,10 @@ align_compute_lookforward (Chrpos_T **mappings, int *npositions, int totalpositi
 
   if (oned_matrix_p == true) {
     Linkmatrix_1d_free(&links);
+    intmatrix_1d_free(&fwd_scores);
   } else {
     Linkmatrix_2d_free(&links,querylength);
+    intmatrix_2d_free(&fwd_scores,querylength);
   }
 
 #if 0
@@ -4710,11 +4923,15 @@ convert_to_nucleotides (List_T path,
   debug5(printf("Beginning convert_to_nucleotides with %d pairs.  query_offset = %d, indexsize_nt = %d\n",
 		List_length(path),query_offset,indexsize_nt));
 
-  /* pairptr = path; */
-  /* path = Pairpool_pop(path,&pair); */
-  pair = (Pair_T) path->first;
-  querypos = pair->querypos;
-  genomepos = pair->genomepos;
+  if (path == NULL) {
+    return (List_T) NULL;
+  } else {
+    /* pairptr = path; */
+    /* path = Pairpool_pop(path,&pair); */
+    pair = (Pair_T) path->first;
+    querypos = pair->querypos;
+    genomepos = pair->genomepos;
+  }
 
 #ifdef PMAP
   default_fill = indexsize_nt - 3;
@@ -4979,11 +5196,15 @@ convert_to_nucleotides_snps (List_T path,
 
   debug5(printf("Beginning convert_to_nucleotides_snps with %d pairs\n",List_length(path)));
 
-  /* pairptr = path; */
-  /* path = Pairpool_pop(path,&pair); */
-  pair = (Pair_T) path->first;
-  querypos = pair->querypos;
-  genomepos = pair->genomepos;
+  if (path == NULL) {
+    return (List_T) NULL;
+  } else {
+    /* pairptr = path; */
+    /* path = Pairpool_pop(path,&pair); */
+    pair = (Pair_T) path->first;
+    querypos = pair->querypos;
+    genomepos = pair->genomepos;
+  }
 
 #ifdef PMAP
   default_fill = indexsize_nt - 3;
@@ -6334,7 +6555,7 @@ Stage2_compute_ends (char *queryseq_ptr, char *queryuc_ptr, int querylength, int
 #endif
 
 
-  if (totalpositions == 0) {
+  if (totalpositions <= 0) {
     debug(printf("Quitting because totalpositions is zero\n"));
     all_results = (List_T) NULL;
 
diff --git a/src/stage3.c b/src/stage3.c
index e448541..a3d2a74 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 198281 2016-09-24 00:55:49Z twu $";
+static char rcsid[] = "$Id: stage3.c 208645 2017-07-28 00:55:34Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -72,7 +72,10 @@ static char rcsid[] = "$Id: stage3.c 198281 2016-09-24 00:55:49Z twu $";
 #define DYNPROGINDEX_MINOR +1
 
 #define DUAL_BREAK_PROB_THRESHOLD 0.90
-#define MIN_STAGE2_FOR_DUALBREAK 3 /* was 24, but misses small exons */
+
+/* If too small, e.g., 3, misses introns with a nearby mismatch.  If too large, e.g., 24, misses small exons */
+#define MIN_STAGE2_FOR_DUALBREAK 6
+
 #define MIN_MICROEXON_LENGTH 3
 
 #define THETA_SLACK 0.10
@@ -290,6 +293,14 @@ static const Except_T coordinate_error = {"Coordinate error"};
 #define debug21(x)
 #endif
 
+#ifdef DEBUG99
+#define debug99(x) x
+#else /* DEBUG99 not defined: debug99(x) expands to nothing */
+#define debug99(x)
+#endif /* DEBUG99 */
+
+
+
 
 static bool splicingp;
 static bool novelsplicingp;
@@ -928,6 +939,89 @@ Stage3_queryend_cmp (const void *a, const void *b) {
   }
 }
 
+/* qsort-style comparator that orders Stage3 objects by ascending chrnum.
+   a and b point to array elements of type T.  Returns -1, 0, or +1. */
+int
+Stage3_chrnum_cmp (const void *a, const void *b) {
+  T lhs = * (T *) a;
+  T rhs = * (T *) b;
+  int lower, higher;
+
+  lower = (lhs->chrnum < rhs->chrnum);
+  higher = (rhs->chrnum < lhs->chrnum);
+  /* Each flag is 0 or 1, so the difference is -1, 0, or +1 */
+  return higher - lower;
+}
+
+
+/* qsort-style comparator: orders by chrnum, then by the querypos of the
+   first pair, then by query span (shorter first).  a and b point to array
+   elements of type T.  Returns -1, 0, or +1. */
+int
+Stage3_chrnum_querystart_cmp (const void *a, const void *b) {
+  T lhs = * (T *) a;
+  T rhs = * (T *) b;
+  int lhs_start, rhs_start, lhs_span, rhs_span;
+
+  /* Primary key: chrnum */
+  if (lhs->chrnum < rhs->chrnum) {
+    return -1;
+  }
+  if (rhs->chrnum < lhs->chrnum) {
+    return +1;
+  }
+
+  /* Secondary key: query position of the first pair */
+  lhs_start = Pair_querypos(&(lhs->pairarray[0]));
+  rhs_start = Pair_querypos(&(rhs->pairarray[0]));
+  if (lhs_start < rhs_start) {
+    return -1;
+  }
+  if (rhs_start < lhs_start) {
+    return +1;
+  }
+
+  /* Put longer segments at the end so they supersede earlier chimeric matches */
+  lhs_span = Pair_querypos(&(lhs->pairarray[lhs->npairs-1])) - lhs_start;
+  rhs_span = Pair_querypos(&(rhs->pairarray[rhs->npairs-1])) - rhs_start;
+  /* Each comparison yields 0 or 1, so the difference is -1, 0, or +1 */
+  return (rhs_span < lhs_span) - (lhs_span < rhs_span);
+}
+
+/* qsort-style comparator: orders by chrnum, then by the querypos of the
+   last pair, then by query span (shorter first).  a and b point to array
+   elements of type T.  Returns -1, 0, or +1. */
+int
+Stage3_chrnum_queryend_cmp (const void *a, const void *b) {
+  T lhs = * (T *) a;
+  T rhs = * (T *) b;
+  int lhs_end, rhs_end, lhs_span, rhs_span;
+
+  /* Primary key: chrnum */
+  if (lhs->chrnum < rhs->chrnum) {
+    return -1;
+  }
+  if (rhs->chrnum < lhs->chrnum) {
+    return +1;
+  }
+
+  /* Secondary key: query position of the last pair */
+  lhs_end = Pair_querypos(&(lhs->pairarray[lhs->npairs-1]));
+  rhs_end = Pair_querypos(&(rhs->pairarray[rhs->npairs-1]));
+  if (lhs_end < rhs_end) {
+    return -1;
+  }
+  if (rhs_end < lhs_end) {
+    return +1;
+  }
+
+  /* Put longer segments at the end so they supersede earlier chimeric matches */
+  lhs_span = lhs_end - Pair_querypos(&(lhs->pairarray[0]));
+  rhs_span = rhs_end - Pair_querypos(&(rhs->pairarray[0]));
+  /* Each comparison yields 0 or 1, so the difference is -1, 0, or +1 */
+  return (rhs_span < lhs_span) - (lhs_span < rhs_span);
+}
+
 
 int
 Stage3_identity_cmp (const void *a, const void *b) {
@@ -1285,10 +1379,10 @@ get_genomic_seg (Chrpos_T genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh
 static List_T
 insert_gapholders (List_T pairs, char *queryseq_ptr, char *queryuc_ptr,
 		   Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp,
-		   Pairpool_T pairpool) {
+		   Pairpool_T pairpool, bool finalp) {
   List_T path = NULL;
   Pair_T pair, leftpair, gappair = NULL;
-  int queryjump, genomejump;
+  int queryjump, genomejump, i;
   bool firstp = true;
   char comp, c, g, g_alt;
 
@@ -1369,6 +1463,33 @@ insert_gapholders (List_T pairs, char *queryseq_ptr, char *queryuc_ptr,
       path = List_transfer_one(path,&pairs);
 #endif
 
+    } else if (finalp == true && queryjump == genomejump) {
+      /* Fill gap with nucleotides */
+      debug(printf("Filling a gap with nucleotides at %d..%d because of queryjump %d == genomejump %d\n",
+		   leftpair->querypos,pair->querypos,queryjump,genomejump));
+      for (i = 1; i <= queryjump; i++) {
+	g = get_genomic_nt(&g_alt,leftpair->genomepos+i,chroffset,chrhigh,watsonp);
+	/* It is possible for a gap with c == g to occur in the middle of a repetitive oligo, such as poly-A */
+	if ((c = queryuc_ptr[leftpair->querypos+i]) == g || c == g_alt) {
+	  comp = MATCH_COMP;
+#ifdef PMAP
+	} else if (Dynprog_consistent_p(c,g,g_alt) == true) {
+	  comp = AMBIGUOUS_COMP;
+#endif
+	} else {
+	  comp = MISMATCH_COMP;
+	}
+	debug(printf(" => query %c, genomic %c\n",queryseq_ptr[leftpair->querypos+i],g));
+
+	path = Pairpool_push(path,pairpool,leftpair->querypos+i,leftpair->genomepos+i,queryseq_ptr[leftpair->querypos+i],
+			     comp,g,g_alt,/*dynprogindex*/0);
+      }
+#ifdef WASTE
+      path = Pairpool_push_existing(path,pairpool,pair);
+#else
+      path = List_transfer_one(path,&pairs);
+#endif
+
     } else if (queryjump == 1 && genomejump == 1) {
       /* Handle a single mismatch by a simple fill */
       g = get_genomic_nt(&g_alt,leftpair->genomepos+1,chroffset,chrhigh,watsonp);
@@ -1724,7 +1845,7 @@ assign_intron_probs (List_T path, int cdna_direction, bool watsonp,
     introntype, intronlength, genomicpos;
   char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt, c2, c2_alt;
 
-  debug(printf("\n** Starting assign_intron_probs\n"));
+  debug(printf("\n** Starting assign_intron_probs with watsonp %d and cdna_direction %d\n",watsonp,cdna_direction));
   while (path != NULL) {
     /* pairptr = path; */
     /* path = Pairpool_pop(path,&pair); */
@@ -3507,8 +3628,8 @@ canonicalp (bool knowngapp, char comp, double donor_prob, double acceptor_prob,
 static int
 sufficient_splice_prob_local (int support, int nmatches, int nmismatches, double distal_spliceprob,
 			      double medial_spliceprob) {
-  debug3(printf("Checking for sufficient splice prob, based on %d matches, %d mismatches, and support %d\n",
-		nmatches,nmismatches,support));
+  debug3(printf("Checking for sufficient splice prob, based on %d matches, %d mismatches, support %d, distal spliceprob %f, and medial spliceprob %f\n",
+		nmatches,nmismatches,support,distal_spliceprob,medial_spliceprob));
   nmatches -= 2*nmismatches;
   if (nmatches < 0) {
     return (int) false;
@@ -3525,6 +3646,17 @@ sufficient_splice_prob_local (int support, int nmatches, int nmismatches, double
   }
 }
 
+/* Strict splice-site test that looks only at the two probabilities:
+   true when distal_spliceprob exceeds 0.95 and medial_spliceprob
+   exceeds 0.90 (both comparisons are strict), false otherwise. */
+static bool
+sufficient_splice_prob_strict (double distal_spliceprob, double medial_spliceprob) {
+  debug3(printf("Checking for sufficient splice prob, based on spliceprob %f, and medial spliceprob %f\n",
+		distal_spliceprob,medial_spliceprob));
+
+  return (distal_spliceprob > 0.95 && medial_spliceprob > 0.90) ? true : false;
+}
+
 
 
 #if 0
@@ -3576,22 +3708,200 @@ exon_length_3 (List_T path) {
 /* Also handles case where novelsplicingp == false */
 /* pairs -> pairs */
 static List_T
-trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
-		       int cdna_direction
+trim_end5_indels (List_T pairs, int ambig_end_length,
+		  Dynprog_T dynprog, Univcoord_T chroffset, Univcoord_T chrhigh,
+		  char *queryseq_ptr, char *queryuc_ptr,
+		  int cdna_direction, bool watsonp, bool jump_late_p,
+		  Pairpool_T pairpool, double defect_rate) {
+  List_T path, exon, pairptr, p;
+  Pair_T pair, medial;
+  int max_nmatches = 0, max_nmismatches;
+  int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
+  int max_score, score;
+  bool nearindelp = false;
+  int nindels;
+
+  int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels;
+  int querydp3_medialgap, genomedp3_medialgap, queryjump, genomejump;
+  List_T continuous_gappairs_medialgap;
+  int dynprogindex_minor = 0;
+
+  debug3(printf("Starting trim_end5_indels\n"));
+
+  /* Handle first exon */
+  if (pairs == NULL) {
+    /* *trim5p = false; */
+    return (List_T) NULL;
+  } else if (ambig_end_length > 0) {
+    /* Don't mess with ambiguous end */
+    /* *trim5p = false; */
+    return pairs;
+  } else {
+    pair = pairs->first;
+    debug3(printf("querystart %d\n",pair->querypos));
+  }
+
+  exon = (List_T) NULL;
+  while (pairs != NULL && pair->comp != INDEL_COMP) {
+    pairptr = pairs;
+    pairs = Pairpool_pop(pairs,&pair);
+#ifdef WASTE
+    exon = Pairpool_push_existing(exon,pairpool,pair);
+#else
+    exon = List_push_existing(exon,pairptr);
+#endif
+  }
+
+  while (pairs != NULL && ((Pair_T) pairs->first)->comp == INDEL_COMP) {
+    pairptr = pairs;
+    pairs = Pairpool_pop(pairs,&pair);
 #ifdef WASTE
-		       , Pairpool_T pairpool
+    exon = Pairpool_push_existing(exon,pairpool,pair);
+#else
+    exon = List_push_existing(exon,pairptr);
+#endif
+  }
+  debug3(printf("End exon:\n"));
+  debug3(Pair_dump_list(exon,true));
+
+
+  if (exon == NULL) {
+    /* *trim5p = false; */
+    return pairs;
+
+  } else {
+    p = exon;
+    nindels = 1;
+    while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
+      p = List_next(p);
+      nindels++;
+    }
+
+    max_nmatches = max_nmismatches = 0;
+    nmatches = nmismatches = 0;
+    max_score = score = 0;
+    /* Evaluate region distal to indel */
+    while (p != NULL) {
+      pair = (Pair_T) List_head(p);
+      if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) {
+	score += 1;
+	nmatches += 1;
+      } else {
+	score -= 3;
+	nmismatches += 1;
+      }
+      if (score > max_score) {
+	max_score = score;
+	max_nmatches = nmatches;
+	max_nmismatches = nmismatches;
+      }
+      debug3(printf("5' querypos %d => score %d, max_nmatches %d, max_nmismatches %d\n",
+		    pair->querypos,score,max_nmatches,max_nmismatches));
+      p = List_next(p);
+    }
+
+#if 0
+    for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
+      medial = (Pair_T) p->first;
+      if (medial->gapp) {
+	debug3(printf("Saw splice medial to 5' end indel\n"));
+	nearindelp = true;
+      } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
+	/* Skip */
+      } else {
+	debug3(printf("Saw mismatch %c medial to 5' end indel\n",medial->comp));
+      }
+    }
 #endif
-		       ) {
+
+    debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches));
+    if (pairs == NULL) {
+      debug3(printf("No indel/gap\n"));
+      path = exon;
+      /* *trim5p = false; */
+
+    } else if (exon == NULL) {
+      debug3(printf("No 5' exon\n"));
+      path = exon;
+      /* *trim5p = false; */
+
+#if 0
+    } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) {
+      debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches));
+      path = (List_T) NULL;
+      /* *trim5p = true; */
+#endif
+
+    } else {
+      querydp3_medialgap = ((Pair_T) pairs->first)->querypos - 1;
+      genomedp3_medialgap = ((Pair_T) pairs->first)->genomepos - 1;
+      queryjump = querydp3_medialgap + 1;
+      genomejump = queryjump /*+ extramaterial_end*/;
+
+      continuous_gappairs_medialgap = Dynprog_end5_gap(&dynprogindex_minor,&finalscore,
+						       &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels,
+						       dynprog,&(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]),
+						       queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap,
+						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
+						       extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true);
+      debug(printf("CONTINUOUS AT 5 (trim_end5_indels)?\n"));
+      debug(Pair_dump_list(continuous_gappairs_medialgap,true));
+      debug3(printf("continuous finalscore %d\n",finalscore));
+
+      if (finalscore > 0) {
+	debug3(printf("Using continuous\n"));
+        path = continuous_gappairs_medialgap;
+	/* *trim5p = false; */
+	
+      } else if (score < 0) {
+	debug3(printf("Not enough matches, so trimming it\n"));
+	path = (List_T) NULL;
+	/* *trim5p = true; */
+
+      } else {
+	debug3(printf("Using indel, because score %d > 0\n",score));
+	path = exon;		/* exon already has the indel */
+	/* *trim5p = false; */
+      }
+    }
+
+    path = Pairpool_transfer(path,pairs);
+
+    pairs = List_reverse(path);
+    pairs = clean_pairs_end5(pairs,ambig_end_length);
+
+    debug3(printf("End of trim_end5_indels: length = %d\n",List_length(pairs)));
+    debug3(Pair_dump_list(pairs,true));
+    return pairs;
+  }
+}
+
+
+/* Also handles case where novelsplicingp == false */
+/* pairs -> pairs */
+static List_T
+trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
+		 Dynprog_T dynprog, Univcoord_T chroffset, Univcoord_T chrhigh,
+		 char *queryseq_ptr, char *queryuc_ptr,
+		 int cdna_direction, bool watsonp, bool jump_late_p,
+		 Pairpool_T pairpool, double defect_rate) {
   List_T path, exon, pairptr, p;
-  Pair_T pair, medial, splice = NULL, gappair;
+  Pair_T pair, splice = NULL, gappair;
   int max_nmatches = 0, max_nmismatches;
   int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
   int max_score, score;
-  bool nearindelp = false;
+  /* bool nearindelp = false; */
   double medial_prob;
-  int nindels;
 
-  debug3(printf("Starting trim_end5_exon_indels\n"));
+  int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels;
+  int querydp3_medialgap, genomedp3_medialgap, queryjump, genomejump;
+  List_T continuous_gappairs_medialgap;
+  int dynprogindex_minor = 0;
+
+
+  debug3(printf("Starting trim_end5_exons with ambig_end_length %d\n",ambig_end_length));
+
+  *indelp = false;
 
   /* Handle first exon */
   if (pairs == NULL) {
@@ -3660,67 +3970,25 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
   debug3(Pair_dump_one(gappair,true));
   debug3(printf("\n"));
 
-  if (gappair->comp == INDEL_COMP) {
-    /* Handle end indel.  No longer possible, since we stop only at gapp */
-    /* indel = pair; */
-    
-    p = pairs;
-    nindels = 1;
-    while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
-      p = List_next(p);
-      nindels++;
-    }
-
-    for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
-      medial = (Pair_T) p->first;
-      if (medial->gapp) {
-	debug3(printf("Saw splice medial to 5' end indel\n"));
-	splice = medial;
-	nearindelp = true;
-      } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
-	/* Skip */
-      } else {
-	debug3(printf("Saw mismatch %c medial to 5' end indel\n",medial->comp));
-      }
-    }
-
-  } else {
-    /* Handle end exon */
-    splice = gappair;
-    debug3(printf("5' end splice length: %d\n",splice->genomejump));
-
-    for (p = pairs, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
-      medial = (Pair_T) p->first;
-      if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
-	/* Skip */
-      } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) {
-	debug3(printf("Saw indel medial to 5' end intron\n"));
-	nearindelp = true;
-      } else {
-	debug3(printf("Saw mismatch %c medial to 5' end intron\n",medial->comp));
-      }
-    }
+  /* Handle end exon */
+  splice = gappair;
+  debug3(printf("5' end splice length: %d\n",splice->genomejump));
 
 #if 0
-    /* No longer possible, since we stop at first indel */
-    if (exon != NULL) {
-      /* Skip first pair of exon, which holds the gap */
-      for (p = List_next(exon), i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
-	distal = (Pair_T) p->first;
-	if (distal->comp == MATCH_COMP || distal->comp == DYNPROG_MATCH_COMP || distal->comp == AMBIGUOUS_COMP) {
-	  /* Skip */
-	} else if (distal->comp == INDEL_COMP || distal->comp == SHORTGAP_COMP) {
-	  debug3(printf("Saw indel distal to 5' end intron\n"));
-	  nearindelp = true;
-	} else {
-	  debug3(printf("Saw mismatch %c distal to 5' end intron\n",distal->comp));
-	}
-      }
+  for (p = pairs, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
+    medial = (Pair_T) p->first;
+    if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
+      /* Skip */
+    } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) {
+      debug3(printf("Saw indel medial to 5' end intron\n"));
+      nearindelp = true;
+    } else {
+      debug3(printf("Saw mismatch %c medial to 5' end intron\n",medial->comp));
     }
-#endif
   }
+#endif
 
-  debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches));
+  debug3(printf("Before end intron, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches));
   if (pairs == NULL) {
     debug3(printf("No indel/gap\n"));
     path = exon;
@@ -3746,44 +4014,12 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
     *trim5p = false;
 #endif
 
+#if 0
   } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) {
     debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches));
     path = (List_T) NULL;
     *trim5p = true;
-
-  } else if (splice == NULL) {
-    debug3(printf("nindels %d\n",nindels));
-    if (max_nmatches < min_indel_end_matches) {
-      debug3(printf("Not enough matches %d < %d, so trimming it\n",max_nmatches,min_indel_end_matches));
-      path = (List_T) NULL;
-      *trim5p = true;
-
-    } else if (nindels > 3) {
-      /* Large indel */
-      if (max_nmatches - max_nmismatches > nindels) {
-	debug3(printf("Large indel: More matches than mismatches, so keeping it\n"));
-	path = exon;		/* exon already has the indel */
-	*trim5p = false;
-
-      } else {
-	debug3(printf("Large indel: Trimming it\n"));
-	path = (List_T) NULL;
-	*trim5p = true;
-      }
-
-    } else {
-      /* Small indel */
-      if (max_nmatches - max_nmismatches > 2) {
-	debug3(printf("Small indel: More matches than mismatches, so keeping it\n"));
-	path = exon;		/* exon already has the indel */
-	*trim5p = false;
-
-      } else {
-	debug3(printf("Small indel: Trimming it\n"));
-	path = (List_T) NULL;
-	*trim5p = true;
-      }
-    }
+#endif
 
   } else {
     if (splice->genomejump > maxintronlen_ends) {
@@ -3824,25 +4060,58 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
       *trim5p = true;
 #endif
 
-    } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches,
-					    /*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob,
+    } else if (sufficient_splice_prob_strict(/*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob,
 					    /*medial_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob)) {
-      /* Want to keep for comparison of fwd and rev, even if probabilities are poor */
       debug3(printf("Keeping first 5' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches));
       path = exon;		/* exon already has the gap */
       *trim5p = false;
 
     } else {
-      debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 5' exon\n",splice->donor_prob,splice->acceptor_prob));
+      querydp3_medialgap = ((Pair_T) pairs->first)->querypos - 1;
+      genomedp3_medialgap = ((Pair_T) pairs->first)->genomepos - 1;
+      queryjump = querydp3_medialgap + 1;
+      genomejump = queryjump + extramaterial_end;
+
+      continuous_gappairs_medialgap = Dynprog_end5_gap(&dynprogindex_minor,&finalscore,
+						       &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels,
+						       dynprog,&(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]),
+						       queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap,
+						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
+						       extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true);
+      debug(printf("CONTINUOUS AT 5 (trim_end5_exons)?\n"));
+      debug(Pair_dump_list(continuous_gappairs_medialgap,true));
+      debug3(printf("continuous finalscore %d\n",finalscore));
+
+      if (finalscore > 0) {
+        path = continuous_gappairs_medialgap;
+	if (continuous_nindels > 0) {
+	  *trim5p = true;	/* So calling procedure iterates */
+	  *indelp = true;	/* So calling procedure will call trim_end5_indels */
+	} else {
+	  *trim5p = false;
+	}
+	
+      } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches,
+	                                      /*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob,
+					      /*medial_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob)) {
+        /* Want to keep for comparison of fwd and rev, even if probabilities are poor */
+        debug3(printf("Keeping first 5' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches));
+	path = exon;		/* exon already has the gap */
+	*trim5p = false;
 
-      medial_prob = (cdna_direction >= 0) ? splice->acceptor_prob : splice->donor_prob;
-      if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true &&
-	  medial_prob > 0.95) {
-	*trim5p = false;		/* Not really, since we are trimming, but this stops further work */
       } else {
-	*trim5p = true;
+	/* TODO: Set ambig_end_length_5 here, so default output shows a donor or acceptor end type */
+	debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 5' exon\n",splice->donor_prob,splice->acceptor_prob));
+	
+	medial_prob = (cdna_direction >= 0) ? splice->acceptor_prob : splice->donor_prob;
+	if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true &&
+	    medial_prob > 0.95) {
+	  *trim5p = false;		/* Not really, since we are trimming, but this stops further work */
+	} else {
+	  *trim5p = true;
+	}
+	path = (List_T) NULL;
       }
-      path = (List_T) NULL;
     }
   }
 
@@ -3859,32 +4128,208 @@ trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs,
   pairs = List_reverse(path);
   pairs = clean_pairs_end5(pairs,ambig_end_length);
 
-  debug3(printf("End of trim_end5_exon_indels: length = %d\n",List_length(pairs)));
+  debug3(printf("End of trim_end5_exons: length = %d\n",List_length(pairs)));
   debug3(Pair_dump_list(pairs,true));
   return pairs;
 }
 
 
-
 /* Also handles case where novelsplicingp == false */
 /* path -> path */
 static List_T
-trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
-		       int cdna_direction
+trim_end3_indels (List_T path, int ambig_end_length,
+		  Dynprog_T dynprog, Univcoord_T chroffset, Univcoord_T chrhigh,
+		  char *queryseq_ptr, char *queryuc_ptr, int querylength,
+		  int cdna_direction, bool watsonp, bool jump_late_p,
+		  Pairpool_T pairpool, double defect_rate) {
+  List_T pairs, exon, pairptr, p;
+  Pair_T pair, medial;
+  int max_nmatches = 0, max_nmismatches;
+  int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
+  int max_score, score;
+  bool nearindelp = false;
+  int nindels;
+
+  int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels;
+  int querydp5_medialgap, genomedp5_medialgap, queryjump, genomejump;
+  List_T continuous_gappairs_medialgap;
+  int dynprogindex_minor = 0;
+
+  debug3(printf("Starting trim_end3_indels\n"));
+
+  /* Handle last exon */
+  if (path == NULL) {
+    /* *trim3p = false; */
+    return (List_T) NULL;
+  } else if (ambig_end_length > 0) {
+    /* Don't mess with ambiguous end */
+    /* *trim3p = false; */
+    return path;
+  } else {
+    pair = path->first;
+    debug3(printf("queryend %d\n",pair->querypos));
+  }
+
+  exon = (List_T) NULL;
+  while (path != NULL && pair->comp != INDEL_COMP) {
+    pairptr = path;
+    path = Pairpool_pop(path,&pair);
 #ifdef WASTE
-		       , Pairpool_T pairpool
+    exon = Pairpool_push_existing(exon,pairpool,pair);
+#else
+    exon = List_push_existing(exon,pairptr);
 #endif
-		       ) {
+  }
+
+  while (path != NULL && ((Pair_T) path->first)->comp == INDEL_COMP) {
+    pairptr = path;
+    path = Pairpool_pop(path,&pair);
+#ifdef WASTE
+    exon = Pairpool_push_existing(exon,pairpool,pair);
+#else
+    exon = List_push_existing(exon,pairptr);
+#endif
+  }
+  debug3(printf("End exon:\n"));
+  debug3(Pair_dump_list(exon,true));
+
+
+  if (exon == NULL) {
+    /* *trim3p = false; */
+    return path;
+
+  } else {
+    p = exon;
+    nindels = 1;
+    while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
+      p = List_next(p);
+      nindels++;
+    }
+
+    max_nmatches = max_nmismatches = 0;
+    nmatches = nmismatches = 0;
+    max_score = score = 0;
+    /* Evaluate region distal to indel */
+    while (p != NULL) {
+      pair = (Pair_T) List_head(p);
+      if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) {
+	score += 1;
+	nmatches += 1;
+      } else {
+	score -= 3;
+	nmismatches += 1;
+      }
+      if (score > max_score) {
+	max_score = score;
+	max_nmatches = nmatches;
+	max_nmismatches = nmismatches;
+      }
+      debug3(printf("3' querypos %d => score %d, max_nmatches %d, max_nmismatches %d\n",
+		    pair->querypos,score,max_nmatches,max_nmismatches));
+      p = List_next(p);
+    }
+
+#if 0
+    for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
+      medial = (Pair_T) p->first;
+      if (medial->gapp) {
+	debug3(printf("Saw splice medial to 3' end indeln"));
+	nearindelp = true;
+      } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
+	/* Skip */
+      } else {
+	debug3(printf("Saw mismatch medial %c to 3' end indel\n",medial->comp));
+      }
+    }
+#endif
+
+    debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches));
+    if (path == NULL) {
+      debug3(printf("No indel/gap\n"));
+      pairs = exon;
+      /* *trim3p = false; */
+
+    } else if (exon == NULL) {
+      debug3(printf("No 3' exon\n"));
+      pairs = exon;
+      /* *trim3p = false; */
+
+#if 0
+    } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) {
+      debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches));
+      pairs = (List_T) NULL;
+      /* *trim3p = true; */
+#endif
+    
+    } else {
+      querydp5_medialgap = ((Pair_T) path->first)->querypos + 1;
+      genomedp5_medialgap = ((Pair_T) path->first)->genomepos + 1;
+      queryjump = querylength - querydp5_medialgap;
+      genomejump = queryjump /*+ extramaterial_end*/;
+
+      continuous_gappairs_medialgap = Dynprog_end3_gap(&dynprogindex_minor,&finalscore,
+						       &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels,
+						       dynprog,&(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]),
+						       queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap,
+						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
+						       extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true);
+      debug(printf("CONTINUOUS AT 3 (trim_end3_indels)?\n"));
+      debug(Pair_dump_list(continuous_gappairs_medialgap,true));
+      debug3(printf("continuous finalscore %d\n",finalscore));
+
+      if (finalscore > 0) {
+	debug3(printf("Using continuous\n"));
+	pairs = List_reverse(continuous_gappairs_medialgap);
+	/* *trim3p = false; */
+	
+      } else if (score < 0) {
+	debug3(printf("Not enough matches, so trimming it\n"));
+	pairs = (List_T) NULL;
+	/* *trim3p = true; */
+
+      } else {
+	debug3(printf("Using indel, because score %d > 0\n",score));
+	pairs = exon;
+	/* *trim3p = false; */
+      }
+    }
+
+    pairs = Pairpool_transfer(pairs,path);
+
+    path = List_reverse(pairs);
+    path = clean_path_end3(path,ambig_end_length);
+
+    debug3(printf("End of trim_end3_indels: length = %d\n",List_length(path)));
+    debug3(Pair_dump_list(path,true));
+    return path;
+  }
+}
+
+
+/* Also handles case where novelsplicingp == false */
+/* path -> path */
+static List_T
+trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path,
+		 Dynprog_T dynprog, Univcoord_T chroffset, Univcoord_T chrhigh,
+		 char *queryseq_ptr, char *queryuc_ptr, int querylength,
+		 int cdna_direction, bool watsonp, bool jump_late_p,
+		 Pairpool_T pairpool, double defect_rate) {
   List_T pairs, exon, pairptr, p;
-  Pair_T pair, medial, splice = NULL, gappair;
+  Pair_T pair, splice = NULL, gappair;
   int max_nmatches = 0, max_nmismatches;
   int nmatches = 0, nmismatches /* = -1 because of the gap */, i;
   int max_score, score;
-  bool nearindelp = false;
+  /* bool nearindelp = false; */
   double medial_prob;
-  int nindels;
 
-  debug3(printf("Starting trim_end3_exon_indels\n"));
+  int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels;
+  int querydp5_medialgap, genomedp5_medialgap, queryjump, genomejump;
+  List_T continuous_gappairs_medialgap;
+  int dynprogindex_minor = 0;
+
+  debug3(printf("Starting trim_end3_exons with ambig_end_length %d\n",ambig_end_length));
+
+  *indelp = false;
 
   /* Handle last exon */
   if (path == NULL) {
@@ -3908,7 +4353,7 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
   }
 
   exon = (List_T) NULL;
-  while (path != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP*/) {
+  while (path != NULL && !pair->gapp) {
     pairptr = path;
     path = Pairpool_pop(path,&pair);
 #ifdef WASTE
@@ -3953,69 +4398,27 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
   debug3(Pair_dump_one(gappair,true));
   debug3(printf("\n"));
 
-  if (gappair->comp == INDEL_COMP) {
-    /* Handle end indel.  No longer possible, since we stop only at gapp */
-    /* indel = pair; */
-
-    p = path;
-    nindels = 1;
-    while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) {
-      p = List_next(p);
-      nindels++;
-    }
-
-    for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
-      medial = (Pair_T) p->first;
-      if (medial->gapp) {
-	debug3(printf("Saw splice medial to 3' end indeln"));
-	splice = medial;
-	nearindelp = true;
-      } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
-	/* Skip */
-      } else {
-	debug3(printf("Saw mismatch medial %c to 3' end indel\n",medial->comp));
-      }
-    }
-
-  } else {
-    /* Handle end exon */
-    splice = gappair;
-    debug3(printf("3' end splice length: %d\n",splice->genomejump));
-
-    for (p = path, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
-      medial = (Pair_T) p->first;
-      if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
-	/* Skip */
-      } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) {
-	debug3(printf("Saw indel medial to 3' end intron\n"));
-	nearindelp = true;
-      } else {
-	debug3(printf("Saw mismatch medial %c to 3' end intron\n",medial->comp));
-      }
-    }
+  /* Handle end exon */
+  splice = gappair;
+  debug3(printf("3' end splice length: %d\n",splice->genomejump));
 
 #if 0
-    /* No longer possible, since we stop at first indel */
-    if (exon != NULL) {
-      /* Skip first pair of exon, which holds the gap */
-      for (p = List_next(exon), i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
-	distal = (Pair_T) p->first;
-	if (distal->comp == MATCH_COMP || distal->comp == DYNPROG_MATCH_COMP || distal->comp == AMBIGUOUS_COMP) {
-	  /* Skip */
-	} else if (distal->comp == INDEL_COMP || distal->comp == SHORTGAP_COMP) {
-	  debug3(printf("Saw indel distal to 3' end intron\n"));
-	  nearindelp = true;
-	} else {
-	  debug3(printf("Saw mismatch %c distal to 3' end intron\n",distal->comp));
-	}
-      }
+  for (p = path, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) {
+    medial = (Pair_T) p->first;
+    if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) {
+      /* Skip */
+    } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) {
+      debug3(printf("Saw indel medial to 3' end intron\n"));
+      nearindelp = true;
+    } else {
+      debug3(printf("Saw mismatch medial %c to 3' end intron\n",medial->comp));
     }
-#endif
   }
+#endif
 
-  debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches));
+  debug3(printf("Before end intron, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches));
   if (path == NULL) {
-    debug3(printf("No indel/gap\n"));
+    debug3(printf("No gap\n"));
     pairs = exon;
     *trim3p = false;
 
@@ -4039,45 +4442,13 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
     *trim3p = false;
 #endif
 
+#if 0
   } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) {
     debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches));
     pairs = (List_T) NULL;
     *trim3p = true;
+#endif
     
-  } else if (splice == NULL) {
-    debug3(printf("nindels %d\n",nindels));
-    if (max_nmatches < min_indel_end_matches) {
-      debug3(printf("Not enough matches %d < %d, so trimming it\n",max_nmatches,min_indel_end_matches));
-      pairs = (List_T) NULL;
-      *trim3p = true;
-
-    } else if (nindels > 3) {
-      /* Large indel */
-      if (max_nmatches - max_nmismatches > nindels) {
-	debug3(printf("Large indel: More matches than mismatches, so keeping it\n"));
-	pairs = exon;		/* exon already has the indel */
-	*trim3p = false;
-
-      } else {
-	debug3(printf("Large indel: Trimming it\n"));
-	pairs = (List_T) NULL;
-	*trim3p = true;
-      }
-
-    } else {
-      /* Small indel */
-      if (max_nmatches - max_nmismatches > 2) {
-	debug3(printf("Small indel: More matches than mismatches, so keeping it\n"));
-	pairs = exon;		/* exon already has the indel */
-	*trim3p = false;
-
-      } else {
-	debug3(printf("Small indel: Trimming it\n"));
-	pairs = (List_T) NULL;
-	*trim3p = true;
-      }
-    }
-
   } else {
     if (splice->genomejump > maxintronlen_ends) {
       debug3(printf("End intron is too long, so trimming it\n"));
@@ -4117,25 +4488,58 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
       *trim3p = true;
 #endif
 
-    } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches,
-					    /*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob,
-					    /*medial_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob)) {
-      /* Want to keep for comparison of fwd and rev, even if probabilities are poor */
+    } else if (sufficient_splice_prob_strict(/*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob,
+					     /*medial_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob)) {
       debug3(printf("Keeping last 3' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches));
       pairs = exon;		/* exon already has the gap */
       *trim3p = false;
-
+	
     } else {
-      debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 3' exon\n",splice->donor_prob,splice->acceptor_prob));
+      querydp5_medialgap = ((Pair_T) path->first)->querypos + 1;
+      genomedp5_medialgap = ((Pair_T) path->first)->genomepos + 1;
+      queryjump = querylength - querydp5_medialgap;
+      genomejump = queryjump + extramaterial_end;
+
+      continuous_gappairs_medialgap = Dynprog_end3_gap(&dynprogindex_minor,&finalscore,
+						       &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels,
+						       dynprog,&(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]),
+						       queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap,
+						       chroffset,chrhigh,watsonp,jump_late_p,pairpool,
+						       extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true);
+      debug(printf("CONTINUOUS AT 3 (trim_end3_exons)?\n"));
+      debug(Pair_dump_list(continuous_gappairs_medialgap,true));
+      debug3(printf("continuous finalscore %d\n",finalscore));
+
+      if (finalscore > 0) {
+	pairs = List_reverse(continuous_gappairs_medialgap);
+	if (continuous_nindels > 0) {
+	  *trim3p = true;	/* So calling procedure iterates */
+	  *indelp = true; /* So calling procedure will call trim_end3_indels */
+	} else {
+	  *trim3p = false;
+	}
+	
+      } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches,
+					      /*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob,
+					      /*medial_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob)) {
+	/* Want to keep for comparison of fwd and rev, even if probabilities are poor */
+	debug3(printf("Keeping last 3' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches));
+	pairs = exon;		/* exon already has the gap */
+	*trim3p = false;
 
-      medial_prob = (cdna_direction >= 0) ? splice->donor_prob : splice->acceptor_prob;
-      if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true &&
-	  medial_prob > 0.95) {
-	*trim3p = false;		/* Not really, since we are trimming, but this stops further work */
       } else {
-	*trim3p = true;
+	/* TODO: Set ambig_end_length_3 here, so default output shows a donor or acceptor end type */
+	debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 3' exon\n",splice->donor_prob,splice->acceptor_prob));
+	
+	medial_prob = (cdna_direction >= 0) ? splice->donor_prob : splice->acceptor_prob;
+	if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true &&
+	    medial_prob > 0.95) {
+	  *trim3p = false;		/* Not really, since we are trimming, but this stops further work */
+	} else {
+	  *trim3p = true;
+	}
+	pairs = (List_T) NULL;
       }
-      pairs = (List_T) NULL;
     }
   }
 
@@ -4151,7 +4555,7 @@ trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path,
   path = List_reverse(pairs);
   path = clean_path_end3(path,ambig_end_length);
 
-  debug3(printf("End of trim_noncanonical_end3_exons: length = %d\n",List_length(path)));
+  debug3(printf("End of trim_end3_exons: length = %d\n",List_length(path)));
   debug3(Pair_dump_list(path,true));
   return path;
 }
@@ -5072,6 +5476,8 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
   bool intronp;
   int hardclip_start, hardclip_end;
 
+  assert(pairs != NULL);
+
   start = &(pairarray[0]);
   end = &(pairarray[npairs-1]);
   hardclip_start = start->querypos;
@@ -5079,15 +5485,17 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
 
   cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,pairarray,npairs,querylength,
 				    watsonp,sensedir,/*chimera_part*/0);
-  if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
+  if (Pair_cigar_length(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
     fprintf(stderr,"Could not compute a valid cigar from the following alignment: %d + %d + %d != %d\n",
-	    Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+	    Pair_cigar_length(cigar_tokens),hardclip_start,hardclip_end,querylength);
     Pair_dump_array_stderr(pairarray,npairs,/*zerobasedp*/true);
     Pair_tokens_free(&cigar_tokens);
     return (T) NULL;
 
   } else {
     new = (T) MALLOC_OUT(sizeof(*new)); /* Matches FREE_OUT in Stage3_free */
+    debug99(printf("Creating %p\n",new));
+
     new->cigar_tokens = cigar_tokens;
     new->intronp = intronp;
   }
@@ -5119,9 +5527,9 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
   if (cdna_direction == 0 && require_splicedir_p == true) {
     new->cdna_direction = Pair_guess_cdna_direction_array(&new->sensedir,pairarray,npairs,/*invertedp*/false,
 							  chroffset,watsonp);
-  } else if (ncanonical == 0 && nsemicanonical == 0 && nnoncanonical == 0) {
+  } else if (ncanonical == 0 && nsemicanonical == 0 /*&& nnoncanonical == 0*/) {
     new->cdna_direction = 0;
-    new->sensedir = sensedir;
+    new->sensedir = SENSE_NULL;	/* was sensedir, but this gives bad XS output */
   } else {
     new->cdna_direction = cdna_direction;
     new->sensedir = sensedir;
@@ -5192,6 +5600,7 @@ Stage3_new (struct Pair_T *pairarray, List_T pairs, int npairs, int goodness, in
 void
 Stage3_free (T *old) {
 
+  debug99(printf("Freeing %p\n",*old));
   if (*old) {
     /* Don't free strain.  Belongs to altstrain_iit. */
     Pair_tokens_free(&(*old)->cigar_tokens);
@@ -5514,6 +5923,7 @@ Stage3_print_pathsummary (Filestring_T fp, T this, int pathnum, Univ_IIT_T chrom
   Pair_T start, end;
   bool referencealignp;
 
+  debug99(printf("Printing %p\n",this));
   start = &(this->pairarray[0]);
   end = &(this->pairarray[this->npairs-1]);
   referencealignp = this->straintype == 0 ? true : false;
@@ -5605,6 +6015,19 @@ Stage3_print_gff3 (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_i
 }
 
 
+#ifndef PMAP			/* Compiled only when PMAP is not defined */
+void				/* Thin wrapper: hands this alignment's pair array to Pair_print_bedpe */
+Stage3_print_bedpe (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit,
+		    Sequence_T queryseq, int querylength) { /* pathnum and queryseq are not used in this body */
+  Pair_print_bedpe(fp,this->pairarray,this->npairs,
+		   this->chrnum,querylength,this->watsonp,this->cdna_direction,
+		   chromosome_iit); /* output goes to fp; presumably BEDPE records, per the callee's name -- confirm in pair.c */
+  return;
+}
+#endif /* !PMAP */
+
+
+
 #ifndef GSNAP
 #ifndef PMAP
 /* Only for GMAP program */
@@ -5631,7 +6054,8 @@ Stage3_print_sam (Filestring_T fp, char *abbrev, T this, int pathnum, int npaths
     Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,this->cigar_tokens,this->intronp,
 		   Sequence_accession(queryseq),/*acc2*/NULL,this->chrnum,chromosome_iit,usersegment,
 		   Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
-		   /*hardclip5*/0,/*hardclip3*/querylength-this->circularpos,querylength,
+		   /*hardclip5*/0,/*hardclip3*/querylength-this->circularpos,
+		   /*mate_hardclip_low*/0,/*mate_hardclip_high*/0,querylength,
 		   this->watsonp,this->sensedir,chimera_part,chimera,
 		   quality_shift,Sequence_firstp(queryseq),
 		   pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,chrpos,this->chrlength,
@@ -5640,7 +6064,8 @@ Stage3_print_sam (Filestring_T fp, char *abbrev, T this, int pathnum, int npaths
     Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,this->cigar_tokens,this->intronp,
 		   Sequence_accession(queryseq),/*acc2*/NULL,this->chrnum,chromosome_iit,usersegment,
 		   Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
-		   /*hardclip5*/this->circularpos,/*hardclip3*/0,querylength,
+		   /*hardclip5*/this->circularpos,/*hardclip3*/0,
+		   /*mate_hardclip_low*/0,/*mate_hardclip_high*/0,querylength,
 		   this->watsonp,this->sensedir,chimera_part,chimera,
 		   quality_shift,Sequence_firstp(queryseq),
 		   pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,/*chrpos*/1,this->chrlength,
@@ -5650,7 +6075,8 @@ Stage3_print_sam (Filestring_T fp, char *abbrev, T this, int pathnum, int npaths
     Pair_print_sam(fp,abbrev,this->pairarray,this->npairs,this->cigar_tokens,this->intronp,
 		   Sequence_accession(queryseq),/*acc2*/NULL,this->chrnum,chromosome_iit,usersegment,
 		   Sequence_fullpointer(queryseq),Sequence_quality_string(queryseq),
-		   /*hardclip5*/0,/*hardclip3*/0,querylength,
+		   /*hardclip5*/0,/*hardclip3*/0,
+		   /*mate_hardclip_low*/0,/*mate_hardclip_high*/0,querylength,
 		   this->watsonp,this->sensedir,chimera_part,chimera,
 		   quality_shift,Sequence_firstp(queryseq),
 		   pathnum,npaths_primary,npaths_altloc,absmq_score,second_absmq,chrpos,this->chrlength,
@@ -6565,8 +6991,8 @@ peel_leftward_intron (int *n_peeled_indels, bool *protectedp, List_T *peeled_pat
 
       intron_nt = get_genomic_nt(&intron_nt_alt,genomedp3--,chroffset,chrhigh,watsonp);
       if ((cdna = ((Pair_T) path->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
-	debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp3+1));
 	nmismatches++;
+	debug(printf(" (1) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp3+1));
       }
 
       if (((Pair_T) path->first)->protectedp == true) {
@@ -6595,8 +7021,8 @@ peel_leftward_intron (int *n_peeled_indels, bool *protectedp, List_T *peeled_pat
 
       intron_nt = get_genomic_nt(&intron_nt_alt,genomedp3--,chroffset,chrhigh,watsonp);
       if ((cdna = ((Pair_T) path->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
-	debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp3+1));
 	nmismatches++;
+	debug(printf(" (2) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp3+1));
       }
 
       if (((Pair_T) path->first)->comp == MATCH_COMP || ((Pair_T) path->first)->comp == DYNPROG_MATCH_COMP || ((Pair_T) path->first)->comp == AMBIGUOUS_COMP) {
@@ -7194,8 +7620,8 @@ peel_rightward_intron (int *n_peeled_indels, bool *protectedp, List_T *peeled_pa
 
       intron_nt = get_genomic_nt(&intron_nt_alt,genomedp5++,chroffset,chrhigh,watsonp);
       if ((cdna = ((Pair_T) pairs->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
-	debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp5-1));
 	nmismatches++;
+	debug(printf(" (3) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp5-1));
       }
 
       if (((Pair_T) pairs->first)->protectedp == true) {
@@ -7224,8 +7650,8 @@ peel_rightward_intron (int *n_peeled_indels, bool *protectedp, List_T *peeled_pa
 
       intron_nt = get_genomic_nt(&intron_nt_alt,genomedp5++,chroffset,chrhigh,watsonp);
       if ((cdna = ((Pair_T) pairs->first)->cdna) != intron_nt && cdna != intron_nt_alt) {
-	debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp5-1));
 	nmismatches++;
+	debug(printf(" (4) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp5-1));
       }
 
       if (((Pair_T) pairs->first)->comp == MATCH_COMP || ((Pair_T) pairs->first)->comp == DYNPROG_MATCH_COMP || ((Pair_T) pairs->first)->comp == AMBIGUOUS_COMP) {
@@ -7548,7 +7974,7 @@ traverse_cdna_gap (bool *filledp, bool *incompletep, int *dynprogindex_minor, in
   queryjump = querydp3 - querydp5 + 1;
   genomejump = genomedp3 - genomedp5 + 1;
 
-  if (queryjump <= genomejump + MININTRONLEN) {
+  if (queryjump > 0 && queryjump <= genomejump + MININTRONLEN) {
     debug(printf("Really a single gap, not a cDNA gap, since queryjump %d <= genomejump %d + minintronlen %d\n",
 		 queryjump,genomejump,MININTRONLEN));
     gappairs = Dynprog_single_gap(&(*dynprogindex_minor),&finalscore,
@@ -7748,7 +8174,7 @@ traverse_genome_gap (bool *filledp, bool *shiftp, int *dynprogindex_minor, int *
   genomejump = genomedp3 - genomedp5 + 1;
 
   /* genomedp5 + genomejump - 1 >= genomedp3 - genomejump + 1) ?  but doesn't work on AA669154, chr1*/
-  if (genomejump <= queryjump + MININTRONLEN) {
+  if (queryjump > 0 && genomejump <= queryjump + MININTRONLEN) {
     debug(printf("Really a single gap, not an intron\n"));
     gappairs = Dynprog_single_gap(&(*dynprogindex_minor),&finalscore,
 				  &nmatches,&nmismatches,&nopens,&nindels,dynprogM,
@@ -8489,7 +8915,7 @@ distalmedial_ending5 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_m
 						   &(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]),
 						   queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap,
 						   chroffset,chrhigh,watsonp,jump_late_p,pairpool,
-						   extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS);
+						   extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/false);
   *ambig_end_length = 0;
   *ambig_prob = 0.0;
 
@@ -8600,7 +9026,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
 						     &(queryseq_ptr[querydp3_distalgap]),&(queryuc_ptr[querydp3_distalgap]),
 						     queryjump,genomejump,querydp3_distalgap,genomedp3_distalgap,
 						     chroffset,chrhigh,watsonp,jump_late_p,pairpool,
-						     extraband_end,defect_rate,endalign);
+						     extraband_end,defect_rate,endalign,/*require_pos_score_p*/false);
     *ambig_end_length = 0;
     *ambig_prob = 0.0;
     *knownsplicep = false;
@@ -8618,7 +9044,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
 		   firstpair->querypos,querydp3_distalgap));
       return (List_T) NULL;
 
-    } else if (*finalscore < 0) {
+    } else if (*finalscore <= 0) {
       *knownsplicep = false;
 #if 0
       return (List_T) NULL;
@@ -8656,6 +9082,7 @@ distalmedial_ending3 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_m
   genomedp5_distalgap = leftpair->genomepos + 1;
   /* if (leftpair->cdna == ' ') querydp5_distalgap--; -- For old dynamic programming */
   /* if (leftpair->genome == ' ') genomedp5_distalgap--; -- For old dynamic programming */
+
   querydp5_medialgap = querydp5_distalgap;
   genomedp5_medialgap = genomedp5_distalgap;
   querydp3 = rightquerypos - 1;
@@ -8709,10 +9136,14 @@ distalmedial_ending3 (bool *knownsplicep, bool *chop_exon_p, int *dynprogindex_m
 						   &(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]),
 						   queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap,
 						   chroffset,chrhigh,watsonp,jump_late_p,pairpool,
-						   extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS);
+						   extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,
+						   /*require_pos_score_p*/false);
   *ambig_end_length = 0;
   *ambig_prob = 0.0;
 
+  debug(printf("Medial gap\n"));
+  debug(Pair_dump_list(continuous_gappairs_medialgap,true));
+
   continuous_goodness_medialgap = nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels;
   debug(printf("Continuous_goodness_medialgap %d = %d + %d*%d + %d*%d + %d*%d\n",
 	       continuous_goodness_medialgap,nmatches,MISMATCH,nmismatches,QOPEN,nopens,QINDEL,nindels));
@@ -8816,7 +9247,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
 						     &(queryseq_ptr[querydp5_distalgap]),&(queryuc_ptr[querydp5_distalgap]),
 						     queryjump,genomejump,querydp5_distalgap,genomedp5_distalgap,
 						     chroffset,chrhigh,watsonp,jump_late_p,pairpool,
-						     extraband_end,defect_rate,endalign);
+						     extraband_end,defect_rate,endalign,/*require_pos_score_p*/false);
     *ambig_end_length = 0;
     *ambig_prob = 0.0;
     *knownsplicep = false;
@@ -8835,7 +9266,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
 		   firstpair->querypos,querydp5_distalgap));
       return (List_T) NULL;
       
-    } else if (*finalscore < 0) {
+    } else if (*finalscore <= 0) {
       *knownsplicep = false;
 #if 0
       return (List_T) NULL;
@@ -8871,7 +9302,6 @@ find_dual_break_spliceends (List_T path, List_T pairs,
   Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm;
   Univcoord_T start, middle, end; /* start to middle has mismatches, while middle to end has none */
   double donor_prob, acceptor_prob;
-  double max_prob_5 = 0.0, max_prob_3 = 0.0, max_prob_5_mm = 0.0, max_prob_3_mm = 0.0;
 
 
   debug13(printf("\nEntered find_dual_break_spliceends with cdna_direction %d\n",cdna_direction));
@@ -8971,9 +9401,9 @@ find_dual_break_spliceends (List_T path, List_T pairs,
 	} else {
 	  /* splicetype5 = splicetype5_mm = ANTIDONOR; */
 
-	  start_genomicpos = (chrhigh - chroffset) - start;
-	  middle_genomicpos = (chrhigh - chroffset) - middle;
-	  end_genomicpos = (chrhigh - chroffset) - end;
+	  start_genomicpos = (start > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - start;
+	  middle_genomicpos = (middle > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - middle;
+	  end_genomicpos = (end > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - end;
 
 	  /* assert(start_genomicpos <= end_genomicpos); */
 	  genomicpos = start_genomicpos;
@@ -9063,9 +9493,9 @@ find_dual_break_spliceends (List_T path, List_T pairs,
 	} else {
 	  /* splicetype5 = splicetype5_mm = ACCEPTOR; */
 
-	  start_genomicpos = (chrhigh - chroffset) - start;
-	  middle_genomicpos = (chrhigh - chroffset) - middle;
-	  end_genomicpos = (chrhigh - chroffset) - end;
+	  start_genomicpos = (start > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - start;
+	  middle_genomicpos = (middle > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - middle;
+	  end_genomicpos = (end > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - end;
 
 	  /* assert(start_genomicpos <= end_genomicpos); */
 	  genomicpos = start_genomicpos;
@@ -9201,9 +9631,9 @@ find_dual_break_spliceends (List_T path, List_T pairs,
 	} else {
 	  /* splicetype3 = splicetype3_mm = ANTIACCEPTOR; */
 
-	  start_genomicpos = (chrhigh - chroffset) - start + 1;
-	  middle_genomicpos = (chrhigh - chroffset) - middle + 1;
-	  end_genomicpos = (chrhigh - chroffset) - end + 1;
+	  start_genomicpos = (start > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - start + 1;
+	  middle_genomicpos = (middle > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - middle + 1;
+	  end_genomicpos = (end > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - end + 1;
 
 	  /* assert(start_genomicpos >= end_genomicpos); */
 	  genomicpos = start_genomicpos;
@@ -9293,9 +9723,9 @@ find_dual_break_spliceends (List_T path, List_T pairs,
 	} else {
 	  /* splicetype3 = splicetype3_mm = DONOR; */
 
-	  start_genomicpos = (chrhigh - chroffset) - start + 1;
-	  middle_genomicpos = (chrhigh - chroffset) - middle + 1;
-	  end_genomicpos = (chrhigh - chroffset) - end + 1;
+	  start_genomicpos = (start > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - start + 1;
+	  middle_genomicpos = (middle > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - middle + 1;
+	  end_genomicpos = (end > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - end + 1;
 
 	  /* assert(start_genomicpos >= end_genomicpos); */
 	  genomicpos = start_genomicpos;
@@ -9722,6 +10152,7 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
       pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/UNKNOWNJUMP,/*genomejump*/UNKNOWNJUMP,
 				      /*leftpair*/(*path)->first,/*rightpair*/pairs->first,/*knownp*/false);
     }
+
   } else {
     lastpair = (Pair_T) gappairs->first;
     firstpair = (Pair_T) List_last_value(gappairs);
@@ -9731,6 +10162,7 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
       /* fprintf(stderr,"%d..%d .. %d..%d\n",querydp5,firstpair->querypos,lastpair->querypos,querydp3); */
       debug14(printf("  => entire query sequence bridged or not, but taking it regardless\n"));
       pairs = Pairpool_transfer(pairs,gappairs);
+
     } else {
       debug14(printf("  => entire query sequence not bridged, so abort\n"));
       pairs = Pairpool_transfer(pairs,peeled_pairs);
@@ -10184,6 +10616,7 @@ build_pairs_dualintrons (int *dynprogindex, List_T path,
   bool left_end_intron_p = false, right_end_intron_p, exonp;
 
   debug(printf("\n** Starting build_pairs_dualintrons\n"));
+  debug(Pair_dump_list(path,true));
 
   /* Remove gaps at beginning */
   while (path != NULL && ((Pair_T) path->first)->gapp == true) {
@@ -11864,7 +12297,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
   while ((/* filterp == true || */ dual_break_p == true) && iter0 < MAXITER_CYCLES) {
     /* path = List_reverse(pairs); */
     /* Need to insert gapholders after Pairpool_join_end5 and Pairpool_join_end3 */
-    path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+    path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			     /*finalp*/false);
 
 #ifdef PMAP
 #if 0
@@ -11895,7 +12329,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
 #if 0
       /* gapholders shouldn't be necessary before fix_adjacent_indels,
 	 but is necessary afterward for build_pairs_singles */
-      path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+      path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			     /*finalp*/false);
       pairs = List_reverse(path);
 #endif
 
@@ -11903,7 +12338,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
 		   cdna_direction,iter0));
       path = fix_adjacent_indels(pairs);
       pairs = List_reverse(path);
-      path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+      path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			       /*finalp*/false);
 
 
       /* Pass 2C: solve straight gaps again.  path --> pairs (for defect rate) */
@@ -11932,7 +12368,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
     debug(printf("*** Pass 3 (dir %d): Smooth\n",cdna_direction));
 
     /* Smoothing by probability */
-    path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+    path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			     /*finalp*/false);
     pairs = assign_intron_probs(path,cdna_direction,watsonp,chrnum,chroffset,chrhigh,pairpool);
     Smooth_reset(pairs);
     pairs = Smooth_pairs_by_intronprobs(&badp,pairs,pairpool);
@@ -11945,8 +12382,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
 #endif
       
     /* Smoothing by size: This can undo the short exons found by traverse_dual_genome, so we use protectedp in traverse_dual_genome  */
-    debug(printf("*** Pass 3a (dir %d): Smoothing by size.  Iteration0 %d, iteration1 %d\n",
-		 cdna_direction,iter0,iter1));
+    debug(printf("*** Pass 3a (dir %d): Smoothing by size.  Iteration0 %d\n",
+		 cdna_direction,iter0));
     path = List_reverse(pairs);
     pairs = remove_indel_gaps(path);
     pairs = Smooth_pairs_by_size(&shortp,&deletep,pairs,pairpool,/*stage2_indexsize*/6);
@@ -12000,7 +12437,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
       iter2 = 0;
       shiftp = true;
       while ((shiftp == true || incompletep == true) && iter2++ < MAXITER_INTRONS) {
-	path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+	path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+				 /*finalp*/false);
 	pairs = build_pairs_introns(&shiftp,&incompletep,
 				    &dynprogindex_minor,&dynprogindex_major,path,
 				    chrnum,chroffset,chrhigh,
@@ -12041,13 +12479,21 @@ path_compute_dir (double *defect_rate, List_T pairs,
 
 #endif
 
+      /* Re-evaluate any small exons inserted by build_dual_breaks */
+      path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			       /*finalp*/false);
+      pairs = assign_intron_probs(path,cdna_direction,watsonp,chrnum,chroffset,chrhigh,pairpool);
+      Smooth_reset(pairs);
+      pairs = Smooth_pairs_by_intronprobs(&badp,pairs,pairpool);
+
       debug(printf("*** Pass 6 (dir %d): Solve dual introns.  Iteration0 %d, Iteration1 %d\n",
 		   cdna_direction,iter0,iter1));
       if (badp == false && shortp == false && deletep == false) {
 	debug(printf("  no shortp or deletep, so do nothing\n"));
       } else {
 	debug(printf("  shortp or deletep is true, so running build_pairs_dualintrons\n"));
-	path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+	path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+				 /*finalp*/false);
 	/* XX */
 	/* pairs = assign_gap_types(path,cdna_direction,watsonp,queryseq_ptr,
 	   chrnum,chroffset,chrhigh,pairpool); */
@@ -12080,7 +12526,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
     }
 #endif
 
-    path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+    path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			     /*finalp*/false);
     debug(Pair_dump_list(path,/*zerobasedp*/true));
 
     pairs = List_reverse(path);
@@ -12117,7 +12564,7 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
     ncanonical, nsemicanonical, nnoncanonical;
   double min_splice_prob;
   bool knownsplice5p, chop_exon_p;
-  bool trim5p;
+  bool trim5p, indelp;
 
   *ambig_end_length_5 = 0;
   *ambig_prob_5 = 0.0;
@@ -12168,7 +12615,8 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
   /* Necessary to insert gaps and assign gap types (fills in cDNA
      insertions, so they don't get trimmed), in case an insertion was
      introduced at ends */
-  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			   /*finalp*/false);
   pairs = assign_gap_types(path,cdna_direction,watsonp,queryseq_ptr,
 			   chrnum,chroffset,chrhigh,pairpool);
 
@@ -12192,7 +12640,14 @@ path_compute_end5 (int *ambig_end_length_5, Splicetype_T *ambig_splicetype_5, do
     /* Using iter1 to avoid the possibility of an infinite loop */
     iter1 = 0;
     while (iter1 < 5 && trim5p == true) {
-      pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,cdna_direction);
+      pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh,
+			      queryseq_ptr,queryuc_ptr,
+			      cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+      if (indelp == true) {
+	pairs = trim_end5_indels(pairs,*ambig_end_length_5,dynprogR,chroffset,chrhigh,
+				queryseq_ptr,queryuc_ptr,
+				cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+      }
       if (trim5p == true) {
 	pairs = build_pairs_end5(&knownsplice5p,&(*ambig_end_length_5),&(*ambig_splicetype_5),&(*ambig_prob_5),
 				 &chop_exon_p,&dynprogindex_minor,pairs,
@@ -12259,7 +12714,7 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
     ncanonical, nsemicanonical, nnoncanonical;
   double min_splice_prob;
   bool knownsplice3p, chop_exon_p;
-  bool trim3p;
+  bool trim3p, indelp;
 
   *ambig_end_length_3 = 0;
   *ambig_prob_3 = 0.0;
@@ -12309,7 +12764,8 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
      insertions, so they don't get trimmed), in case an insertion was
      introduced at ends */
   pairs = List_reverse(path);
-  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			   /*finalp*/false);
   pairs = assign_gap_types(path,cdna_direction,watsonp,queryseq_ptr,
 			   chrnum,chroffset,chrhigh,pairpool);
   path = List_reverse(pairs);
@@ -12333,7 +12789,14 @@ path_compute_end3 (int *ambig_end_length_3, Splicetype_T *ambig_splicetype_3, do
     /* Using iter1 to avoid the possibility of an infinite loop */
     iter1 = 0;
     while (iter1 < 5 && trim3p == true) {
-      path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,cdna_direction);
+      path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh,
+			     queryseq_ptr,queryuc_ptr,querylength,
+			     cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+      if (indelp == true) {
+	path = trim_end3_indels(path,*ambig_end_length_3,dynprogL,chroffset,chrhigh,
+				queryseq_ptr,queryuc_ptr,querylength,
+				cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+      }
       if (trim3p == true) {
 	path = build_path_end3(&knownsplice3p,&(*ambig_end_length_3),&(*ambig_splicetype_3),&(*ambig_prob_3),
 			       &chop_exon_p,&dynprogindex_minor,path,
@@ -12438,7 +12901,8 @@ path_compute_final (double defect_rate, List_T pairs, int cdna_direction, bool w
 			    oligoindices_minor,diagpool,cellpool,
 			    defect_rate,/*finalp*/true,/*simplep*/true);
 
-  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool);
+  path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+			   /*finalp*/true);
   pairs = assign_gap_types(path,cdna_direction,watsonp,queryseq_ptr,
 			   chrnum,chroffset,chrhigh,pairpool);
 
@@ -12477,15 +12941,21 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
   int splice_sensedir_5, splice_sensedir_3;
   /* int splice_cdna_direction_5_mm, splice_cdna_direction_3_mm; */
   int splice_sensedir_5_mm, splice_sensedir_3_mm;
+  int nmismatches, *scorei;
   bool mismatchp;
 
-
   debug13(printf("\nEntered gmap_trim_novel_spliceends with orig_sensedir %d, ambig_end_lengths %d and %d\n",
 		 orig_sensedir,*ambig_end_length_5,*ambig_end_length_3));
   *new_sensedir = SENSE_NULL;
 
   Pair_trim_distances(&trim5,&trim3,pairs);
   debug13(printf("Trim distances are %d and %d\n",trim5,trim3));
+  if (trim5 > trim3) {
+    scorei = (int *) MALLOC((trim5 + 1) * sizeof(int));
+  } else {
+    scorei = (int *) MALLOC((trim3 + 1) * sizeof(int));
+  }
+
 
   path = List_reverse(pairs);
   if (path != NULL && knownsplice3p == false && *ambig_end_length_3 == 0
@@ -12504,6 +12974,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
     }
 
     i = 0;
+    nmismatches = 0;
     while (i < trim3) {
       if ((p = List_next(p)) == NULL) {
 	break;
@@ -12511,15 +12982,18 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	break;
       } else if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) {
 	middle = pair->genomepos;
+	scorei[i] = nmismatches;
 	debug13(printf("Resetting middle to be %u\n",middle));
       } else {
 	middle = pair->genomepos;
+	scorei[i] = ++nmismatches;
 	mismatchp = true;
 	debug13(printf("Resetting middle to be %u\n",middle));
       }
       pair = (Pair_T) List_head(p);
       i++;
     }
+    scorei[i] = ++nmismatches;
 
     while (i < trim3 + END_SPLICESITE_SEARCH) {
       if ((p = List_next(p)) == NULL) {
@@ -12558,15 +13032,22 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 
 	/* assert(start_genomicpos >= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("3', watson, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos--;
+	  i++;
+	}
 	while (genomicpos >= middle_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
 	  donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
-	  debug13(printf("3', watson, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
+	  debug13(printf("3', watson, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i]));
 	  if (donor_prob > max_prob_3_mm) {
 	    max_prob_3_mm = donor_prob;
 	    splice_genomepos_3_mm = genomicpos - 1;
 	  }
 	  genomicpos--;
+	  debug13(i++);
 	}
 	while (genomicpos >= end_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
@@ -12583,21 +13064,28 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
       } else {
 	splicetype3 = splicetype3_mm = ANTIDONOR;
 
-	start_genomicpos = (chrhigh - chroffset) - start;
-	middle_genomicpos = (chrhigh - chroffset) - middle;
-	end_genomicpos = (chrhigh - chroffset) - end;
+	start_genomicpos = (start > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - start;
+	middle_genomicpos = (middle > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - middle;
+	end_genomicpos = (end > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - end;
 
 	/* assert(start_genomicpos <= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("3', crick, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos++;
+	  i++;
+	}
 	while (genomicpos <= middle_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
 	  donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
-	  debug13(printf("3', crick, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
+	  debug13(printf("3', crick, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i]));
 	  if (donor_prob > max_prob_3_mm) {
 	    max_prob_3_mm = donor_prob;
 	    splice_genomepos_3_mm = (chrhigh - chroffset) - genomicpos;
 	  }
 	  genomicpos++;
+	  debug13(i++);
 	}
 	while (genomicpos <= end_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
@@ -12622,15 +13110,22 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 
 	/* assert(start_genomicpos >= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("3', watson, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos--;
+	  i++;
+	}
 	while (genomicpos >= middle_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
 	  acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
-	  debug13(printf("3', watson, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
+	  debug13(printf("3', watson, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i]));
 	  if (acceptor_prob > max_prob_3_mm) {
 	    max_prob_3_mm = acceptor_prob;
 	    splice_genomepos_3_mm = genomicpos - 1;
 	  }
 	  genomicpos--;
+	  debug13(i++);
 	}
 	while (genomicpos >= end_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
@@ -12647,21 +13142,28 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
       } else {
 	splicetype3 = splicetype3_mm = ACCEPTOR;
 
-	start_genomicpos = (chrhigh - chroffset) - start;
-	middle_genomicpos = (chrhigh - chroffset) - middle;
-	end_genomicpos = (chrhigh - chroffset) - end;
+	start_genomicpos = (start > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - start;
+	middle_genomicpos = (middle > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - middle;
+	end_genomicpos = (end > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - end;
 
 	/* assert(start_genomicpos <= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("3', crick, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos++;
+	  i++;
+	}
 	while (genomicpos <= middle_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
 	  acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
-	  debug13(printf("3', crick, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
+	  debug13(printf("3', crick, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i]));
 	  if (acceptor_prob > max_prob_3_mm) {
 	    max_prob_3_mm = acceptor_prob;
 	    splice_genomepos_3_mm = (chrhigh - chroffset) - genomicpos;
 	  }
 	  genomicpos++;
+	  debug13(i++);
 	}
 	while (genomicpos <= end_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
@@ -12684,11 +13186,17 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 
 	/* assert(start_genomicpos >= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("3', watson, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos--;
+	  i++;
+	}
 	while (genomicpos >= middle_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
 	  donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */
 	  acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */
-	  debug13(printf("3', watson, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
+	  debug13(printf("3', watson, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i]));
 	  if (donor_prob > max_prob_sense_forward_3_mm) {
 	    max_prob_sense_forward_3_mm = donor_prob;
 	    if (donor_prob > max_prob_3_mm) {
@@ -12710,6 +13218,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	    }
 	  }
 	  genomicpos--;
+	  debug13(i++);
 	}
 	while (genomicpos >= end_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
@@ -12741,17 +13250,23 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	debug13(printf("\n"));
 
       } else {
-	start_genomicpos = (chrhigh - chroffset) - start;
-	middle_genomicpos = (chrhigh - chroffset) - middle;
-	end_genomicpos = (chrhigh - chroffset) - end;
+	start_genomicpos = (start > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - start;
+	middle_genomicpos = (middle > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - middle;
+	end_genomicpos = (end > chrhigh - chroffset) ? 0 : (chrhigh - chroffset) - end;
 
 	/* assert(start_genomicpos <= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("3', crick, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos++;
+	  i++;
+	}
 	while (genomicpos <= middle_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
 	  donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */
 	  acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */
-	  debug13(printf("3', crick, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
+	  debug13(printf("3', crick, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i]));
 	  if (donor_prob > max_prob_sense_forward_3_mm) {
 	    max_prob_sense_forward_3_mm = donor_prob;
 	    if (donor_prob > max_prob_3_mm) {
@@ -12773,6 +13288,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	    }
 	  }
 	  genomicpos++;
+	  debug13(i++);
 	}
 	while (genomicpos <= end_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
@@ -12837,6 +13353,8 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
     }
   }
 
+  /* 5' end */
+
   pairs = List_reverse(path);
   if (pairs != NULL && knownsplice5p == false && *ambig_end_length_5 == 0
       /* && exon_length_5(pairs) >= END_SPLICESITE_EXON_LENGTH */) {
@@ -12854,6 +13372,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
     }
 
     i = 0;
+    nmismatches = 0;
     while (i < trim5) {
       if ((p = List_next(p)) == NULL) {
 	break;
@@ -12861,15 +13380,18 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	break;
       } else if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) {
 	middle = pair->genomepos;
+	scorei[i] = nmismatches;
 	debug13(printf("Resetting middle to be %u\n",middle));
       } else {
 	middle = pair->genomepos;
+	scorei[i] = ++nmismatches;
 	mismatchp = true;
 	debug13(printf("Resetting middle to be %u\n",middle));
       }
       pair = (Pair_T) List_head(p);
       i++;
     }
+    scorei[i] = nmismatches;
 
     while (i < trim5 + END_SPLICESITE_SEARCH) {
       if ((p = List_next(p)) == NULL) {
@@ -12908,15 +13430,22 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 
 	/* assert(start_genomicpos <= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("5', watson, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos++;
+	  i++;
+	}
 	while (genomicpos <= middle_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
 	  acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
-	  debug13(printf("5', watson, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
+	  debug13(printf("5', watson, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i]));
 	  if (acceptor_prob > max_prob_5_mm) {
 	    max_prob_5_mm = acceptor_prob;
 	    splice_genomepos_5_mm = genomicpos;
 	  }
 	  genomicpos++;
+	  debug13(i++);
 	}
 	while (genomicpos <= end_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
@@ -12933,21 +13462,28 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
       } else {
 	splicetype5 = splicetype5_mm = ANTIACCEPTOR;
 
-	start_genomicpos = (chrhigh - chroffset) - start + 1;
-	middle_genomicpos = (chrhigh - chroffset) - middle + 1;
-	end_genomicpos = (chrhigh - chroffset) - end + 1;
+	start_genomicpos = (start > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - start + 1;
+	middle_genomicpos = (middle > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - middle + 1;
+	end_genomicpos = (end > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - end + 1;
 
 	/* assert(start_genomicpos >= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("5', crick, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos--;
+	  i++;
+	}
 	while (genomicpos >= middle_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
 	  acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
-	  debug13(printf("5', crick, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob));
+	  debug13(printf("5', crick, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i]));
 	  if (acceptor_prob > max_prob_5_mm) {
 	    max_prob_5_mm = acceptor_prob;
 	    splice_genomepos_5_mm = (chrhigh - chroffset) - genomicpos + 1;
 	  }
 	  genomicpos--;
+	  debug13(i++);
 	}
 	while (genomicpos >= end_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
@@ -12972,15 +13508,22 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	
 	/* assert(start_genomicpos <= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("5', watson, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos++;
+	  i++;
+	}
 	while (genomicpos <= middle_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
 	  donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
-	  debug13(printf("5', watson, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
+	  debug13(printf("5', watson, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i]));
 	  if (donor_prob > max_prob_5_mm) {
 	    max_prob_5_mm = donor_prob;
 	    splice_genomepos_5_mm = genomicpos;
 	  }
 	  genomicpos++;
+	  debug13(i++);
 	}
 	while (genomicpos <= end_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
@@ -12997,21 +13540,28 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
       } else {
 	splicetype5 = splicetype5_mm = DONOR;
 
-	start_genomicpos = (chrhigh - chroffset) - start + 1;
-	middle_genomicpos = (chrhigh - chroffset) - middle + 1;
-	end_genomicpos = (chrhigh - chroffset) - end + 1;
+	start_genomicpos = (start > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - start + 1;
+	middle_genomicpos = (middle > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - middle + 1;
+	end_genomicpos = (end > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - end + 1;
 
 	/* assert(start_genomicpos >= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("5', crick, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos--;
+	  i++;
+	}
 	while (genomicpos >= middle_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
 	  donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
-	  debug13(printf("5', crick, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob));
+	  debug13(printf("5', crick, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i]));
 	  if (donor_prob > max_prob_5_mm) {
 	    max_prob_5_mm = donor_prob;
 	    splice_genomepos_5_mm = (chrhigh - chroffset) - genomicpos + 1;
 	  }
 	  genomicpos--;
+	  debug13(i++);
 	}
 	while (genomicpos >= end_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
@@ -13034,11 +13584,17 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 
 	/* assert(start_genomicpos <= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("5', watson, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos++;
+	  i++;			/* Not debug13(i++): i indexes scorei in the loop test, so it must advance in non-debug builds too */
+	}
 	while (genomicpos <= middle_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
 	  acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */
 	  donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */
-	  debug13(printf("5', watson, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
+	  debug13(printf("5', watson, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i]));
 	  if (acceptor_prob > max_prob_sense_forward_5_mm) {
 	    max_prob_sense_forward_5_mm = acceptor_prob;
 	    if (acceptor_prob > max_prob_5_mm) {
@@ -13060,6 +13616,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	    }
 	  }
 	  genomicpos++;
+	  debug13(i++);
 	}
 	while (genomicpos <= end_genomicpos &&
 	       genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) {
@@ -13091,17 +13648,23 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	debug13(printf("\n"));
 
       } else {
-	start_genomicpos = (chrhigh - chroffset) - start + 1;
-	middle_genomicpos = (chrhigh - chroffset) - middle + 1;
-	end_genomicpos = (chrhigh - chroffset) - end + 1;
+	start_genomicpos = (start > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - start + 1;
+	middle_genomicpos = (middle > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - middle + 1;
+	end_genomicpos = (end > chrhigh - chroffset) ? 1 : (chrhigh - chroffset) - end + 1;
 
 	/* assert(start_genomicpos >= end_genomicpos); */
 	genomicpos = start_genomicpos;
+	i = 0;
+	while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) {
+	  debug13(printf("5', crick, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i]));
+	  genomicpos--;
+	  i++;
+	}
 	while (genomicpos >= middle_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
 	  acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */
 	  donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */
-	  debug13(printf("5', crick, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob));
+	  debug13(printf("5', crick, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i]));
 	  if (acceptor_prob > max_prob_sense_forward_5_mm) {
 	    max_prob_sense_forward_5_mm = acceptor_prob;
 	    if (acceptor_prob > max_prob_5_mm) {
@@ -13123,6 +13686,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
 	    }
 	  }
 	  genomicpos--;
+	  debug13(i++);
 	}
 	while (genomicpos >= end_genomicpos &&
 	       genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) {
@@ -13366,6 +13930,7 @@ trim_novel_spliceends (int *new_sensedir, List_T pairs,
     }
   }
 
+  FREE(scorei);
   return pairs;
 }
 #endif
@@ -13644,7 +14209,7 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
   int dynprogindex_minor = DYNPROGINDEX_MINOR;
   bool chop_exon_p;
   bool knownsplice5p = false, knownsplice3p = false;
-  bool trimp, trim5p, trim3p, trim5p_ignore, trim3p_ignore;
+  bool trimp, trim5p, trim3p, trim5p_ignore, trim3p_ignore, indelp;
   int iter = 0;
   int new_sensedir;		/* Not used currently */
 
@@ -13656,6 +14221,8 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
   debug3(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction));
 #endif
 
+  debug3(Pair_dump_list(pairs,true));
+
 #ifdef GSNAP
   if (novelsplicingp == true) {
     pairs = trim_novel_spliceends(&new_sensedir,pairs,&(*ambig_end_length_5),&(*ambig_end_length_3),
@@ -13702,7 +14269,13 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
 				 *cdna_direction,watsonp,jump_late_p,
 				 maxpeelback,defect_rate,pairpool,dynprogR,
 				 /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
-	pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,*cdna_direction);
+	pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh,
+				queryseq_ptr,queryuc_ptr,*cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+	if (indelp == true) {
+	  pairs = trim_end5_indels(pairs,*ambig_end_length_5,dynprogR,chroffset,chrhigh,
+				   queryseq_ptr,queryuc_ptr,
+				   *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+	}
 	if (trim5p == true) {
 	  trimp = true;
 	}
@@ -13720,11 +14293,18 @@ path_trim (double defect_rate, int *ambig_end_length_5, int *ambig_end_length_3,
 			       *cdna_direction,watsonp,jump_late_p,
 			       maxpeelback,defect_rate,pairpool,dynprogL,
 			       /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
-	path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,*cdna_direction);
-	pairs = List_reverse(path);
+	path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh,
+			       queryseq_ptr,queryuc_ptr,querylength,
+			       *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+	if (indelp == true) {
+	  path = trim_end3_indels(path,*ambig_end_length_3,dynprogL,chroffset,chrhigh,
+				  queryseq_ptr,queryuc_ptr,querylength,
+				  *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate);
+	}
 	if (trim3p == true) {
 	  trimp = true;
 	}
+	pairs = List_reverse(path);
       }
 
       /* Important to end the alignment with Pair_trim_ends, or else trimming will be faulty */
@@ -13756,7 +14336,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 		Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1,
 		double *ambig_prob_5_1, double *ambig_prob_3_1,
 		int *unknowns1, int *mismatches1, int *qopens1, int *qindels1, int *topens1, int *tindels1,
-		int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *min_splice_prob_1,
+		int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *avg_splice_score_1,
 
 #ifdef GSNAP
 		struct Pair_T **pairarray2, List_T *finalpairs2, int *npairs2, int *goodness2,
@@ -13765,7 +14345,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 		Splicetype_T *ambig_splicetype_5_2, Splicetype_T *ambig_splicetype_3_2,
 		double *ambig_prob_5_2, double *ambig_prob_3_2,
 		int *unknowns2, int *mismatches2, int *qopens2, int *qindels2, int *topens2, int *tindels2,
-		int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *min_splice_prob_2,
+		int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *avg_splice_score_2,
 #endif
 
 		List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends,
@@ -13789,6 +14369,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
   int nknown_fwd, ncanonical_fwd, nsemicanonical_fwd, nnoncanonical_fwd,
     nknown_rev, ncanonical_rev, nsemicanonical_rev, nnoncanonical_rev;
   int nbadintrons_fwd, nbadintrons_rev;
+  double min_splice_prob_1, min_splice_prob_2;
   double max_intron_score_fwd = 0.0, max_intron_score_rev = 0.0,
     avg_donor_score_fwd = 0.0, avg_acceptor_score_fwd = 0.0,
     avg_donor_score_rev = 0.0, avg_acceptor_score_rev = 0.0;
@@ -14224,10 +14805,12 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
     pairs_pretrim = pairs_fwd;
     *cdna_direction = +1;
     *sensedir = SENSE_FORWARD;
+
   } else if (pairs_fwd == NULL) {
     pairs_pretrim = pairs_rev;
     *cdna_direction = -1;
     *sensedir = SENSE_ANTI;
+
   } else {
     path_fwd = List_reverse(pairs_fwd);
     debug11(printf("Calling score_introns for path_fwd before path_trim\n"));
@@ -14274,10 +14857,6 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 					sense_filter);
   }
 
-  if (splicingp == false) {
-    *sensedir = SENSE_NULL;
-  }
-
 
   if (pairs_pretrim == NULL) {
 #if 0
@@ -14288,11 +14867,15 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
     *ambig_prob_5_1 = *ambig_prob_3_1 = 0.0;
 #endif
     return (struct Pair_T *) NULL;
+  }
 
+  if (splicingp == false) {
+    *sensedir = SENSE_NULL;
   }
 
 #ifdef GSNAP
   if (*cdna_direction == 0) {
+    /* If both pairarrays are returned, then first one is fwd and second one is rev */
     debug11(printf("Initial cdna_direction is 0\n"));
     *ambig_end_length_5_1 = fwd_ambig_end_length_5;
     *ambig_end_length_3_1 = fwd_ambig_end_length_3;
@@ -14341,6 +14924,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
 
     if (*finalpairs1 != NULL && *finalpairs2 != NULL) {
       debug11(printf("Both directions are non-null, so returning both\n"));
+      /* Pairarray 1 (cdna_direction +1): */
       *nmatches_posttrim_1 = Pair_nmatches_posttrim(&(*max_match_length_1),*finalpairs1,/*pos5*/*ambig_end_length_5_1,
 						    /*pos3*/querylength - (*ambig_end_length_3_1));
       pairarray1 = make_pairarray(&(*npairs1),&(*finalpairs1),/*cdna_direction*/+1,watsonp,
@@ -14349,25 +14933,40 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
       *goodness1 = Pair_fracidentity_array(&(*matches1),&(*unknowns1),&(*mismatches1),
 					   &(*qopens1),&(*qindels1),&(*topens1),&(*tindels1),
 					   &(*ncanonical1),&(*nsemicanonical1),&(*nnoncanonical1),
-					   &(*min_splice_prob_1),pairarray1,*npairs1,/*cdna_direction*/+1);
+					   &min_splice_prob_1,pairarray1,*npairs1,/*cdna_direction*/+1);
+      *avg_splice_score_1 = avg_donor_score_fwd + avg_acceptor_score_fwd;
 
-      debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
-		    *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1));
+
+      debug0(printf("Result 1 (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels, splice score %f\n",
+		    *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1,*avg_splice_score_1));
       debug0(Pair_dump_array(pairarray1,*npairs1,/*zerobasedp*/true));
 
-      *nmatches_posttrim_2 = Pair_nmatches_posttrim(&(*max_match_length_2),*finalpairs2,/*pos5*/*ambig_end_length_5_2,
-						    /*pos3*/querylength - (*ambig_end_length_3_2));
-      *pairarray2 = make_pairarray(&(*npairs2),&(*finalpairs2),/*cdna_direction*/-1,watsonp,
-				   pairpool,queryseq_ptr,chroffset,chrhigh,
-				   ngap,query_subseq_offset,skiplength);
-      *goodness2 = Pair_fracidentity_array(&(*matches2),&(*unknowns2),&(*mismatches2),
-					   &(*qopens2),&(*qindels2),&(*topens2),&(*tindels2),
-					   &(*ncanonical2),&(*nsemicanonical2),&(*nnoncanonical2),
-					   &(*min_splice_prob_2),*pairarray2,*npairs2,/*cdna_direction*/-1);
+      /* Note avg_donor_score_fwd and so on do not include evaluations
+	 of the end splice junctions.  So if cdna_direction == 0,
+	 callers should assume that the sensedir is not known */
+
+      if (0 /*&& Pair_identical_p(*finalpairs1,*finalpairs2) == true*/) {
+	/* This causes misses in resolve-inside procedures */
+	debug0(printf("Result 2 is identical to Result 1, so not returning it\n"));
+	*pairarray2 = (struct Pair_T *) NULL;
 
-      debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
-		    *npairs2,*matches2,*mismatches2,*qopens2,*qindels2,*topens2,*tindels2));
-      debug0(Pair_dump_array(*pairarray2,*npairs2,/*zerobasedp*/true));
+      } else {
+	/* Pairarray 2 (cdna_direction -1): */
+	*nmatches_posttrim_2 = Pair_nmatches_posttrim(&(*max_match_length_2),*finalpairs2,/*pos5*/*ambig_end_length_5_2,
+						      /*pos3*/querylength - (*ambig_end_length_3_2));
+	*pairarray2 = make_pairarray(&(*npairs2),&(*finalpairs2),/*cdna_direction*/-1,watsonp,
+				     pairpool,queryseq_ptr,chroffset,chrhigh,
+				     ngap,query_subseq_offset,skiplength);
+	*goodness2 = Pair_fracidentity_array(&(*matches2),&(*unknowns2),&(*mismatches2),
+					     &(*qopens2),&(*qindels2),&(*topens2),&(*tindels2),
+					     &(*ncanonical2),&(*nsemicanonical2),&(*nnoncanonical2),
+					     &min_splice_prob_2,*pairarray2,*npairs2,/*cdna_direction*/-1);
+	*avg_splice_score_2 = avg_donor_score_rev + avg_acceptor_score_rev;
+	
+	debug0(printf("Result 2 (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels, splice score %f\n",
+		      *npairs2,*matches2,*mismatches2,*qopens2,*qindels2,*topens2,*tindels2,*avg_splice_score_2));
+	debug0(Pair_dump_array(*pairarray2,*npairs2,/*zerobasedp*/true));
+      }
 
       *cdna_direction = 0;
       *sensedir = SENSE_NULL;
@@ -14400,6 +14999,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
     *ambig_prob_5_1 = fwd_ambig_prob_5;
     *ambig_prob_3_1 = fwd_ambig_prob_3;
     *sensedir = SENSE_FORWARD;
+    *avg_splice_score_1 = avg_donor_score_fwd + avg_acceptor_score_fwd;
     defect_rate = defect_rate_fwd;
 
   } else if (*cdna_direction < 0) {
@@ -14411,6 +15011,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
     *ambig_prob_5_1 = rev_ambig_prob_5;
     *ambig_prob_3_1 = rev_ambig_prob_3;
     *sensedir = SENSE_ANTI;
+    *avg_splice_score_1 = avg_donor_score_rev + avg_acceptor_score_rev;
     defect_rate = defect_rate_rev;
 
   } else {
@@ -14425,6 +15026,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
     *ambig_prob_5_1 = fwd_ambig_prob_5;
     *ambig_prob_3_1 = fwd_ambig_prob_3;
     *sensedir = SENSE_FORWARD;
+    *avg_splice_score_1 = 0.0;
     defect_rate = defect_rate_fwd;
 #endif
   }
@@ -14450,10 +15052,12 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *finalpairs1, int *np
   *goodness1 = Pair_fracidentity_array(&(*matches1),&(*unknowns1),&(*mismatches1),
 				       &(*qopens1),&(*qindels1),&(*topens1),&(*tindels1),
 				       &(*ncanonical1),&(*nsemicanonical1),&(*nnoncanonical1),
-				       &(*min_splice_prob_1),pairarray1,*npairs1,*cdna_direction);
+				       &min_splice_prob_1,pairarray1,*npairs1,*cdna_direction);
+  /* *avg_splice_score_1 assigned above */
+
   
-  debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n",
-		*npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1));
+  debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels, splice score %f\n",
+		*npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1,*avg_splice_score_1));
   debug0(Pair_dump_array(pairarray1,*npairs1,/*zerobasedp*/true));
   
 #ifdef GSNAP
@@ -14581,30 +15185,54 @@ Stage3_merge_chimera (T this_left, T this_right,
 		      int minpos1, int maxpos1, int minpos2, int maxpos2,
 		      char *queryseq_ptr, char *queryuc_ptr, Pairpool_T pairpool, 
 		      Dynprog_T dynprogL, Dynprog_T dynprogR, int maxpeelback) {
-  List_T path;
+  List_T path, peeled_path, pairs, peeled_pairs, orig_left_pairs, orig_right_pairs;
   bool knownsplicep, chop_exon_p;
   int ambig_end_length_5 = 0, ambig_end_length_3 = 0;	/* Need to be set for build_pairs_end5 and build_path_end3 */
   double ambig_prob_5, ambig_prob_3;
   int dynprogindex_minor = 0;
   Splicetype_T ambig_splicetype;
 
+  Pair_T endpair;
+  int querydp5, querydp3, n_peeled_indels;
+  Chrpos_T genomedp5, genomedp3;
+  bool protectedp;
+
+
+  orig_left_pairs = Pairpool_copy(this_left->pairs,pairpool);
+  orig_right_pairs = Pairpool_copy(this_right->pairs,pairpool);
 
   this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
   this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2);
 
   if (this_left->pairs == NULL && this_right->pairs == NULL) {
+    this_left->pairs = orig_left_pairs;
+    this_right->pairs = orig_right_pairs;
+
+#if 0
     Stage3_free_pairarray(&this_left);
     Stage3_free_pairarray(&this_right);
     this_left->pairarray = (struct Pair_T *) NULL;
     this_right->pairarray = (struct Pair_T *) NULL;
     this_left->pairarray_freeable_p = false;
     this_right->pairarray_freeable_p = false;
+#endif
+    return false;
+
+  } else if (this_left->pairs == NULL || this_right->pairs == NULL) {
+    this_left->pairs = orig_left_pairs;
+    this_right->pairs = orig_right_pairs;
     return false;
 
   } else {
     path = List_reverse(this_left->pairs);
 
-    /* To avoid indels at chimeric join, need to clean ends, extend with nogaps, and then clip*/
+    /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/
+    endpair = (Pair_T) path->first;
+    querydp5 = endpair->querypos + 1;
+    genomedp5 = endpair->genomepos + 1;
+    protectedp = false;
+    path = peel_leftward(&n_peeled_indels,&protectedp,&peeled_path,path,&querydp5,&genomedp5,
+			 maxpeelback,/*stop_at_indels_p*/false);
     path = clean_path_end3_gap_indels(path);
 
     path = build_path_end3(&knownsplicep,&ambig_end_length_3,&ambig_splicetype,&ambig_prob_3,
@@ -14620,21 +15248,31 @@ Stage3_merge_chimera (T this_left, T this_right,
     this_left->pairs = List_reverse(path);
     this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
 
-    /* To avoid indels at chimeric join, need to clean ends, extend with nogaps, and then clip*/
-    this_right->pairs = clean_pairs_end5_gap_indels(this_right->pairs);
+    /* To avoid indels at chimeric join, need to peelback, clean ends, extend with nogaps, and then clip*/
+    pairs = this_right->pairs;
+
+    endpair = (Pair_T) pairs->first;
+    querydp3 = endpair->querypos - 1;
+    genomedp3 = endpair->genomepos - 1;
+    protectedp = false;
+    pairs = peel_rightward(&n_peeled_indels,&protectedp,&peeled_pairs,pairs,&querydp3,&genomedp3,
+			   maxpeelback,/*stop_at_indels_p*/false);
+    pairs = clean_pairs_end5_gap_indels(pairs);
 
-    this_right->pairs = build_pairs_end5(&knownsplicep,&ambig_end_length_5,&ambig_splicetype,&ambig_prob_5,
-					 &chop_exon_p,&dynprogindex_minor,this_right->pairs,
-					 this_right->chroffset,this_right->chrhigh,
-					 /*knownsplice_limit_low*/-1U,/*knownsplice_limit_high*/0,
-					 queryseq_ptr,queryuc_ptr,
-					 this_right->cdna_direction,this_right->watsonp,
-					 /*jump_late_p*/this_right->watsonp ? false : true,
-					 maxpeelback,/*defect_rate*/0.0,pairpool,dynprogR,
-					 /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
-    this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2);
+    pairs = build_pairs_end5(&knownsplicep,&ambig_end_length_5,&ambig_splicetype,&ambig_prob_5,
+			     &chop_exon_p,&dynprogindex_minor,pairs,
+			     this_right->chroffset,this_right->chrhigh,
+			     /*knownsplice_limit_low*/-1U,/*knownsplice_limit_high*/0,
+			     queryseq_ptr,queryuc_ptr,
+			     this_right->cdna_direction,this_right->watsonp,
+			     /*jump_late_p*/this_right->watsonp ? false : true,
+			     maxpeelback,/*defect_rate*/0.0,pairpool,dynprogR,
+			     /*extendp*/true,/*endalign*/QUERYEND_NOGAPS);
+    this_right->pairs = Pair_clip_bounded_list(pairs,minpos2,maxpos2);
 
     if (this_left->pairs == NULL || this_right->pairs == NULL) {
+      this_left->pairs = orig_left_pairs;
+      this_right->pairs = orig_right_pairs;
       return false;
     } else {
       make_pairarrays_chimera(this_left,this_right,queryseq_ptr,pairpool,/*gaplength*/0,ngap);
@@ -15044,6 +15682,118 @@ Stage3_extend_left (T this, int goal,
 }
 
 
+/* Trims the tail of this->pairs: pops pairs off the reversed list while
+   their querypos exceeds goal, then rebuilds this->pairarray and the
+   match/mismatch/indel statistics from what remains.  queryuc_ptr is
+   not used here. */
+void
+Stage3_trim_right (T this, int goal, char *queryseq_ptr, char *queryuc_ptr,
+		  Pairpool_T pairpool) {
+  List_T path;
+  Pair_T pair;
+  int ncanonical, nsemicanonical;	/* Passed to Pair_fracidentity_array; not read here */
+  double min_splice_prob;		/* Likewise */
+
+  debug10(printf("Entered Stage3_trim_right with goal %d\n",goal));
+  debug10(printf("LEFT BEFORE TRIM\n"));
+  debug10(Pair_dump_list(this->pairs,true));
+  debug10(printf("END_LEFT BEFORE TRIM\n"));
+
+  path = List_reverse(this->pairs);
+
+  /* Stop when the list runs out.  Without the NULL test, a goal below
+     every querypos would dereference an exhausted list. */
+  while (path != NULL && ((Pair_T) path->first)->querypos > goal /* && pos <= this->chrhigh */) {
+    path = Pairpool_pop(path,&pair);
+  }
+
+  this->pairs = List_reverse(path);
+
+  debug10(printf("LEFT AFTER TRIM\n"));
+  debug10(Pair_dump_list(this->pairs,true));
+  debug10(printf("END_LEFT AFTER TRIM\n"));
+
+  /* NOTE(review): if every pair was trimmed, an empty list now reaches
+     make_pairarray.  The NULL test on pairarray below suggests that case
+     is anticipated, but confirm. */
+  Stage3_free_pairarray(&this);
+  this->pairarray = make_pairarray(&this->npairs,&this->pairs,this->cdna_direction,
+				   this->watsonp,pairpool,queryseq_ptr,
+				   this->chroffset,this->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
+  this->goodness = Pair_fracidentity_array(&this->matches,&this->unknowns,&this->mismatches,
+					   &this->qopens,&this->qindels,&this->topens,&this->tindels,
+					   &ncanonical,&nsemicanonical,&this->noncanonical,
+					   &min_splice_prob,this->pairarray,this->npairs,this->cdna_direction);
+
+  /* Record whether a pairarray was actually produced */
+  if (this->pairarray == NULL) {
+    this->pairarray_freeable_p = false;
+  } else {
+    this->pairarray_freeable_p = true;
+  }
+
+  return;
+}
+
+
+/* Trims the head of this->pairs: pops pairs off the front of the list
+   while their querypos is below goal, then rebuilds this->pairarray and
+   the match/mismatch/indel statistics from what remains.  queryuc_ptr
+   is not used here. */
+void
+Stage3_trim_left (T this, int goal, char *queryseq_ptr, char *queryuc_ptr,
+		  Pairpool_T pairpool) {
+  List_T pairs;
+  Pair_T pair;
+  int ncanonical, nsemicanonical;	/* Passed to Pair_fracidentity_array; not read here */
+  double min_splice_prob;		/* Likewise */
+
+
+  debug10(printf("Entered Stage3_trim_left with goal %d\n",goal));
+  debug10(printf("RIGHT BEFORE TRIM\n"));
+  debug10(Pair_dump_list(this->pairs,true));
+  debug10(printf("END_RIGHT BEFORE TRIM\n"));
+
+
+  /* Do not call insert_gapholders */
+  pairs = this->pairs;
+
+  /* Stop when the list runs out.  Without the NULL test, a goal above
+     every querypos would dereference an exhausted list. */
+  while (pairs != NULL && ((Pair_T) pairs->first)->querypos < goal) {
+    pairs = Pairpool_pop(pairs,&pair);
+  }
+
+  this->pairs = pairs;
+
+  debug10(printf("RIGHT AFTER TRIM\n"));
+  debug10(Pair_dump_list(this->pairs,true));
+  debug10(printf("END_RIGHT AFTER TRIM\n"));
+
+  /* NOTE(review): if every pair was trimmed, an empty list now reaches
+     make_pairarray.  The NULL test on pairarray below suggests that case
+     is anticipated, but confirm. */
+  Stage3_free_pairarray(&this);
+  this->pairarray = make_pairarray(&this->npairs,&this->pairs,this->cdna_direction,
+				   this->watsonp,pairpool,queryseq_ptr,
+				   this->chroffset,this->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
+  this->goodness = Pair_fracidentity_array(&this->matches,&this->unknowns,&this->mismatches,
+					   &this->qopens,&this->qindels,&this->topens,&this->tindels,
+					   &ncanonical,&nsemicanonical,&this->noncanonical,
+					   &min_splice_prob,this->pairarray,this->npairs,this->cdna_direction);
+
+  /* Record whether a pairarray was actually produced */
+  if (this->pairarray == NULL) {
+    this->pairarray_freeable_p = false;
+  } else {
+    this->pairarray_freeable_p = true;
+  }
+
+  return;
+}
+
+
+
 #if 0
 static void
 adjust_genomepos (T this, int delta) {
@@ -15068,7 +15818,7 @@ merge_local_single (T this_left, T this_right,
 		    int maxpeelback) {
   bool successp;
   Pair_T leftpair, rightpair;
-  List_T path;
+  List_T path, orig_left_pairs, orig_right_pairs;
   bool watsonp, filledp;
 
   int ncanonical, nsemicanonical;
@@ -15081,17 +15831,25 @@ merge_local_single (T this_left, T this_right,
   int dynprogindex_minor = 0;
 
 
+  orig_left_pairs = Pairpool_copy(this_left->pairs,pairpool);
+  orig_right_pairs = Pairpool_copy(this_right->pairs,pairpool);
+
   this_left->pairs = Pair_clip_bounded_list(this_left->pairs,minpos1,maxpos1);
   this_right->pairs = Pair_clip_bounded_list(this_right->pairs,minpos2,maxpos2);
 
-  Stage3_free_pairarray(&this_left);
-  Stage3_free_pairarray(&this_right);
+  /* Stage3_free_pairarray(&this_left); */
+  /* Stage3_free_pairarray(&this_right); */
 
   if (this_left->pairs == NULL && this_right->pairs == NULL) {
+    this_left->pairs = orig_left_pairs;
+    this_right->pairs = orig_right_pairs;
+
+#if 0
     this_left->pairarray = (struct Pair_T *) NULL;
     this_right->pairarray = (struct Pair_T *) NULL;
     this_left->pairarray_freeable_p = false;
     this_right->pairarray_freeable_p = false;
+#endif
     return false;
 
   } else if ((watsonp = this_left->watsonp) == true) {
@@ -15223,6 +15981,7 @@ merge_local_single (T this_left, T this_right,
   }
 
   if (successp == false) {
+    this_left->pairs = orig_left_pairs;
     this_left->pairarray = make_pairarray(&this_left->npairs,&this_left->pairs,this_left->cdna_direction,
 					  this_left->watsonp,pairpool,queryseq_ptr,
 					  this_left->chroffset,this_left->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
@@ -15231,6 +15990,7 @@ merge_local_single (T this_left, T this_right,
 						  &ncanonical,&nsemicanonical,&this_left->noncanonical,
 						  &min_splice_prob,this_left->pairarray,this_left->npairs,this_left->cdna_direction);
 
+    this_right->pairs = orig_right_pairs;
     this_right->pairarray = make_pairarray(&this_right->npairs,&this_right->pairs,this_right->cdna_direction,
 					   this_right->watsonp,pairpool,queryseq_ptr,
 					   this_right->chroffset,this_right->chrhigh,ngap,/*subseq_offset*/0,/*skiplength*/0);
@@ -15610,8 +16370,112 @@ Stage3_merge_local (T this_left, T this_right,
 }
 
 
+/* Helper for Stage3_split.  Builds one new Stage3 object from pairs:
+   the pairarray and the match/mismatch/indel statistics are computed
+   from pairs, the sequence lengths and offsets are read from queryseq,
+   and the remaining arguments to Stage3_new (directions, chromosome
+   fields, watsonp, strain) are copied from this.  The altstrain_iit
+   argument is passed as NULL. */
+static Stage3_T
+split_segment_new (T this, List_T pairs, Sequence_T queryseq, Pairpool_T pairpool) {
+  struct Pair_T *pairarray;
+  int npairs;
+  int goodness, matches, unknowns, mismatches, qopens, qindels, topens, tindels,
+    ncanonical, nsemicanonical, nnoncanonical;
+  double min_splice_prob;
+
+  /* Pair_dump_list(pairs,true); */
+
+  pairarray = make_pairarray(&npairs,&pairs,this->cdna_direction,this->watsonp,
+			     pairpool,/*queryseq_ptr*/Sequence_fullpointer(queryseq),
+			     this->chroffset,this->chrhigh,
+			     ngap,/*query_subseq_offset*/Sequence_subseq_offset(queryseq),
+			     /*skiplength*/Sequence_skiplength(queryseq));
+
+  goodness = Pair_fracidentity_array(&matches,&unknowns,&mismatches,
+				     &qopens,&qindels,&topens,&tindels,
+				     &ncanonical,&nsemicanonical,&nnoncanonical,
+				     &min_splice_prob,pairarray,npairs,this->cdna_direction);
+
+  return Stage3_new(pairarray,pairs,npairs,goodness,this->cdna_direction,this->sensedir,
+		    matches,unknowns,mismatches,
+		    qopens,qindels,topens,tindels,ncanonical,nsemicanonical,nnoncanonical,
+		    this->chrnum,this->chroffset,this->chrhigh,this->chrlength,this->watsonp,
+		    /*querylength*/Sequence_fulllength(queryseq),
+		    /*skiplength*/Sequence_skiplength(queryseq),
+		    /*trimlength*/Sequence_trimlength(queryseq),
+		    this->straintype,this->strain,/*altstrain_iit*/NULL);
+}
 
 
+/* Splits this alignment at every gap pair whose genomejump exceeds
+   maxintronlen.  Returns NULL when no such gap exists, leaving this
+   untouched.  Otherwise returns a list of newly created Stage3 objects,
+   one for each run of pairs between oversized gaps; the oversized gap
+   pairs themselves are not placed in any result.
+   NOTE(review): the pairs are moved out of this->pairs (List_reverse
+   followed by List_transfer_one) and this->pairs is not reset, so it
+   presumably should not be used after a successful split -- confirm
+   against the caller. */
+List_T
+Stage3_split (T this, Sequence_T queryseq, Pairpool_T pairpool) {
+  List_T split_objects = NULL;
+  Stage3_T stage3;
+  List_T path, pairs, p;
+  Pair_T pair;
+  bool large_intron_p;
+
+  debug(printf("\n** Starting Stage3_split with watsonp %d and cdna_direction %d\n",this->watsonp,this->cdna_direction));
+
+  /* One oversized gap is enough to require a split, so stop at the first */
+  large_intron_p = false;
+  for (p = this->pairs; p != NULL; p = p->rest) {
+    pair = (Pair_T) p->first;
+    if (pair->gapp == true && pair->genomejump > maxintronlen) {
+      large_intron_p = true;
+      break;
+    }
+  }
+
+  if (large_intron_p == false) {
+    return (List_T) NULL;
+  }
+
+  pairs = (List_T) NULL;
+  path = List_reverse(this->pairs);
+  while (path != NULL) {
+    pair = (Pair_T) path->first;
+    if (pair->gapp == false || pair->genomejump <= maxintronlen) {
+      /* Ordinary pair or acceptable gap: add it to the current segment */
+#ifdef WASTE
+      /* Unlike List_transfer_one, which is handed &path, this call does
+	 not receive path, so step past the pair explicitly; otherwise
+	 the loop cannot make progress */
+      pairs = Pairpool_push_existing(pairs,pairpool,pair);
+      path = path->rest;
+#else
+      pairs = List_transfer_one(pairs,&path);
+#endif
+
+    } else {
+      /* Oversized gap: close the current segment and start a new path.
+	 NOTE(review): pairs is empty here if the gap is the first
+	 element of path or directly follows another oversized gap --
+	 confirm that cannot occur or that an empty segment is acceptable */
+      stage3 = split_segment_new(this,pairs,queryseq,pairpool);
+      split_objects = List_push(split_objects,(void *) stage3);
+      pairs = (List_T) NULL;
+      path = path->rest;	/* Discard gap */
+    }
+  }
+
+  /* Handle final path */
+  stage3 = split_segment_new(this,pairs,queryseq,pairpool);
+  split_objects = List_push(split_objects,(void *) stage3);
+
+  return split_objects;
+}
 
 
 #ifndef PMAP
diff --git a/src/stage3.h b/src/stage3.h
index c3a5899..9d5d364 100644
--- a/src/stage3.h
+++ b/src/stage3.h
@@ -1,4 +1,4 @@
-/* $Id: stage3.h 198076 2016-09-21 00:29:14Z twu $ */
+/* $Id: stage3.h 207201 2017-06-12 18:40:57Z twu $ */
 #ifndef STAGE3_INCLUDED
 #define STAGE3_INCLUDED
 
@@ -41,7 +41,7 @@ typedef struct Stage3_T *Stage3_T;
 
 typedef enum {SIMPLE, SUMMARY, ALIGNMENT, COMPRESSED, CONTINUOUS, CONTINUOUS_BY_EXON,
 	      EXONS_CDNA, EXONS_GENOMIC, CDNA, PROTEIN_GENOMIC,
-	      PSL_NT, PSL_PRO, GFF3_GENE, GFF3_MATCH_CDNA, GFF3_MATCH_EST,
+	      PSL_NT, PSL_PRO, GFF3_GENE, GFF3_MATCH_CDNA, GFF3_MATCH_EST, BEDPE,
 	      SAM, COORDS, SPLICESITES, INTRONS, MAP_RANGES, MAP_EXONS} Printtype_T;
 
 /* POST_CANONICAL is the path_compute_final() step */
@@ -161,6 +161,12 @@ Stage3_querystart_cmp (const void *a, const void *b);
 extern int
 Stage3_queryend_cmp (const void *a, const void *b);
 extern int
+Stage3_chrnum_cmp (const void *a, const void *b);
+extern int
+Stage3_chrnum_querystart_cmp (const void *a, const void *b);
+extern int
+Stage3_chrnum_queryend_cmp (const void *a, const void *b);
+extern int
 Stage3_identity_cmp (const void *a, const void *b);
 extern bool
 Stage3_overlap (T x, T y);
@@ -221,6 +227,9 @@ Stage3_print_gff3 (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_i
 		   Sequence_T queryseq, int querylength, Printtype_T printtype, char *sourcename);
 #ifndef PMAP
 extern void
+Stage3_print_bedpe (Filestring_T fp, T this, int pathnum, Univ_IIT_T chromosome_iit,
+		    Sequence_T queryseq, int querylength);
+extern void
 Stage3_print_sam (Filestring_T fp, char *abbrev, T this, int pathnum, int npaths_primary, int npaths_altloc,
 		  int absmq_score, int second_absmq, int mapq_score,
 		  Univ_IIT_T chromosome_iit, Sequence_T usersegment,
@@ -281,13 +290,14 @@ extern int
 Stage3_good_part (struct Pair_T *pairarray, int npairs, int pos5, int pos3);
 
 extern struct Pair_T *
-Stage3_compute (int *cdna_direction, int *sensedir, List_T *pairs1, int *npairs1, int *goodness1,
+Stage3_compute (int *cdna_direction, int *sensedir,
+		List_T *pairs1, int *npairs1, int *goodness1,
 		int *matches1, int *nmatches_posttrim_1, int *max_match_length_1,
 		int *ambig_end_length_5_1, int *ambig_end_length_3_1,
 		Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1,
 		double *ambig_prob_5_1, double *ambig_prob_3_1,
 		int *unknowns1, int *mismatches1, int *qopens1, int *qindels1, int *topens1, int *tindels1,
-		int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *min_splice_prob_1,
+		int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *avg_splice_score_1,
 
 #ifdef GSNAP
 		struct Pair_T **pairarray2, List_T *pairs2, int *npairs2, int *goodness2,
@@ -296,7 +306,7 @@ Stage3_compute (int *cdna_direction, int *sensedir, List_T *pairs1, int *npairs1
 		Splicetype_T *ambig_splicetype_5_2, Splicetype_T *ambig_splicetype_3_2,
 		double *ambig_prob_5_2, double *ambig_prob_3_2,
 		int *unknowns2, int *mismatches2, int *qopens2, int *qindels2, int *topens2, int *tindels2,
-		int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *min_splice_prob_2,
+		int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *avg_splice_score_2,
 #endif
 
 		List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends,
@@ -344,6 +354,13 @@ Stage3_extend_left (T this, int goal,
 		    bool max_extend_p, Pairpool_T pairpool,
 		    int maxpeelback);
 
+extern void
+Stage3_trim_right (T this, int goal, char *queryseq_ptr, char *queryuc_ptr,
+		   Pairpool_T pairpool);
+extern void
+Stage3_trim_left (T this, int goal, char *queryseq_ptr, char *queryuc_ptr,
+		  Pairpool_T pairpool);
+
 extern bool
 Stage3_merge_local (T this_left, T this_right,
 		    int minpos1, int maxpos1, int minpos2, int maxpos2, int genestrand,
@@ -355,6 +372,9 @@ Stage3_merge_local (T this_left, T this_right,
 		    int maxpeelback,
 		    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
 
+extern List_T
+Stage3_split (T this, Sequence_T queryseq, Pairpool_T pairpool);
+
 #ifndef PMAP
 extern void
 Stage3_guess_cdna_direction (T this);
diff --git a/src/stage3hr.c b/src/stage3hr.c
index f0e8170..b701757 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 200238 2016-11-08 00:59:56Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 209126 2017-08-15 19:34:28Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -22,6 +22,7 @@ static char rcsid[] = "$Id: stage3hr.c 200238 2016-11-08 00:59:56Z twu $";
 #include "mapq.h"
 #include "pair.h"		/* For Pair_print_gsnap and Pair_compute_mapq */
 #include "pairdef.h"
+#include "cigar.h"
 #include "comp.h"		/* For Stage3end_run_gmap */
 #include "maxent_hr.h"
 #include "fastlog.h"
@@ -207,6 +208,10 @@ static char rcsid[] = "$Id: stage3hr.c 200238 2016-11-08 00:59:56Z twu $";
 #define MAPQ_MAXIMUM_SCORE 40
 
 
+static bool omit_concordant_uniq_p = false;
+static bool omit_concordant_mult_p = false;
+
+
 /* Controlled by --end-detail.  high (2): true/true, medium (1): false/true, low (0): false/false */
 
 /* Previously had major performance hit, about 3x, for 5% of cases,
@@ -230,17 +235,16 @@ static int *tally_divint_crosstable;
 static IIT_T runlength_iit;
 static int *runlength_divint_crosstable;
 
-static int pairmax_linear;
-static int pairmax_circular;
+static Chrpos_T pairmax_linear;
+static Chrpos_T pairmax_circular;
 
-#if 0
-static int expected_pairlength;
-static int pairlength_deviation;
-#else
-static int expected_pairlength_low;
-static int expected_pairlength_high;
-static int expected_pairlength_very_high;
-#endif
+static Chrpos_T expected_pairlength;
+static Chrpos_T pairlength_deviation;
+static int maxpeelback;
+
+static Chrpos_T expected_pairlength_low;
+static Chrpos_T expected_pairlength_high;
+static Chrpos_T expected_pairlength_very_high;
 
 static int amb_penalty = 2;
 static int localsplicing_penalty;
@@ -283,14 +287,15 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
 		IIT_T genes_iit_in, int *genes_divint_crosstable_in,
 		IIT_T tally_iit_in, int *tally_divint_crosstable_in,
 		IIT_T runlength_iit_in, int *runlength_divint_crosstable_in,
-		bool distances_observed_p, int pairmax_linear_in, int pairmax_circular_in,
-		Chrpos_T expected_pairlength, Chrpos_T pairlength_deviation,
+		bool distances_observed_p, Chrpos_T pairmax_linear_in, Chrpos_T pairmax_circular_in,
+		Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in, int maxpeelback_in,
 		int localsplicing_penalty_in, int indel_penalty_middle_in,
 		int antistranded_penalty_in, bool favor_multiexon_p_in,
 		int gmap_min_nconsecutive_in, int end_detail, int subopt_levels_in,
 		int max_middle_insertions_in, int max_middle_deletions_in,
 		bool novelsplicingp_in, Chrpos_T shortsplicedist_in, bool merge_samechr_p_in,
 		bool *circularp_in, bool *altlocp_in, Univcoord_T *alias_starts_in, Univcoord_T *alias_ends_in,
+		bool omit_concordant_uniq_p_in, bool omit_concordant_mult_p_in,
 		char *failedinput_root_in, bool print_m8_p_in, bool want_random_p_in) {
 
   invert_first_p = invert_first_p_in;
@@ -314,6 +319,10 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
 
   pairmax_linear = pairmax_linear_in;
   pairmax_circular = pairmax_circular_in;
+  expected_pairlength = expected_pairlength_in;
+  pairlength_deviation = pairlength_deviation_in;
+  maxpeelback = maxpeelback_in;
+
   if (pairlength_deviation > expected_pairlength) {
     expected_pairlength_low = 0;
   } else {
@@ -372,6 +381,9 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
 
   failedinput_root = failedinput_root_in;
 
+  omit_concordant_uniq_p = omit_concordant_uniq_p_in;
+  omit_concordant_mult_p = omit_concordant_mult_p_in;
+
   print_m8_p = print_m8_p_in;
   want_random_p = want_random_p_in;
 
@@ -445,8 +457,10 @@ struct T {
   int nmatches;
   int nmatches_posttrim;
 
-  int gmap_max_match_length;		/* Used only by GMAP */
-  double gmap_min_splice_prob;		/* Used only by GMAP */
+  int gmap_goodness;		/* Used only by GMAP */
+  int gmap_max_match_length;	/* Used only by GMAP */
+  double gmap_avg_splice_score;	/* Used only by GMAP */
+  double splice_score;		/* Used by various SPLICE types */
 
   /* trim_left and trim_right should really be named trim_start and trim_end */
   /* if trim_left_splicep or trim_right_splicep is true, then trim is of type "unknown amb" */
@@ -477,6 +491,7 @@ struct T {
   int gmap_nindelbreaks;
   int gmap_cdna_direction;
   int gmap_nintrons;
+  int gmap_nbadintrons;		/* Filled in during Stage3pair_optimal_score */
   int sensedir;			/* for splicing */
 
   int nsplices;
@@ -529,6 +544,7 @@ struct T {
   List_T substrings_1toN;	/* query position 1 to N */
   List_T substrings_Nto1;	/* query position N to 1.  Keeps only pointers to the substrings. */
   List_T substrings_LtoH;	/* Chromosomal low-to-high.  Keeps only pointers to the substrings. */
+  List_T substrings_HtoL;	/* Chromosomal high-to-low.  Keeps only pointers to the substrings. */
 
   List_T junctions_LtoH;
   List_T junctions_1toN;
@@ -557,7 +573,7 @@ struct Stage3pair_T {
 
   Univcoord_T low;
   Univcoord_T high;
-  int insertlength;
+  Chrpos_T insertlength;
   int insertlength_expected_sign;	/* 1 if in (expected_pairlength_low, expected_pairlength_high),
 					   0 if in (expected_pairlength_low, expected_pairlength_very_high), and
 					   -1 if < expected_pairlength_low or > expected_pairlength_very_high */
@@ -685,6 +701,16 @@ Stage3end_effective_chrnum (T this) {
   }
 }
 
+/* Accessor for other_chrnum that tolerates a NULL hit */
+Chrnum_T
+Stage3end_other_chrnum (T this) {
+  if (this != NULL) {
+    return this->other_chrnum;
+  }
+  /* Can happen if we call upon a mate in a halfmapping */
+  return 0;
+}
+
 Univcoord_T
 Stage3end_chroffset (T this) {
   return this->chroffset;
@@ -787,13 +813,18 @@ Stage3end_score (T this) {
 }
 
 int
+Stage3end_gmap_goodness (T this) {
+  return this->gmap_goodness;
+}
+
+int
 Stage3end_gmap_max_match_length (T this) {
   return this->gmap_max_match_length;
 }
 
 double
-Stage3end_gmap_min_splice_prob (T this) {
-  return this->gmap_min_splice_prob;
+Stage3end_gmap_avg_splice_score (T this) {
+  return this->gmap_avg_splice_score;
 }
 
 
@@ -1158,6 +1189,71 @@ Stage3end_substring_low (T this, int hardclip_low) {
 }
 
 
+#if 0
+/* Needed only to generate mate_chrpos_high, which we don't print currently */
+/* Modified from Stage3end_substring_low.  Compiled out by the enclosing #if 0. */
+Substring_T
+Stage3end_substring_high (T this, int hardclip_high) {
+  List_T p;
+  /* Walks substrings_HtoL past an ambiguous head and past entries excluded by hardclip_high */
+  if (this == NULL) {
+    return (Substring_T) NULL;
+    /* Plus strand: skip while querystart >= querylength - hardclip_high */
+  } else if (this->plusp == true) {
+    p = this->substrings_HtoL;
+    if (Substring_ambiguous_p((Substring_T) List_head(p)) == true) {
+      p = List_next(p);
+    }
+    while (p != NULL && Substring_querystart((Substring_T) List_head(p)) >= this->querylength - hardclip_high) {
+      debug15(printf("Plus: Skipp substring %d..%d against %d = querylength %d - hardclip_high %d\n",
+		     Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+		     this->querylength - hardclip_high,this->querylength,hardclip_high));
+      p = List_next(p);
+    }
+    assert(p != NULL);	/* The NULL test that follows still guards builds where assert is compiled out */
+    if (p == NULL) {
+      return (Substring_T) NULL;
+    } else {
+      debug15(printf("Plus: Returning substring %d..%d against %d = querylength %d - hardclip_high %d\n",
+		     Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+		     this->querylength - hardclip_high,this->querylength,hardclip_high));
+      return (Substring_T) List_head(p);
+    }
+    /* Minus strand: skip while queryend <= hardclip_high */
+  } else {
+#ifdef DEBUG15
+    for (p = this->substrings_HtoL; p != NULL; p = List_next(p)) {
+      printf("HtoL: %d..%d\n",
+	     Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)));
+    }
+#endif
+    /* NOTE(review): in both branches List_head(p) is read before p is tested for NULL -- confirm substrings_HtoL is never empty */
+    p = this->substrings_HtoL;
+    if (Substring_ambiguous_p((Substring_T) List_head(p)) == true) {
+      p = List_next(p);
+    }
+
+    while (p != NULL && Substring_queryend((Substring_T) List_head(p)) <= hardclip_high) {
+      debug15(printf("Minus: Skipping substring %d..%d against hardclip_high %d\n",
+		     Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+		     hardclip_high));
+      p = List_next(p);
+    }
+    assert(p != NULL);
+    if (p == NULL) {
+      return (Substring_T) NULL;
+    } else {
+      debug15(printf("Minus: Returning substring %d..%d against hardclip_high %d\n",
+		     Substring_querystart((Substring_T) List_head(p)),Substring_queryend((Substring_T) List_head(p)),
+		     hardclip_high));
+      return (Substring_T) List_head(p);
+    }
+  }
+}
+#endif
+
+
+
 Substring_T
 Stage3end_substring_containing (T this, int querypos) {
   Substring_T substring;
@@ -1620,6 +1716,30 @@ Stage3end_gmap_queryend (T this) {
   return this->pairarray[this->npairs - 1].querypos;
 }
 
+/* Query start: pairarray[0] for GMAP hits, else the head of substrings_1toN */
+static int
+Stage3end_querystart (T this) {
+  Substring_T head;
+
+  if (this->hittype != GMAP) {
+    head = (Substring_T) List_head(this->substrings_1toN);
+    return Substring_querystart(head);
+  }
+  return this->pairarray[0].querypos;
+}
+
+/* Query end: last pairarray entry for GMAP hits, else the head of substrings_Nto1 */
+static int
+Stage3end_queryend (T this) {
+  Substring_T head;
+
+  if (this->hittype != GMAP) {
+    head = (Substring_T) List_head(this->substrings_Nto1);
+    return Substring_queryend(head);
+  }
+  return this->pairarray[this->npairs - 1].querypos;
+}
+
 int
 Stage3end_terminal_trim (T this) {
   Substring_T substring;
@@ -1877,6 +1997,7 @@ Stage3end_free (T *old) {
   List_free(&(*old)->substrings_1toN);
   List_free(&(*old)->substrings_Nto1);
   List_free(&(*old)->substrings_LtoH);
+  List_free(&(*old)->substrings_HtoL);
 
   for (p = (*old)->junctions_1toN; p != NULL; p = List_next(p)) {
     junction = (Junction_T) List_head(p);
@@ -2193,8 +2314,7 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
 
   } else if (hit->plusp == true) {
 #ifdef DEBUG15
-    printf("plus.  Checking common genomicpos %llu against\n",
-	   common_genomicpos - hit->chroffset);
+    printf("plus.  Checking common genomicpos %llu against\n",common_genomicpos - hit->chroffset);
     for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
       substring = (Substring_T) List_head(p);
       printf("substring %p: %u..%u, trim %d..%d\n",
@@ -2243,8 +2363,7 @@ find_ilengths (int *ilength_low, int *ilength_high, Stage3end_T hit, Univcoord_T
     }
   } else {
 #ifdef DEBUG15
-    printf("minus.  Checking common genomicpos %llu against\n",
-      common_genomicpos - hit->chroffset);
+    printf("minus.  Checking common genomicpos %llu against\n",common_genomicpos - hit->chroffset);
     for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
       substring = (Substring_T) List_head(p);
       printf("substring %p: %u..%u, trim %d..%d\n",
@@ -4106,12 +4225,16 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
 	  common_shift = common_right/2 - (common_left - 1)/2;
 	  debug15(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
 			 common_shift,common_right,common_left));
+	  assert(ilength5_low > 0);
+	  assert(ilength3_low > 0);
 	  ilength5_low -= 1;
 	  ilength3_low -= 1;
 	} else {
 	  common_shift = (common_right - 1)/2 - common_left/2;
 	  debug15(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
 			 common_shift,common_right,common_left));
+	  assert(ilength5_high > 0);
+	  assert(ilength3_high > 0);
 	  ilength5_high -= 1;
 	  ilength3_high -= 1;
 	}
@@ -4171,9 +4294,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
 	      return 0;
 	    } else if (ilength3_low > ilength5_high) {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength3_low > 0);
 	      ilength3_low -= 1;
 	    } else {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength5_high > 0);
 	      ilength5_high -= 1;
 	    }
 	    debug15(printf("Even: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
@@ -4257,9 +4382,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
 	      return 0;
 	    } else if (ilength5_low > ilength3_high) {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength5_low > 0);
 	      ilength5_low -= 1;
 	    } else {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength3_high > 0);
 	      ilength3_high -= 1;
 	    }
 	    debug15(printf("Even: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
@@ -4329,12 +4456,16 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
 	  common_shift = common_right/2 - (common_left - 1)/2;
 	  debug15(printf("Common shift is %d = common_right %d/2 - (common_left %d - 1)/2\n",
 			 common_shift,common_right,common_left));
+	  assert(ilength5_low > 0);
+	  assert(ilength3_low > 0);
 	  ilength5_low -= 1;
 	  ilength3_low -= 1;
 	} else {
 	  common_shift = (common_right - 1)/2 - common_left/2;
 	  debug15(printf("Common shift is %d = (common_right %d - 1)/2 - common_left %d/2\n",
 			 common_shift,common_right,common_left));
+	  assert(ilength5_high > 0);
+	  assert(ilength3_high > 0);
 	  ilength5_high -= 1;
 	  ilength3_high -= 1;
 	}
@@ -4395,9 +4526,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
 	      return 0;
 	    } else if (ilength3_low > ilength5_high) {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength3_low > 0);
 	      ilength3_low -= 1;
 	    } else {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength5_high > 0);
 	      ilength5_high -= 1;
 	    }
 	    debug15(printf("Even: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
@@ -4481,9 +4614,11 @@ Stage3pair_overlap (int *hardclip5_low, int *hardclip5_high, int *hardclip3_low,
 	      return 0;
 	    } else if (ilength5_low > ilength3_high) {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength5_low > 0);
 	      ilength5_low -= 1;
 	    } else {
 	      debug15(printf("Uneven: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
+	      assert(ilength3_high > 0);
 	      ilength3_high -= 1;
 	    }
 	    debug15(printf("Even: ilengths5: %d|%d.  ilengths3: %d|%d\n",ilength5_low,ilength5_high,ilength3_low,ilength3_high));
@@ -4682,8 +4817,10 @@ Stage3end_copy (T old) {
   new->ntscore = old->ntscore;
   new->nmatches_posttrim = old->nmatches_posttrim;
   new->nmatches = old->nmatches;
+  new->gmap_goodness = old->gmap_goodness;
   new->gmap_max_match_length = old->gmap_max_match_length;
-  new->gmap_min_splice_prob = old->gmap_min_splice_prob;
+  new->gmap_avg_splice_score = old->gmap_avg_splice_score;
+  new->splice_score = old->splice_score;
 
   new->trim_left = old->trim_left;
   new->trim_right = old->trim_right;
@@ -4709,6 +4846,7 @@ Stage3end_copy (T old) {
   new->gmap_nindelbreaks = old->gmap_nindelbreaks;
   new->gmap_cdna_direction = old->gmap_cdna_direction;
   new->gmap_nintrons = old->gmap_nintrons;
+  new->gmap_nbadintrons = old->gmap_nbadintrons;
   new->sensedir = old->sensedir;
 
   new->gmap_start_endtype = old->gmap_start_endtype;
@@ -4719,6 +4857,7 @@ Stage3end_copy (T old) {
   new->substrings_1toN = (List_T) NULL;
   new->substrings_Nto1 = (List_T) NULL;
   new->substrings_LtoH = (List_T) NULL;
+  new->substrings_HtoL = (List_T) NULL;
 
   new->junctions_1toN = (List_T) NULL;
   new->junctions_Nto1 = (List_T) NULL;
@@ -4760,9 +4899,11 @@ Stage3end_copy (T old) {
       if (old->sensedir == SENSE_FORWARD) {
 	new->substrings_LtoH = List_copy(new->substrings_1toN);
 	new->junctions_LtoH = List_copy(new->junctions_1toN);
+	new->substrings_HtoL = List_copy(new->substrings_Nto1);
       } else if (old->sensedir == SENSE_ANTI) {
 	new->substrings_LtoH = List_copy(new->substrings_Nto1);
 	new->junctions_LtoH = List_copy(new->junctions_Nto1);
+	new->substrings_HtoL = List_copy(new->substrings_1toN);
       } else {
 	abort();
       }
@@ -4771,9 +4912,11 @@ Stage3end_copy (T old) {
       if (old->plusp == true) {
 	new->substrings_LtoH = List_copy(new->substrings_1toN);
 	new->junctions_LtoH = List_copy(new->junctions_1toN);
+	new->substrings_HtoL = List_copy(new->substrings_Nto1);
       } else {
 	new->substrings_LtoH = List_copy(new->substrings_Nto1);
 	new->junctions_LtoH = List_copy(new->junctions_Nto1);
+	new->substrings_HtoL = List_copy(new->substrings_1toN);
       }
     }
     assert(Substring_querystart(List_head(new->substrings_1toN)) <= Substring_querystart(List_head(new->substrings_Nto1)));
@@ -4946,8 +5089,8 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
   bool contradictionp;
 
 
-  debug0(printf("Entered Stage3end_new_substrings at left %u [%u], with plusp %d, orig_sensedir %d, and endpoints %s\n",
-		Uintlist_head(lefts),Uintlist_head(lefts) - chroffset,plusp,orig_sensedir,Intlist_to_string(endpoints)));
+  debug0(printf("Entered Stage3end_new_substrings at left %u [%u], with chrnum %d, plusp %d, orig_sensedir %d, and endpoints %s\n",
+		Uintlist_head(lefts),Uintlist_head(lefts) - chroffset,chrnum,plusp,orig_sensedir,Intlist_to_string(endpoints)));
   debug0(printf("There are %d endpoints, %d lefts, %d nmismatches, and %d junctions\n",
 		Intlist_length(endpoints),Uintlist_length(lefts),Intlist_length(nmismatches_list),List_length(junctions)));
   debug0(printf("Ambig left %p, right %p\n",left_ambig,right_ambig));
@@ -5016,7 +5159,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
       } else if (genomicstart < chroffset) {
 	outofbounds_start = chroffset - genomicstart;
 	outofbounds_end = genomicend - chroffset;
-	debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+	debug0(printf("1. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
 	Junction_gc(&junctions);
 	debug0(printf("Stage3end_new_substrings returning NULL\n"));
 	return (T) NULL;
@@ -5036,7 +5179,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
       } else if (genomicend > chrhigh) {
 	outofbounds_start = chrhigh - genomicstart;
 	outofbounds_end = genomicend - chrhigh;
-	debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+	debug0(printf("2. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
 	Junction_gc(&junctions);
 	debug0(printf("Stage3end_new_substrings returning NULL\n"));
 	return (T) NULL;
@@ -5173,7 +5316,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
       } else if (genomicend < chroffset) {
 	outofbounds_end = chroffset - genomicend;
 	outofbounds_start = genomicstart - chroffset;
-	debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+	debug0(printf("3. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
 	Junction_gc(&junctions);
 	debug0(printf("Stage3end_new_substrings returning NULL\n"));
 	return (T) NULL;
@@ -5192,7 +5335,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
       } else if (genomicstart > chrhigh) {
 	outofbounds_end = chrhigh - genomicend;
 	outofbounds_start = genomicstart - chrhigh;
-	debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+	debug0(printf("4. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
 	Junction_gc(&junctions);
 	debug0(printf("Stage3end_new_substrings returning NULL\n"));
 	return (T) NULL;
@@ -5337,6 +5480,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
   }
 #else
   /* Correct for both plus and minus */
+  new->substrings_HtoL = List_copy(new->substrings_LtoH);
   new->substrings_LtoH = List_reverse(new->substrings_LtoH);
   if (plusp == true) {
     new->substrings_1toN = List_reverse(new->substrings_1toN);
@@ -5385,8 +5529,17 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
   new->genestrand = genestrand;
   new->sarrayp = sarrayp;
   new->gmap_source = GMAP_NOT_APPLICABLE;
+  new->gmap_goodness = 0;
+  new->gmap_max_match_length = 0;
+  new->gmap_avg_splice_score = 0.0;
+  new->splice_score = 0.0;
   new->improved_by_gmap_p = false;
 
+  new->gmap_nindelbreaks = 0;
+  new->gmap_cdna_direction = 0;
+  new->gmap_nintrons = 0;
+  new->gmap_nbadintrons = 0;
+
   new->distant_splice_p = false;
   new->chrnum = new->effective_chrnum = chrnum;
   new->other_chrnum = 0;
@@ -5495,7 +5648,7 @@ Stage3end_new_substrings (int *found_score, Intlist_T endpoints,
     debug0(printf("Stage3end_new_substrings returning NULL\n"));
     return (T) NULL;
 
-  } else if (new->circularalias >= 0) {
+  } else if (new->circularpos >= 0) {
     new->altlocp = false;
     debug0(printf("Returning circular %p from Stage3end_new_substrings with found_score %d\n",new,*found_score));
     debug0(printf("Stage3end_new_substrings returning %p\n\n",new));
@@ -5550,8 +5703,8 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
     max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
     unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
     ncanonical2, nsemicanonical2, nnoncanonical2;
-  double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
-  double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
   Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
   Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
   Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
@@ -5815,7 +5968,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
 				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 				       &ambig_prob_5_1,&ambig_prob_3_1,
 				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 				       &pairarray2,&pairs2,&npairs2,&goodness2,
 				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -5823,7 +5976,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
 				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 				       &ambig_prob_5_2,&ambig_prob_3_2,
 				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 				       stage2pairs,all_stage2_starts,all_stage2_ends,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -5843,7 +5996,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
 				       oligoindices_minor,diagpool,cellpool)) == NULL) {
 	hit1 = (T) NULL;
 
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
 					 pairarray1,npairs1);
 	start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
@@ -5854,7 +6007,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/start,/*genomiclength*/end - start + 1,
 				       /*plusp*/true,genestrand,
@@ -5874,7 +6027,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
 	if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 					ambig_end_length_5_2,ambig_end_length_3_2,
 					ambig_splicetype_5_2,ambig_splicetype_3_2,
-					min_splice_prob_2,
+					avg_splice_score_2,goodness2,
 					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 					/*left*/start,/*genomiclength*/end - start + 1,
 					/*plusp*/true,genestrand,
@@ -5895,7 +6048,7 @@ Stage3end_substrings_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int quer
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/start,/*genomiclength*/end - start + 1,
 				       /*plusp*/true,genestrand,
@@ -5955,8 +6108,8 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
     max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
     unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
     ncanonical2, nsemicanonical2, nnoncanonical2;
-  double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
-  double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
   Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
   Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
   Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
@@ -6224,7 +6377,7 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
 				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 				       &ambig_prob_5_1,&ambig_prob_3_1,
 				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 				       &pairarray2,&pairs2,&npairs2,&goodness2,
 				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -6232,7 +6385,7 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
 				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 				       &ambig_prob_5_2,&ambig_prob_3_2,
 				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 				       stage2pairs,all_stage2_starts,all_stage2_ends,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -6252,17 +6405,18 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
 				       oligoindices_minor,diagpool,cellpool)) == NULL) {
 	hit1 = (T) NULL;
 
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
 					 pairarray1,npairs1);
 	start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
 			    /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
 	end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
 			       /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/end,/*genomiclength*/start - end + 1,
 				       /*plusp*/false,genestrand,
@@ -6278,10 +6432,11 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
 			    /*plusterm*/Pair_querypos(&(pairarray2[0])),this->chrhigh);
 	end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
 			       /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chroffset);
+
 	if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 					ambig_end_length_5_2,ambig_end_length_3_2,
 					ambig_splicetype_5_2,ambig_splicetype_3_2,
-					min_splice_prob_2,
+					avg_splice_score_2,goodness2,
 					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 					/*left*/end,/*genomiclength*/start - end + 1,
 					/*plusp*/false,genestrand,
@@ -6298,10 +6453,11 @@ Stage3end_substrings_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int que
 			    /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
 	end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
 			       /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/end,/*genomiclength*/start - end + 1,
 				       /*plusp*/false,genestrand,
@@ -6352,8 +6508,8 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
     max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
     unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
     ncanonical2, nsemicanonical2, nnoncanonical2;
-  double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
-  double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
   Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
   Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
   Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
@@ -6457,7 +6613,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
 				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 				       &ambig_prob_5_1,&ambig_prob_3_1,
 				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 				       &pairarray2,&pairs2,&npairs2,&goodness2,
 				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -6465,7 +6621,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
 				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 				       &ambig_prob_5_2,&ambig_prob_3_2,
 				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 				       stage2pairs,all_stage2_starts,all_stage2_ends,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -6485,7 +6641,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
 				       oligoindices_minor,diagpool,cellpool)) == NULL) {
 	hit1 = (T) NULL;
 
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
 					 pairarray1,npairs1);
 	start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
@@ -6496,7 +6652,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/start,/*genomiclength*/end - start + 1,
 				       /*plusp*/true,genestrand,
@@ -6516,7 +6672,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
 	if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 					ambig_end_length_5_2,ambig_end_length_3_2,
 					ambig_splicetype_5_2,ambig_splicetype_3_2,
-					min_splice_prob_2,
+					avg_splice_score_2,goodness2,
 					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 					/*left*/start,/*genomiclength*/end - start + 1,
 					/*plusp*/true,genestrand,
@@ -6537,7 +6693,7 @@ Stage3end_gmap_run_gmap_plus (T *hit2, T this, char *queryuc_ptr, int querylengt
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/start,/*genomiclength*/end - start + 1,
 				       /*plusp*/true,genestrand,
@@ -6581,8 +6737,8 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
     max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
     unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
     ncanonical2, nsemicanonical2, nnoncanonical2;
-  double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1;
-  double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
   Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
   Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
   Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
@@ -6686,7 +6842,7 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
 				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
 				       &ambig_prob_5_1,&ambig_prob_3_1,
 				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
-				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
 
 				       &pairarray2,&pairs2,&npairs2,&goodness2,
 				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
@@ -6694,7 +6850,7 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
 				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
 				       &ambig_prob_5_2,&ambig_prob_3_2,
 				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
-				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
 
 				       stage2pairs,all_stage2_starts,all_stage2_ends,
 #ifdef END_KNOWNSPLICING_SHORTCUT
@@ -6714,17 +6870,18 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
 				       oligoindices_minor,diagpool,cellpool)) == NULL) {
 	hit1 = (T) NULL;
 
-      } else if (cdna_direction == 0) {
+      } else if (pairarray2 != NULL) {
 	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
 					 pairarray1,npairs1);
 	start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])),
 			    /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
 	end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
 			       /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/end,/*genomiclength*/start - end + 1,
 				       /*plusp*/false,genestrand,
@@ -6740,10 +6897,11 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
 			    /*plusterm*/Pair_querypos(&(pairarray2[0])),this->chrhigh);
 	end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
 			       /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chroffset);
+
 	if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
 					ambig_end_length_5_2,ambig_end_length_3_2,
 					ambig_splicetype_5_2,ambig_splicetype_3_2,
-					min_splice_prob_2,
+					avg_splice_score_2,goodness2,
 					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
 					/*left*/end,/*genomiclength*/start - end + 1,
 					/*plusp*/false,genestrand,
@@ -6760,10 +6918,11 @@ Stage3end_gmap_run_gmap_minus (T *hit2, T this, char *queryuc_ptr, int queryleng
 			    /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh);
 	end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
 			       /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset);
+
 	if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
 				       ambig_end_length_5_1,ambig_end_length_3_1,
 				       ambig_splicetype_5_1,ambig_splicetype_3_1,
-				       min_splice_prob_1,
+				       avg_splice_score_1,goodness1,
 				       pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
 				       /*left*/end,/*genomiclength*/start - end + 1,
 				       /*plusp*/false,genestrand,
@@ -6804,7 +6963,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
     } else if (genomicstart < chroffset) {
       outofbounds_start = chroffset - genomicstart;
       outofbounds_end = genomicend - chroffset;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("5. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_start = 0;
@@ -6818,7 +6977,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
     } else if (genomicend > chrhigh) {
       outofbounds_start = chrhigh - genomicstart;
       outofbounds_end = genomicend - chrhigh;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("6. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_start = 0;
@@ -6843,7 +7002,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
     } else if (genomicend < chroffset) {
       outofbounds_end = chroffset - genomicend;
       outofbounds_start = genomicstart - chroffset;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("7. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_end = 0;
@@ -6857,7 +7016,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
     } else if (genomicstart > chrhigh) {
       outofbounds_end = chrhigh - genomicend;
       outofbounds_start = genomicstart - chrhigh;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("8. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_end = 0;
@@ -6883,9 +7042,10 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
 
   } else {
     new = (T) MALLOC_OUT(sizeof(*new));
-    debug0(printf("Stage3end_new_exact %p: left %llu, chrnum %d, sarrayp %d\n",new,(unsigned long long) left,chrnum,sarrayp));
+    debug0(printf("Stage3end_new_exact %p: left %llu, chrnum %d, sarrayp %d\n",new,(unsigned long long) left - chroffset,chrnum,sarrayp));
 
     new->substrings_LtoH = List_push(NULL,(void *) substring);
+    new->substrings_HtoL = List_push(NULL,(void *) substring);
     new->substrings_1toN = List_push(NULL,(void *) substring);
     new->substrings_Nto1 = List_push(NULL,(void *) substring);
 
@@ -6921,8 +7081,17 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
     new->genestrand = genestrand;
     new->sarrayp = sarrayp;
     new->gmap_source = GMAP_NOT_APPLICABLE;
+    new->gmap_goodness = 0;
+    new->gmap_max_match_length = 0;
+    new->gmap_avg_splice_score = 0.0;
+    new->splice_score = 0.0;
     new->improved_by_gmap_p = false;
 
+    new->gmap_nindelbreaks = 0;
+    new->gmap_cdna_direction = 0;
+    new->gmap_nintrons = 0;
+    new->gmap_nbadintrons = 0;
+
     new->distant_splice_p = false;
     new->chrnum = new->effective_chrnum = chrnum;
     new->other_chrnum = 0;
@@ -6972,7 +7141,7 @@ Stage3end_new_exact (int *found_score, Univcoord_T left, int genomiclength, Comp
       Stage3end_free(&new);
       return (T) NULL;
 
-    } else if (new->circularalias >= 0) {
+    } else if (new->circularpos >= 0) {
       new->altlocp = false;
       return new;
     
@@ -7010,7 +7179,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
     } else if (genomicstart < chroffset) {
       outofbounds_start = chroffset - genomicstart;
       outofbounds_end = genomicend - chroffset;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("9. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_start = 0;
@@ -7023,7 +7192,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
     } else if (genomicend > chrhigh) {
       outofbounds_start = chrhigh - genomicstart;
       outofbounds_end = genomicend - chrhigh;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("10. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_start = 0;
@@ -7047,7 +7216,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
     } else if (genomicend < chroffset) {
       outofbounds_end = chroffset - genomicend;
       outofbounds_start = genomicstart - chroffset;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("11. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_end = 0;
@@ -7060,7 +7229,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
     } else if (genomicstart > chrhigh) {
       outofbounds_end = chrhigh - genomicend;
       outofbounds_start = genomicstart - chrhigh;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("12. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_end = 0;
@@ -7090,6 +7259,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
 		  new,(unsigned long long) left,chrnum,nmismatches_whole,sarrayp));
 
     new->substrings_LtoH = List_push(NULL,(void *) substring);
+    new->substrings_HtoL = List_push(NULL,(void *) substring);
     new->substrings_1toN = List_push(NULL,(void *) substring);
     new->substrings_Nto1 = List_push(NULL,(void *) substring);
 
@@ -7124,8 +7294,17 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
     new->genestrand = genestrand;
     new->sarrayp = sarrayp;
     new->gmap_source = GMAP_NOT_APPLICABLE;
+    new->gmap_goodness = 0;
+    new->gmap_max_match_length = 0;
+    new->gmap_avg_splice_score = 0.0;
+    new->splice_score = 0.0;
     new->improved_by_gmap_p = false;
 
+    new->gmap_nindelbreaks = 0;
+    new->gmap_cdna_direction = 0;
+    new->gmap_nintrons = 0;
+    new->gmap_nbadintrons = 0;
+
     new->distant_splice_p = false;
     new->chrnum = new->effective_chrnum = chrnum;
     new->other_chrnum = 0;
@@ -7193,7 +7372,7 @@ Stage3end_new_substitution (int *found_score, int nmismatches_whole, Univcoord_T
       Stage3end_free(&new);
       return (T) NULL;
 
-    } else if (new->circularalias >= 0) {
+    } else if (new->circularpos >= 0) {
       new->altlocp = false;
       return new;
     
@@ -7258,7 +7437,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     } else if (genomicstart1 < chroffset) {
       outofbounds_start = chroffset - genomicstart1;
       outofbounds_end = genomicend2 - chroffset;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("13. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (querylength - indel_pos - nindels < outofbounds_end) {
@@ -7279,7 +7458,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     } else if (genomicend2 > chrhigh) {
       outofbounds_start = chrhigh - genomicstart1;
       outofbounds_end = genomicend2 - chrhigh;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("14. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (querylength - indel_pos - nindels < outofbounds_end) {
@@ -7321,7 +7500,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     } else if (genomicend2 < chroffset) {
       outofbounds_end = chroffset - genomicend2;
       outofbounds_start = genomicstart1 - chroffset;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("15. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (indel_pos < outofbounds_start) {
@@ -7342,7 +7521,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     } else if (genomicstart1 > chrhigh) {
       outofbounds_end = chrhigh - genomicend2;
       outofbounds_start = genomicstart1 - chrhigh;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("16. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (indel_pos < outofbounds_start) {
@@ -7396,9 +7575,16 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     if (plusp == true) {
       new->substrings_LtoH = List_push(NULL,substring2);
       new->substrings_LtoH = List_push(new->substrings_LtoH,substring1);
+
+      new->substrings_HtoL = List_push(NULL,substring1);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,substring2);
+
     } else {
       new->substrings_LtoH = List_push(NULL,substring1);
       new->substrings_LtoH = List_push(new->substrings_LtoH,substring2);
+
+      new->substrings_HtoL = List_push(NULL,substring2);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,substring1);
     }
     junction = Junction_new_insertion(nindels);
     new->junctions_LtoH = List_push(NULL,junction);
@@ -7428,8 +7614,17 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
     new->genestrand = genestrand;
     new->sarrayp = sarrayp;
     new->gmap_source = GMAP_NOT_APPLICABLE;
+    new->gmap_goodness = 0;
+    new->gmap_max_match_length = 0;
+    new->gmap_avg_splice_score = 0.0;
+    new->splice_score = 0.0;
     new->improved_by_gmap_p = false;
 
+    new->gmap_nindelbreaks = 0;
+    new->gmap_cdna_direction = 0;
+    new->gmap_nintrons = 0;
+    new->gmap_nbadintrons = 0;
+
     new->distant_splice_p = false;
     new->chrnum = new->effective_chrnum = chrnum;
     new->other_chrnum = 0;
@@ -7505,7 +7700,7 @@ Stage3end_new_insertion (int *found_score, int nindels, int indel_pos, int nmism
       Stage3end_free(&new);
       return (T) NULL;
 
-    } else if (new->circularalias >= 0) {
+    } else if (new->circularpos >= 0) {
       new->altlocp = false;
       return new;
     
@@ -7577,7 +7772,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     } else if (genomicstart1 < chroffset) {
       outofbounds_start = chroffset - genomicstart1;
       outofbounds_end = genomicend2 - chroffset;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("17. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (querylength - indel_pos - nindels < outofbounds_end) {
@@ -7598,7 +7793,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     } else if (genomicend2 > chrhigh) {
       outofbounds_start = chrhigh - genomicstart1;
       outofbounds_end = genomicend2 - chrhigh;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("18. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (querylength - indel_pos - nindels < outofbounds_end) {
@@ -7651,7 +7846,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     } else if (genomicend2 < chroffset) {
       outofbounds_end = chroffset - genomicend2;
       outofbounds_start = genomicstart1 - chroffset;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("19. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (indel_pos < outofbounds_start) {
@@ -7672,7 +7867,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     } else if (genomicstart1 > chrhigh) {
       outofbounds_end = chrhigh - genomicend2;
       outofbounds_start = genomicstart1 - chrhigh;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("20. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	if (indel_pos < outofbounds_start) {
@@ -7743,9 +7938,16 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     if (plusp == true) {
       new->substrings_LtoH = List_push(NULL,substring2);
       new->substrings_LtoH = List_push(new->substrings_LtoH,substring1);
+
+      new->substrings_HtoL = List_push(NULL,substring1);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,substring2);
+
     } else {
       new->substrings_LtoH = List_push(NULL,substring1);
       new->substrings_LtoH = List_push(new->substrings_LtoH,substring2);
+
+      new->substrings_HtoL = List_push(NULL,substring2);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,substring1);
     }
 
     new->querylength = querylength;
@@ -7767,8 +7969,17 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
     new->genestrand = genestrand;
     new->sarrayp = sarrayp;
     new->gmap_source = GMAP_NOT_APPLICABLE;
+    new->gmap_goodness = 0;
+    new->gmap_max_match_length = 0;
+    new->gmap_avg_splice_score = 0.0;
+    new->splice_score = 0.0;
     new->improved_by_gmap_p = false;
 
+    new->gmap_nindelbreaks = 0;
+    new->gmap_cdna_direction = 0;
+    new->gmap_nintrons = 0;
+    new->gmap_nbadintrons = 0;
+
     new->distant_splice_p = false;
     new->chrnum = new->effective_chrnum = chrnum;
     new->other_chrnum = 0;
@@ -7842,7 +8053,7 @@ Stage3end_new_deletion (int *found_score, int nindels, int indel_pos, int nmisma
       Stage3end_free(&new);
       return (T) NULL;
 
-    } else if (new->circularalias >= 0) {
+    } else if (new->circularpos >= 0) {
       new->altlocp = false;
       return new;
     
@@ -7880,8 +8091,8 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
   Junction_T junction;
 
   new = (T) MALLOC_OUT(sizeof(*new));
-  debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, sarrayp %d\n",
-		new,sensedir,donor,acceptor,sarrayp));
+  debug0(printf("Stage3end_new_splice %p with sensedir %d, donor substring %p and acceptor substring %p, donor_prob %f and acceptor_prob %f, sarrayp %d\n",
+		new,sensedir,donor,acceptor,donor_prob,acceptor_prob,sarrayp));
 
 #if 0
   assert(Substring_match_length_orig(donor) + Substring_match_length_orig(acceptor) + amb_length == querylength);
@@ -7926,8 +8137,17 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
 
   new->sarrayp = sarrayp;
   new->gmap_source = GMAP_NOT_APPLICABLE;
+  new->gmap_goodness = 0;
+  new->gmap_max_match_length = 0;
+  new->gmap_avg_splice_score = 0.0;
+  new->splice_score = donor_prob + acceptor_prob;
   new->improved_by_gmap_p = false;
 
+  new->gmap_nindelbreaks = 0;
+  new->gmap_cdna_direction = 0;
+  new->gmap_nintrons = 0;
+  new->gmap_nbadintrons = 0;
+
   if (donor == NULL) {
     new->hittype = SPLICE;
     new->genestrand = Substring_genestrand(acceptor);
@@ -7987,7 +8207,17 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
     new->hittype = DISTANT_SPLICE;
     new->sarrayp = sarrayp;
     new->gmap_source = GMAP_NOT_APPLICABLE;
+    new->gmap_goodness = 0;
+    new->gmap_max_match_length = 0;
+    new->gmap_avg_splice_score = 0.0;
+    new->splice_score = donor_prob + acceptor_prob;
     new->improved_by_gmap_p = false;
+
+    new->gmap_nindelbreaks = 0;
+    new->gmap_cdna_direction = 0;
+    new->gmap_nintrons = 0;
+    new->gmap_nbadintrons = 0;
+
     new->chrnum = 0;
     new->chroffset = 0;
     new->chrhigh = 0;
@@ -7997,7 +8227,17 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
   } else {
     new->sarrayp = sarrayp;
     new->gmap_source = GMAP_NOT_APPLICABLE;
+    new->gmap_goodness = 0;
+    new->gmap_max_match_length = 0;
+    new->gmap_avg_splice_score = 0.0;
+    new->splice_score = donor_prob + acceptor_prob;
     new->improved_by_gmap_p = false;
+
+    new->gmap_nindelbreaks = 0;
+    new->gmap_cdna_direction = 0;
+    new->gmap_nintrons = 0;
+    new->gmap_nbadintrons = 0;
+
     new->distant_splice_p = true;
     if (Substring_chrnum(donor) == Substring_chrnum(acceptor)) {
       new->hittype = SAMECHR_SPLICE;
@@ -8018,7 +8258,11 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
     } else if (Substring_nmatches_posttrim(donor) < 15 || 
 	       Substring_nmatches_posttrim(acceptor) < 15) {
       /* Not enough evidence to find each end of the translocation */
+#ifdef DEBUG0
+      new->hittype = SPLICE;
+#endif
       new->substrings_LtoH = (List_T) NULL;
+      new->substrings_HtoL = (List_T) NULL;
       new->substrings_1toN = (List_T) NULL;
       new->substrings_Nto1 = (List_T) NULL;
       new->junctions_LtoH = (List_T) NULL;
@@ -8173,10 +8417,17 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
       new->sensedir = SENSE_FORWARD;
       new->substrings_LtoH = List_push(NULL,(void *) acceptor);
       new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
+
+      new->substrings_HtoL = List_push(NULL,(void *) donor);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) acceptor);
+
     } else {
       new->sensedir = SENSE_ANTI;
       new->substrings_LtoH = List_push(NULL,(void *) donor);
       new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
+
+      new->substrings_HtoL = List_push(NULL,(void *) acceptor);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) donor);
     }
     debug0(printf("sensedir %d\n",new->sensedir));
 
@@ -8207,6 +8458,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
     new->other_chrnum = 0;
 
     new->substrings_LtoH = (List_T) NULL;
+    new->substrings_HtoL = (List_T) NULL;
     new->junctions_LtoH = (List_T) NULL;
     new->sensedir = sensedir;
 
@@ -8219,12 +8471,18 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
 	new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
 	new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
 	  
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) donor);
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) acceptor);
+
       } else {
 	/* Order is acceptor, donor.  Same as substring2, substring1, as expected */
 	new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
 	junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
 	new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
 	new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
+
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) acceptor);
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) donor);
       }
 
     } else {
@@ -8235,12 +8493,19 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
 	junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
 	new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
 	new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
+
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) acceptor);
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) donor);
+
       } else {
 	/* Order is donor, acceptor.  Same as substring2, substring1, as expected */
 	new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) acceptor);
 	junction = Junction_new_splice(distance,sensedir,donor_prob,acceptor_prob);
 	new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction);
 	new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) donor);
+
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) donor);
+	new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) acceptor);
       }
     }
 
@@ -8338,7 +8603,7 @@ Stage3end_new_splice (int *found_score, int nmismatches_donor, int nmismatches_a
     return (T) NULL;
 
   } else {
-    if (new->circularalias >= 0) {
+    if (new->circularpos >= 0) {
       new->altlocp = false;
     } else if ((new->altlocp = altlocp[new->chrnum]) == false) {
     } else {
@@ -8388,8 +8653,17 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
 
   new->sarrayp = sarrayp;
   new->gmap_source = GMAP_NOT_APPLICABLE;
+  new->gmap_goodness = 0;
+  new->gmap_max_match_length = 0;
+  new->gmap_avg_splice_score = 0.0;
+  new->splice_score = 0.0;
   new->improved_by_gmap_p = false;
 
+  new->gmap_nindelbreaks = 0;
+  new->gmap_cdna_direction = 0;
+  new->gmap_nintrons = 0;
+  new->gmap_nbadintrons = 0;
+
 #if 0
   if (donor == NULL && acceptor == NULL) {
     new->hittype = ONE_THIRD_SHORTEXON;
@@ -8573,6 +8847,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
   /* substring_for_concordance = (Substring_T) NULL; */
 
   new->substrings_LtoH = (List_T) NULL;
+  new->substrings_HtoL = (List_T) NULL;
   new->junctions_LtoH = (List_T) NULL;
   if (new->plusp == true) {
     if (substring2 != NULL) {
@@ -8583,6 +8858,11 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
     if (substring0 != NULL) {
       new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring0);
       new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction0);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) substring0);
+    }
+    new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) substring1);
+    if (substring2 != NULL) {
+      new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) substring2);
     }
 
   } else {
@@ -8594,6 +8874,11 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
     if (substring2 != NULL) {
       new->substrings_LtoH = List_push(new->substrings_LtoH,(void *) substring2);
       new->junctions_LtoH = List_push(new->junctions_LtoH,(void *) junction2);
+      new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) substring2);
+    }
+    new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) substring1);
+    if (substring0 != NULL) {
+      new->substrings_HtoL = List_push(new->substrings_HtoL,(void *) substring0);
     }
   }
 
@@ -8680,7 +8965,7 @@ Stage3end_new_shortexon (int *found_score, Substring_T donor, Substring_T accept
     Stage3end_free(&new);
     return (T) NULL;
 
-  } else if (new->circularalias >= 0) {
+  } else if (new->circularpos >= 0) {
     new->altlocp = false;
     return new;
     
@@ -8734,7 +9019,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
     } else if (genomicstart < chroffset) {
       outofbounds_start = chroffset - genomicstart;
       outofbounds_end = genomicend - chroffset;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("21. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_start = 0;
@@ -8747,7 +9032,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
     } else if (genomicend > chrhigh) {
       outofbounds_start = chrhigh - genomicstart;
       outofbounds_end = genomicend - chrhigh;
-      debug0(printf("Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("22. Out of bounds left (low) %d, out of bounds right (high) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_start > outofbounds_end) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_start = 0;
@@ -8774,7 +9059,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
     } else if (genomicend < chroffset) {
       outofbounds_end = chroffset - genomicend;
       outofbounds_start = genomicstart - chroffset;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("23. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_end = 0;
@@ -8787,7 +9072,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
     } else if (genomicstart > chrhigh) {
       outofbounds_end = chrhigh - genomicend;
       outofbounds_start = genomicstart - chrhigh;
-      debug0(printf("Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
+      debug0(printf("24. Out of bounds left (high) %d, out of bounds right (low) %d\n",outofbounds_start,outofbounds_end));
       if (outofbounds_end > outofbounds_start) {
 	/* Consider high part to be out of bounds and keep existing chromosome */
 	outofbounds_end = 0;
@@ -9049,6 +9334,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
 		(unsigned long long) chrhigh,chrnum,querystart,queryend));
 
   new->substrings_LtoH = List_push(NULL,(void *) substring);
+  new->substrings_HtoL = List_push(NULL,(void *) substring);
   new->substrings_1toN = List_push(NULL,(void *) substring);
   new->substrings_Nto1 = List_push(NULL,(void *) substring);
 
@@ -9080,8 +9366,17 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
   new->genestrand = genestrand;
   new->sarrayp = sarrayp;
   new->gmap_source = GMAP_NOT_APPLICABLE;
+  new->gmap_goodness = 0;
+  new->gmap_max_match_length = 0;
+  new->gmap_avg_splice_score = 0.0;
+  new->splice_score = 0.0;
   new->improved_by_gmap_p = false;
 
+  new->gmap_nindelbreaks = 0;
+  new->gmap_cdna_direction = 0;
+  new->gmap_nintrons = 0;
+  new->gmap_nbadintrons = 0;
+
   new->distant_splice_p = false;
   new->chrnum = new->effective_chrnum = chrnum;
   new->other_chrnum = 0;
@@ -9142,7 +9437,7 @@ Stage3end_new_terminal (int querystart, int queryend, Univcoord_T left, Compress
     Stage3end_free(&new);
     return (T) NULL;
 
-  } else if (new->circularalias >= 0) {
+  } else if (new->circularpos >= 0) {
     new->altlocp = false;
     return new;
     
@@ -9159,7 +9454,7 @@ T
 Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_length,
 		    int ambig_end_length_5, int ambig_end_length_3,
 		    Splicetype_T ambig_splicetype_5, Splicetype_T ambig_splicetype_3,
-		    double min_splice_prob,
+		    double avg_splice_score, int goodness,
 		    struct Pair_T *pairarray, int npairs, int nsegments, int nintrons, int nindelbreaks,
 		    Univcoord_T left, int genomiclength, bool plusp, int genestrand,
 		    char *accession, int querylength, Chrnum_T chrnum,
@@ -9182,6 +9477,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
   */
   /* However, this leads to fatal bugs later, so restored these statements */
 
+
   debug0(printf("Entered Stage3end_new_gmap with orig_sensedir %d\n",orig_sensedir));
   assert(orig_sensedir == SENSE_NULL || orig_sensedir == SENSE_ANTI || orig_sensedir == SENSE_FORWARD);
 
@@ -9192,9 +9488,9 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
 
   cigar_tokens = Pair_compute_cigar(&intronp,&hardclip_start,&hardclip_end,pairarray,npairs,querylength,
 				    /*watsonp*/plusp,orig_sensedir,/*chimera_part*/0);
-  if (Pair_tokens_cigarlength(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
+  if (Pair_cigar_length(cigar_tokens) + hardclip_start + hardclip_end != querylength) {
     fprintf(stderr,"Could not compute a valid cigar for %s: %d + %d + %d != %d\n",
-	    accession,Pair_tokens_cigarlength(cigar_tokens),hardclip_start,hardclip_end,querylength);
+	    accession,Pair_cigar_length(cigar_tokens),hardclip_start,hardclip_end,querylength);
     Pair_dump_array_stderr(pairarray,npairs,/*zerobasedp*/true);
     Pair_tokens_free(&cigar_tokens);
 #ifdef CHECK_ASSERTIONS
@@ -9237,13 +9533,15 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
 
   new = (T) MALLOC_OUT(sizeof(*new));
 
-  debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, orig_sensedir %d, max_match_length %d, gmap_source %d\n",
+  debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, orig_sensedir %d, avg_splice_score %f, max_match_length %d, gmap_source %d\n",
 		new,(unsigned long long) left,(unsigned int) (genomicstart - chroffset),(unsigned int) (genomicend - chroffset),
-		(unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,orig_sensedir,max_match_length,gmap_source));
+		(unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,orig_sensedir,avg_splice_score,max_match_length,gmap_source));
   debug0(printf("  ambig_end_length_5 %d, ambig_end_length_3 %d\n",ambig_end_length_5,ambig_end_length_3));
   debug0(Pair_dump_comp_array(pairarray,npairs));
 
+
   new->substrings_LtoH = (List_T) NULL;
+  new->substrings_HtoL = (List_T) NULL;
   new->substrings_1toN = (List_T) NULL;
   new->substrings_Nto1 = (List_T) NULL;
 
@@ -9291,6 +9589,16 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
   new->gmap_nintrons = nintrons;
 
 #if 0
+  /* Not sure if this nindelbreaks compares with the parameter value */
+  Pair_nmismatches_region(&nindelbreaks,&new->gmap_nbadintrons,pairarray,npairs,
+			  /*trim_left*/0,/*trim_right*/0,/*start_amb_length*/0,/*end_amb_length*/0,
+			  querylength);
+#else
+  new->gmap_nbadintrons = 0;	/* Filled in during Stage3pair_optimal_score */
+#endif
+
+
+#if 0
   new->mapq_loglik = Substring_mapq_loglik(substring);
   new->mapq_score = 0;
   new->absmq_score = 0;
@@ -9351,9 +9659,10 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
     return (T) NULL;
   }
 
+  new->gmap_goodness = goodness;
   new->gmap_max_match_length = max_match_length;
-  new->gmap_min_splice_prob = min_splice_prob;
-
+  new->gmap_avg_splice_score = avg_splice_score;
+  new->splice_score = 0.0;
 
   new->trim_left = Pair_querypos(&(pairarray[0])) /*- ambig_end_length_5*/;  /* Do not subtract ambig_end_length, so we are equivalent with substrings */
   if (ambig_end_length_5 > 0) {
@@ -9445,10 +9754,14 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
     new->sensedir = orig_sensedir;
     debug0(printf("sensedir is %d (original), because known, plusp %d\n",orig_sensedir,plusp));
 
+  } else if (novelsplicingp == false) {
+    /* Could be transcript splicing */
+    new->sensedir = SENSE_NULL;
+
   } else if ((prob5_sense_forward > 0.90 && prob5_sense_forward > prob5_sense_anti + 0.10) ||
 	     (prob3_sense_forward > 0.90 && prob3_sense_forward > prob3_sense_anti + 0.10)) {
     debug0(printf("sensedir is %d, based on probabilities: %f %f fwd, %f %f anti, plusp %d\n",
-		  SENSE_FORWARD,prob5_sense_forward,prob3_sense_forward,prob5_sense_anti,prob3_sense_anti, plusp));
+		  SENSE_FORWARD,prob5_sense_forward,prob3_sense_forward,prob5_sense_anti,prob3_sense_anti,plusp));
     new->sensedir = SENSE_FORWARD;
 
   } else if ((prob5_sense_anti > 0.90 && prob5_sense_anti > prob5_sense_forward + 0.10) ||
@@ -9526,11 +9839,11 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_
     /* Stage3end_free(&new); -- Cannot use, because it frees pairarray */
     Pair_tokens_free(&new->cigar_tokens);
     /* No substrings or junctions */
-    FREE(new);
+    FREE_OUT(new);
     debug0(printf("Returning NULL\n"));
     return (T) NULL;
 
-  } else if (new->circularalias >= 0) {
+  } else if (new->circularpos >= 0) {
     debug0(printf("Returning GMAP %p with trims %d and %d (splicep %d and %d)\n",new,new->trim_left,new->trim_right,new->trim_left_splicep,new->trim_right_splicep));
     new->altlocp = false;
     return new;
@@ -10106,7 +10419,7 @@ Stage3end_eval_and_sort (int npaths, int *first_absmq, int *second_absmq,
 
 
 static int
-insertlength_expected (int insertlength) {
+insertlength_expected (Chrpos_T insertlength) {
   if (insertlength < expected_pairlength_low) {
     return -1;
   } else if (insertlength > expected_pairlength_very_high) {
@@ -10532,6 +10845,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
       }
     }
   }
+  debug4(printf("MINSCORE: %d\n",minscore));
 
   debug4(printf("Stage3end_optimal_score over %d hits: minscore = %d + subopt:%d\n",
 		n,minscore,subopt_levels));
@@ -10607,6 +10921,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
 
 
   /* Filter on nsegments */
+  /* TODO: Consider filtering on gmap_nbadintrons instead, as we do for Stage3pair_optimal_score */
   if (finalp == true && optimal != NULL) {
     hitlist = optimal;
     optimal = (List_T) NULL;
@@ -10628,7 +10943,7 @@ Stage3end_optimal_score_aux (bool *eliminatedp, List_T hitlist,
 	*eliminatedp = true;
 	Stage3end_free(&hit);
       } else {
-	debug4(printf("Keeping a hit with nsegments %d\n",hit->nsegments));
+	debug4(printf("Keeping a hit with nsegments %d, nindels %d\n",hit->nsegments,hit->nindels));
 	optimal = List_push(optimal,hit);
       }
     }
@@ -10668,12 +10983,13 @@ unalias_circular (T hit) {
   List_T p;
   Substring_T substring;
 
-  debug12(printf("Calling unalias_circular\n"));
   assert(hit->circularalias == +1);
   if (hit->hittype == GMAP) {
+    debug12(printf("Calling unalias_circular on GMAP\n"));
     Pair_unalias_circular(hit->pairarray,hit->npairs,chrlength);
 
   } else {
+    debug12(printf("Calling unalias_circular on substrings\n"));
     for (p = hit->substrings_1toN; p != NULL; p = List_next(p)) {
       substring = (Substring_T) List_head(p);
       Substring_unalias_circular(substring);
@@ -11598,6 +11914,18 @@ hit_equiv_cmp (Stage3end_T x, Stage3end_T y) {
     return -1;
 #endif
 
+  } else if (x->gmap_avg_splice_score > y->gmap_avg_splice_score) {
+    return -1;
+
+  } else if (y->gmap_avg_splice_score > x->gmap_avg_splice_score) {
+    return +1;
+
+  } else if (x->splice_score > y->splice_score) {
+    return -1;
+
+  } else if (y->splice_score > x->splice_score) {
+    return +1;
+
   } else {
     return 0;
   }
@@ -11734,6 +12062,38 @@ hit_goodness_cmp (bool *equalp, Stage3end_T hit,
     debug7(printf("  => indistinguishable\n"));
     return 0;
 
+  } else if (hit->hittype == GMAP && best_hit->hittype == GMAP) {
+    prob1 = hit->gmap_avg_splice_score;
+    prob2 = best_hit->gmap_avg_splice_score;
+
+    if (prob1 < prob2) {
+      debug7(printf("  => %d loses by GMAP splice prob %f vs %f\n",k,prob1,prob2));
+      return -1;
+    } else if (prob1 > prob2) {
+      debug7(printf("  => %d wins by GMAP splice prob %f vs %f\n",k,prob1,prob2));
+      return +1;
+    } else {
+      debug7(printf("  => equal\n"));
+      *equalp = true;
+      return 0;
+    }
+
+  } else if (hit->hittype == TRANSLOC_SPLICE && best_hit->hittype == TRANSLOC_SPLICE) {
+    prob1 = hit->splice_score;
+    prob2 = best_hit->splice_score;
+
+    if (prob1 < prob2) {
+      debug7(printf("  => %d loses by TRANSLOC_SPLICE splice prob %f vs %f\n",k,prob1,prob2));
+      return -1;
+    } else if (prob1 > prob2) {
+      debug7(printf("  => %d wins by TRANSLOC_SPLICE splice prob %f vs %f\n",k,prob1,prob2));
+      return +1;
+    } else {
+      debug7(printf("  => equal\n"));
+      *equalp = true;
+      return 0;
+    }
+
   } else {
     prob1 = Stage3end_prob(hit);
     prob2 = Stage3end_prob(best_hit);
@@ -12803,30 +13163,36 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
       Filestring_set_split_output(fp,OUTPUT_CU);
     }
 
-    if (print_m8_p == false) {
-      print_query_header(fp,initchar,queryseq,invertp);
-      FPRINTF(fp,"\t1 %s",CONCORDANT_TEXT);
+    if (omit_concordant_uniq_p == true && stage3pair->circularp == false) {
+      /* Skip printing */
+      Filestring_set_split_output(fp,OUTPUT_NONE);
+
+    } else {
+      if (print_m8_p == false) {
+	print_query_header(fp,initchar,queryseq,invertp);
+	FPRINTF(fp,"\t1 %s",CONCORDANT_TEXT);
     
-      print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
+	print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
 
-      FPRINTF(fp,"\t");
-      Shortread_print_header(fp,headerseq1,headerseq2);
-    }
+	FPRINTF(fp,"\t");
+	Shortread_print_header(fp,headerseq1,headerseq2);
+      }
     
-    if (firstp == true) {
-      Stage3end_print(fp,hit5,hit5->score,
-		      chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
-		      invertp,hit5,hit3,stage3pair->insertlength,
-		      stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
-    } else {
-      Stage3end_print(fp,hit3,hit3->score,
-		      chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/2",
-		      invertp,hit5,hit3,stage3pair->insertlength,
-		      stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
-    }
+      if (firstp == true) {
+	Stage3end_print(fp,hit5,hit5->score,
+			chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
+			invertp,hit5,hit3,stage3pair->insertlength,
+			stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
+      } else {
+	Stage3end_print(fp,hit3,hit3->score,
+			chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/2",
+			invertp,hit5,hit3,stage3pair->insertlength,
+			stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
+      }
 
-    if (print_m8_p == false) {
-      FPRINTF(fp,"\n");
+      if (print_m8_p == false) {
+	FPRINTF(fp,"\n");
+      }
     }
 
   } else if (resulttype == CONCORDANT_TRANSLOC) {
@@ -12892,12 +13258,16 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
   } else if (resulttype == CONCORDANT_MULT) {
     stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
 
-    if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths) {
+    if (omit_concordant_mult_p == true) {
+      /* Skip printing */
+      Filestring_set_split_output(fp,OUTPUT_NONE);
+
+    } else if (quiet_if_excessive_p && npaths_primary + npaths_altloc > maxpaths) {
       Filestring_set_split_output(fp,OUTPUT_CX);
       if (print_m8_p == false) {
 	print_query_header(fp,initchar,queryseq,invertp);
 	FPRINTF(fp,"\t%d %s",npaths_primary + npaths_altloc,CONCORDANT_TEXT);
-
+	
 	print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
 
 	FPRINTF(fp,"\t");
@@ -12905,7 +13275,7 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
 
 	/* No further output */
 	FPRINTF(fp,"\n");
-
+	
 	if (failedinput_root != NULL) {
 	  Shortread_print_query_singleend(fp_failedinput,queryseq,headerseq1);
 	}
@@ -12918,7 +13288,7 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
 	FPRINTF(fp,"\t%d %s",npaths_primary + npaths_altloc,CONCORDANT_TEXT);
 	
 	print_barcode_and_quality(fp,queryseq,invertp,quality_shift);
-
+	
 	FPRINTF(fp,"\t");
 	Shortread_print_header(fp,headerseq1,headerseq2);
       }
@@ -12927,7 +13297,7 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
 	stage3pair = stage3pairarray[pathnum-1];
 	hit5 = stage3pair->hit5;
 	hit3 = stage3pair->hit3;
-
+	
 	if (firstp == true) {
 	  Stage3end_print(fp,hit5,hit5->score,
 			  chromosome_iit,queryseq,headerseq1,/*acc_suffix*/"/1",
@@ -12940,13 +13310,12 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
 			  stage3pair->score,stage3pair->pairtype,stage3pair->mapq_score);
 	}
       }
-
+      
       if (print_m8_p == false) {
 	FPRINTF(fp,"\n");
       }
     }
 
-
   } else if (resulttype == PAIRED_UNIQ) {
     stage3pairarray = (Stage3pair_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result);
     stage3pair = stage3pairarray[0];
@@ -13194,11 +13563,81 @@ strip_gaps_at_tail (List_T pairs) {
 }
 
 
+/* Used for resolve_inside_general_splice_plus and resolve_inside_general_splice_minus.  Threads pairs through the Pair_/Substring_ conversion helpers for hit and returns their result.  Reads genome, which is not a parameter of this function. */
+static List_T
+Stage3end_convert_to_pairs (List_T pairs, T hit, char *queryuc_ptr, int querylength,
+			    Compress_T query_compress_fwd, Compress_T query_compress_rev,
+			    Chrpos_T chrlength, Pairpool_T pairpool) {
+  List_T p, q;			/* p walks hit->substrings_1toN, q walks hit->junctions_1toN */
+  /* Chrpos_T genomicpos1, genomicpos2; */
+  Substring_T substring, prev_substring;
+  Junction_T junction;
+  Junctiontype_T type;
+  char *deletion_string;
+
+  if (hit->hittype == TRANSLOC_SPLICE) {
+    /* Cannot handle translocations within a single GMAP alignment */
+    abort();
+    return NULL;		/* Not reached unless abort() returns */
+    
+  } else if (hit->hittype == GMAP) {
+    debug9(printf("Converting gmap to pairs\n"));
+    /* Use querylength here, but adj elsewhere.  NOTE(review): this call passes chrlength and no querylength; the remark matches the _out variant instead -- confirm */
+    return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,
+				       chrlength,pairpool);
+  } else {
+    p = hit->substrings_1toN;
+    prev_substring = (Substring_T) List_head(p);	/* First substring is converted before the junction loop */
+    debug9(printf("Converting substring\n"));
+    /* Normally done during Stage3pair_eval_and_sort */
+    Substring_display_prep(prev_substring,queryuc_ptr,querylength,/*extraleft*/0,/*extraright*/0,
+			   query_compress_fwd,query_compress_rev,genome);
+    pairs = Substring_convert_to_pairs(pairs,prev_substring,queryuc_ptr,chrlength,pairpool);
+
+    for (q = hit->junctions_1toN, p = List_next(p); p != NULL; q = List_next(q), p = List_next(p)) {
+      junction = (Junction_T) List_head(q);	/* Passed below together with prev_substring and substring */
+      substring = (Substring_T) List_head(p);
+    
+      if ((type = Junction_type(junction)) == INS_JUNCTION) {
+	debug9(printf("Converting insertion\n"));
+	pairs = Substring_add_insertion(pairs,prev_substring,substring,
+					/*insertionlength*/Junction_nindels(junction),queryuc_ptr,
+					pairpool);
+      } else if (type == DEL_JUNCTION) {
+	debug9(printf("Converting deletion\n"));
+	deletion_string = Junction_deletion_string(junction,genome,hit->plusp);
+	pairs = Substring_add_deletion(pairs,prev_substring,substring,
+				       deletion_string,/*deletionlength*/Junction_nindels(junction),
+				       pairpool);
+	FREE(deletion_string);	/* Released here, after Substring_add_deletion has returned */
+      } else if (type == SPLICE_JUNCTION) {
+	/* Causes problems with bad comps.  Stage3_compute will insert gaps anyway */
+	debug9(printf("(Not converting splice)\n"));
+	/* pairs = Substring_add_intron(pairs,prev_substring,substring,pairpool); */
+	
+      } else {
+	abort();		/* Junction type other than insertion, deletion, or splice */
+      }
+    
+      debug9(printf("Converting substring\n"));
+      /* Normally done during Stage3pair_eval_and_sort */
+      Substring_display_prep(substring,queryuc_ptr,querylength,/*extraleft*/0,/*extraright*/0,
+			     query_compress_fwd,query_compress_rev,genome);
+      pairs = Substring_convert_to_pairs(pairs,substring,queryuc_ptr,chrlength,pairpool);
+      prev_substring = substring;	/* Becomes the left-hand argument for the next junction */
+    }
+
+    debug9(Pair_dump_list(pairs,true));
+    return pairs;
+  }
+}
+
+
 /* Used only for --merge-overlap features, so obey hardclip and not querystart/queryend */
 /* If use querylength_adj, ss.bug.4 fails.  If use querylength, ss.bug.3 fails */
 static List_T
-Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
-			    int hardclip_low, int hardclip_high, int queryseq_offset) {
+Stage3end_convert_to_pairs_out (List_T pairs, T hit, Shortread_T queryseq,
+				int hardclip_low, int hardclip_high, int queryseq_offset) {
   List_T p, q;
   /* Chrpos_T genomicpos1, genomicpos2; */
   Substring_T substring, prev_substring;
@@ -13214,37 +13653,37 @@ Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq,
   } else if (hit->hittype == GMAP) {
     debug15(printf("Converting gmap to pairs\n"));
     /* Use querylength here, but adj elsewhere */
-    return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength,
-				       hardclip_low,hardclip_high,queryseq_offset);
+    return Pair_convert_array_to_pairs_out(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength,
+					   hardclip_low,hardclip_high,queryseq_offset);
   } else {
     p = hit->substrings_1toN;
     prev_substring = (Substring_T) List_head(p);
-    pairs = Substring_convert_to_pairs(pairs,prev_substring,hit->querylength,
-				       queryseq,hardclip_low,hardclip_high,queryseq_offset);
+    pairs = Substring_convert_to_pairs_out(pairs,prev_substring,hit->querylength,
+					   queryseq,hardclip_low,hardclip_high,queryseq_offset);
 
     for (q = hit->junctions_1toN, p = List_next(p); p != NULL; q = List_next(q), p = List_next(p)) {
       junction = (Junction_T) List_head(q);
       substring = (Substring_T) List_head(p);
     
       if ((type = Junction_type(junction)) == INS_JUNCTION) {
-	pairs = Substring_add_insertion(pairs,prev_substring,substring,hit->querylength,
-					/*insertionlength*/Junction_nindels(junction),queryseq,
-					hardclip_low,hardclip_high,queryseq_offset);
+	pairs = Substring_add_insertion_out(pairs,prev_substring,substring,hit->querylength,
+					    /*insertionlength*/Junction_nindels(junction),queryseq,
+					    hardclip_low,hardclip_high,queryseq_offset);
       } else if (type == DEL_JUNCTION) {
 	deletion_string = Junction_deletion_string(junction,genome,hit->plusp);
-	pairs = Substring_add_deletion(pairs,prev_substring,substring,hit->querylength,
-				       deletion_string,/*deletionlength*/Junction_nindels(junction),
-				       hardclip_low,hardclip_high,queryseq_offset);
+	pairs = Substring_add_deletion_out(pairs,prev_substring,substring,hit->querylength,
+					   deletion_string,/*deletionlength*/Junction_nindels(junction),
+					   hardclip_low,hardclip_high,queryseq_offset);
       } else if (type == SPLICE_JUNCTION) {
-	pairs = Substring_add_intron(pairs,prev_substring,substring,hit->querylength,
-				     hardclip_low,hardclip_high,queryseq_offset);
+	pairs = Substring_add_intron_out(pairs,prev_substring,substring,hit->querylength,
+					 hardclip_low,hardclip_high,queryseq_offset);
 	
       } else {
 	abort();
       }
     
-      pairs = Substring_convert_to_pairs(pairs,substring,hit->querylength,
-					 queryseq,hardclip_low,hardclip_high,queryseq_offset);
+      pairs = Substring_convert_to_pairs_out(pairs,substring,hit->querylength,
+					     queryseq,hardclip_low,hardclip_high,queryseq_offset);
       prev_substring = substring;
     }
 
@@ -13279,11 +13718,11 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
 
   if (hit5->plusp == true) {
     if (clipdir > 0) {
-      pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0);
+      pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0);
       pairs5 = strip_gaps_at_head(pairs5);
 
-      pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,
-					  /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+      pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,
+					      /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
       pairs3 = strip_gaps_at_tail(pairs3);
 
 #ifdef CHECK_ASSERTIONS
@@ -13317,11 +13756,11 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
       }
 
     } else if (clipdir < 0) {
-      pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0);
+      pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0);
       pairs3 = strip_gaps_at_head(pairs3);
 
-      pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,
-					  /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+      pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,
+					      /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
       pairs5 = strip_gaps_at_tail(pairs5);
 
 #ifdef CHECK_ASSERTIONS
@@ -13362,11 +13801,11 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
 
   } else {
     if (clipdir > 0) {
-      pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0);
+      pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0);
       pairs3 = strip_gaps_at_head(pairs3);
 
-      pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,
-					  /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
+      pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,
+					      /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high);
       pairs5 = strip_gaps_at_tail(pairs5);
 
 #ifdef CHECK_ASSERTIONS
@@ -13400,11 +13839,11 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
       }
 
     } else if (clipdir < 0) {
-      pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0);
+      pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0);
       pairs5 = strip_gaps_at_head(pairs5);
 
-      pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,
-					  /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
+      pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,
+					      /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high);
       pairs3 = strip_gaps_at_tail(pairs3);
 
 #ifdef CHECK_ASSERTIONS
@@ -13458,35 +13897,833 @@ Stage3pair_merge (int *npairs, int *querylength_merged, char **queryseq_merged,
 }
 
 
+static bool
+resolve_inside_general_splice_plus (T *oldhit5, T *oldhit3, bool *private5p, bool *private3p,
+				    Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
+				    Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
+				    char *queryuc_ptr_5, char *queryuc_ptr_3, int querylength5, int querylength3,
+				    int genestrand, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+				    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+  bool changep = false;
+  T hit, hit5 = *oldhit5, hit3 = *oldhit3;
 
-#if 0
-List_T
-Stage3end_filter_bymatch (List_T hitlist) {
-  List_T filtered = NULL, p;
-  T hit;
-  int min_nmismatches_whole = 1000;
+#ifdef DEBUG9
+  List_T p;
+#endif
+  List_T stage2pairs, all_stage2_starts, all_stage2_ends;
+  int queryend, endlength;
+  Chrpos_T chrstart, chrend;
+  struct Pair_T *pairarray1, *pairarray2;
+  List_T pairs1, pairs2;
+
+  int cdna_direction, sensedir, sense_try;
+  int npairs1, goodness1, matches1, nmatches_posttrim_1,
+    max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+    unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+    ncanonical1, nsemicanonical1, nnoncanonical1;
+  int npairs2, goodness2, matches2, nmatches_posttrim_2,
+    max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+    unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+    ncanonical2, nsemicanonical2, nnoncanonical2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
+  Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+  Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
+  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+
+  Univcoord_T start, end;
+  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+  GMAP_source_T gmap_source;
+
+
+  if (hit5->genomicstart > hit3->genomicstart || hit5->genomicend > hit3->genomicend) {
+    /* Scramble, which could occur with circular chromosomes */
+    debug9(printf("Scramble, possibly from a circular chromosome.  Not solving at this time\n"));
+
+  } else if (hit5->querylength - 1 - Stage3end_queryend(hit5) > 10 && Stage3end_querystart(hit3) > 10) {
+    /* Both insides need to be resolved.  Not solving at this time */
+    debug9(printf("Dual to be resolved on inside.  Not solving at this time\n"));
+
+  } else if (hit5->chrnum != 0 && (endlength = hit5->querylength - 1 - Stage3end_queryend(hit5)) > 10) {
+    chrend = hit3->genomicstart - hit5->chroffset; /* Use hit5->chroffset in case hit3 is a transloc */
+    chrstart = subtract_bounded(chrend,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,0);
+    if (chrstart < hit5->genomicend - hit5->chroffset) {
+      debug9(printf("Revising chrstart\n"));
+      chrstart = hit5->genomicend - hit5->chroffset;
+    }
+    queryend = Stage3end_queryend(hit5) + 1;
+    debug9(printf("For ends, chrstart %u, chrend %u\n",chrstart,chrend));
+    if (chrstart < chrend &&
+	(all_stage2_ends = Stage2_compute_ends(
+#ifdef PMAP
+					       &(queryaaseq_ptr[queryend]),&(queryaaseq_ptr[queryend]),
+					       /*querylength*/endlength,/*query_offset*/0*3,
+#else
+					       &(queryuc_ptr_5[queryend]),&(queryuc_ptr_5[queryend]),
+					       /*querylength*/endlength,/*query_offset*/queryend,
+#endif
+					       chrstart,chrend,hit5->chroffset,hit5->chrhigh,/*plusp*/true,genestrand,
+					    
+					       oligoindices_minor,pairpool,diagpool,cellpool,
+					       /*localp should be false*/true,/*skip_repetitive_p*/false,
+					       /*favor_right_p*/false,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) {
+
+      debug9(printf("Got %d ends\n",List_length(all_stage2_ends)));
+      debug9(printf("5' end to be resolved on inside\n"));
+#ifdef DEBUG9
+      for (p = all_stage2_ends; p != NULL; p = List_next(p)) {
+	Pair_dump_list(List_head(p),true);
+      }
+#endif
+      stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit5,queryuc_ptr_5,querylength5,
+					       query5_compress_fwd,query5_compress_rev,
+					       /*chrlength*/hit5->chrhigh - hit5->chroffset,pairpool);
+      debug9(Pair_dump_list(stage2pairs,true));
+
+      knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset;
+      stage2pairs = List_reverse(stage2pairs);
+      knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset;
+
+      if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) {
+	sense_try = +1;
+      } else if (sensedir == SENSE_ANTI) {
+	sense_try = -1;
+      } else {
+	sense_try = 0;
+      }
+
+      if (hit5->hittype == GMAP) {
+	gmap_source = hit5->gmap_source;
+      } else {
+	gmap_source = GMAP_VIA_SUBSTRINGS;
+      }
+
+      if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+				       &matches1,&nmatches_posttrim_1,&max_match_length_1,
+				       &ambig_end_length_5_1,&ambig_end_length_3_1,
+				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+				       &ambig_prob_5_1,&ambig_prob_3_1,
+				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
+				       
+				       &pairarray2,&pairs2,&npairs2,&goodness2,
+				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
+				       &ambig_end_length_5_2,&ambig_end_length_3_2,
+				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+				       &ambig_prob_5_2,&ambig_prob_3_2,
+				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
+
+				       stage2pairs,/*all_stage2_starts*/NULL,all_stage2_ends,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+				       cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+				       watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+				       /*queryseq_ptr*/queryuc_ptr_5,queryuc_ptr_5,querylength5,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+				       /*query_subseq_offset*/0,
+#else
+				       /*query_subseq_offset*/0,
+#endif
+				       hit5->chrnum,hit5->chroffset,hit5->chrhigh,
+				       knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+				       /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+				       sense_try,/*sense_filter*/0,
+				       oligoindices_minor,diagpool,cellpool)) == NULL) {
+
+      } else if (pairarray2 != NULL) {
+	if (avg_splice_score_1 > avg_splice_score_2) {
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray1,npairs1);
+	  start = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])),
+				   /*minusterm*/Pair_querypos(&(pairarray1[0])),hit5->chroffset);
+	  end = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+			    /*plusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chrhigh);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+					ambig_end_length_5_1,ambig_end_length_3_1,
+					ambig_splicetype_5_1,ambig_splicetype_3_1,
+					avg_splice_score_1,goodness1,
+					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+					/*left*/start,/*genomiclength*/end - start + 1,
+					/*plusp*/true,genestrand,
+					/*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength,
+					/*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray1);
+	  } else {
+	    if (*private5p == true) {
+	      Stage3end_free(&(*oldhit5));
+	    }
+	    debug9(printf("5' resolved on inside\n"));
+	    *oldhit5 = hit;
+	    *private5p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray2);
+
+	} else {
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray2,npairs2);
+	  start = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[0])),
+				   /*minusterm*/Pair_querypos(&(pairarray2[0])),hit5->chroffset);
+	  end = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+			    /*plusterm*/querylength5 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit5->chrhigh);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+					ambig_end_length_5_2,ambig_end_length_3_2,
+					ambig_splicetype_5_2,ambig_splicetype_3_2,
+					avg_splice_score_2,goodness2,
+					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+					/*left*/start,/*genomiclength*/end - start + 1,
+					/*plusp*/true,genestrand,
+					/*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength,
+					/*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray2);
+	  } else {
+	    if (*private5p == true) {
+	      Stage3end_free(&(*oldhit5));
+	    }
+	    debug9(printf("5' resolved on inside\n"));
+	    *oldhit5 = hit;
+	    *private5p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray1);
+	}
+
+      } else {
+	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					 pairarray1,npairs1);
+	start = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])),
+				 /*minusterm*/Pair_querypos(&(pairarray1[0])),hit5->chroffset);
+	end = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+			  /*plusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chrhigh);
+
+	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+				      ambig_end_length_5_1,ambig_end_length_3_1,
+				      ambig_splicetype_5_1,ambig_splicetype_3_1,
+				      avg_splice_score_1,goodness1,
+				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+				      /*left*/start,/*genomiclength*/end - start + 1,
+				      /*plusp*/true,genestrand,
+				      /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength,
+				      cdna_direction,sensedir,/*sensedir_knownp*/true,
+				      gmap_source)) == NULL) {
+	  FREE_OUT(pairarray1);
+	} else {
+	  if (*private5p == true) {
+	    Stage3end_free(&(*oldhit5));
+	  }
+	  debug9(printf("5' resolved on inside\n"));
+	  *oldhit5 = hit;
+	  *private5p = true;
+	  changep = true;
+	}
+      }
+
+      List_free(&all_stage2_ends);
+    }
+
+  } else if (hit3->chrnum != 0 && (endlength = Stage3end_querystart(hit3)) > 10) {
+    chrstart = hit5->genomicend - hit3->chroffset; /* Use hit3->chroffset in case hit5 is a transloc */
+    chrend = add_bounded(chrstart,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,hit5->chrhigh);
+    if (chrend > hit3->genomicstart - hit3->chroffset) {
+      debug9(printf("Revising chrend\n"));
+      chrend = hit3->genomicstart - hit3->chroffset;
+    }
+    debug9(printf("Resolve plus 3': For starts, chrstart %u, chrend %u\n",chrstart,chrend));
+    if (chrstart < chrend && 
+	(all_stage2_starts = Stage2_compute_starts(
+#ifdef PMAP
+						   &(queryaaseq_ptr[0]),&(queryaaseq_ptr[0]),
+						   /*querylength*/endlength,/*query_offset*/0*3,
+#else
+						   &(queryuc_ptr_3[0]),&(queryuc_ptr_3[0]),
+						   /*querylength*/endlength,/*query_offset*/0,
+#endif
+						   chrstart,chrend,hit3->chroffset,hit3->chrhigh,/*plusp*/true,genestrand,
+					    
+						   oligoindices_minor,pairpool,diagpool,cellpool,
+						   /*localp should be false*/true,/*skip_repetitive_p*/false,
+						   /*favor_right_p*/true,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) {
+
+      debug9(printf("Got %d starts\n",List_length(all_stage2_starts)));
+      debug9(printf("3' start to be resolved on inside\n"));
+#ifdef DEBUG9
+      for (p = all_stage2_starts; p != NULL; p = List_next(p)) {
+	Pair_dump_list(List_head(p),true);
+      }
+#endif
+      stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit3,queryuc_ptr_3,querylength3,
+					       query3_compress_fwd,query3_compress_rev,
+					       /*chrlength*/hit3->chrhigh - hit3->chroffset,pairpool);
+      debug9(Pair_dump_list(stage2pairs,true));
+
+      knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset;
+      stage2pairs = List_reverse(stage2pairs);
+      knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset;
+
+      if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) {
+	sense_try = +1;
+      } else if (sensedir == SENSE_ANTI) {
+	sense_try = -1;
+      } else {
+	sense_try = 0;
+      }
+
+      if (hit3->hittype == GMAP) {
+	gmap_source = hit3->gmap_source;
+      } else {
+	gmap_source = GMAP_VIA_SUBSTRINGS;
+      }
+
+      if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+				       &matches1,&nmatches_posttrim_1,&max_match_length_1,
+				       &ambig_end_length_5_1,&ambig_end_length_3_1,
+				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+				       &ambig_prob_5_1,&ambig_prob_3_1,
+				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
+
+				       &pairarray2,&pairs2,&npairs2,&goodness2,
+				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
+				       &ambig_end_length_5_2,&ambig_end_length_3_2,
+				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+				       &ambig_prob_5_2,&ambig_prob_3_2,
+				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
+
+				       stage2pairs,all_stage2_starts,/*all_stage2_ends*/NULL,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+				       cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+				       watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+				       /*queryseq_ptr*/queryuc_ptr_3,queryuc_ptr_3,querylength3,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+				       /*query_subseq_offset*/0,
+#else
+				       /*query_subseq_offset*/0,
+#endif
+				       hit3->chrnum,hit3->chroffset,hit3->chrhigh,
+				       knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand,
+				       /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+				       sense_try,/*sense_filter*/0,
+				       oligoindices_minor,diagpool,cellpool)) == NULL) {
+
+      } else if (pairarray2 != NULL) {
+	if (avg_splice_score_1 > avg_splice_score_2) {
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray1,npairs1);
+	  start = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])),
+				   /*minusterm*/Pair_querypos(&(pairarray1[0])),hit3->chroffset);
+	  end = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+			    /*plusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chrhigh);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+					ambig_end_length_5_1,ambig_end_length_3_1,
+					ambig_splicetype_5_1,ambig_splicetype_3_1,
+					avg_splice_score_1,goodness1,
+					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+					/*left*/start,/*genomiclength*/end - start + 1,
+					/*plusp*/true,genestrand,
+					/*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength,
+					/*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray1);
+	  } else {
+	    if (*private3p == true) {
+	      Stage3end_free(&(*oldhit3));
+	    }
+	    debug9(printf("3' resolved on inside\n"));
+	    *oldhit3 = hit;
+	    *private3p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray2);
+
+	} else {
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray2,npairs2);
+	  start = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[0])),
+				   /*minusterm*/Pair_querypos(&(pairarray2[0])),hit3->chroffset);
+	  end = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+			    /*plusterm*/querylength3 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit3->chrhigh);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+					ambig_end_length_5_2,ambig_end_length_3_2,
+					ambig_splicetype_5_2,ambig_splicetype_3_2,
+					avg_splice_score_2,goodness2,
+					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+					/*left*/start,/*genomiclength*/end - start + 1,
+					/*plusp*/true,genestrand,
+					/*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength,
+					/*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray2);
+	  } else {
+	    if (*private3p == true) {
+	      Stage3end_free(&(*oldhit3));
+	    }
+	    debug9(printf("3' resolved on inside\n"));
+	    *oldhit3 = hit;
+	    *private3p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray1);
+	}
+
+      } else {
+	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					 pairarray1,npairs1);
+	start = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])),
+				 /*minusterm*/Pair_querypos(&(pairarray1[0])),hit3->chroffset);
+	end = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+			  /*plusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chrhigh);
+
+	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+				      ambig_end_length_5_1,ambig_end_length_3_1,
+				      ambig_splicetype_5_1,ambig_splicetype_3_1,
+				      avg_splice_score_1,goodness1,
+				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+				      /*left*/start,/*genomiclength*/end - start + 1,
+				      /*plusp*/true,genestrand,
+				      /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength,
+				      cdna_direction,sensedir,/*sensedir_knownp*/true,
+				      gmap_source)) == NULL) {
+	  FREE_OUT(pairarray1);
+	} else {
+	  if (*private3p == true) {
+	    Stage3end_free(&(*oldhit3));
+          }
+	  debug9(printf("3' resolved on inside\n"));
+	  *oldhit3 = hit;
+	  *private3p = true;
+	  changep = true;
+	}
+      }
+
+      List_free(&all_stage2_starts);
+    }
+  }
+
+  return changep;
+}
+
+static bool
+resolve_inside_general_splice_minus (T *oldhit5, T *oldhit3, bool *private5p, bool *private3p,
+				     Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
+				     Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
+				     char *queryuc_ptr_5, char *queryuc_ptr_3, int querylength5, int querylength3,
+				     int genestrand, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+				     Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+  bool changep = false; /* becomes true only when *oldhit5 or *oldhit3 is replaced below */
+  T hit, hit5 = *oldhit5, hit3 = *oldhit3; /* hit5/hit3 are the incoming hits; hit receives a newly built one */
+
+#ifdef DEBUG9
+  List_T p;
+#endif
+  List_T stage2pairs, all_stage2_starts, all_stage2_ends;
+  int queryend, endlength;
+  Chrpos_T chrstart, chrend;
+  struct Pair_T *pairarray1, *pairarray2;
+  List_T pairs1, pairs2;
+
+  int cdna_direction, sensedir, sense_try;
+  int npairs1, goodness1, matches1, nmatches_posttrim_1,
+    max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1,
+    unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1,
+    ncanonical1, nsemicanonical1, nnoncanonical1;
+  int npairs2, goodness2, matches2, nmatches_posttrim_2,
+    max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2,
+    unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2,
+    ncanonical2, nsemicanonical2, nnoncanonical2;
+  double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1;
+  double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2;
+  Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1;
+  Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2;
+  Univcoord_T knownsplice_limit_low, knownsplice_limit_high;
+
+  Univcoord_T start, end;
+  int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks;
+  GMAP_source_T gmap_source;
+  /* Minus-strand variant: every Stage2/Stage3 call and every Stage3end_new_gmap call below passes plusp false. */
+  /* At most one end is re-aligned per call; on success *oldhit5 or *oldhit3 is replaced and true is returned. */
+  if (hit5->genomicstart < hit3->genomicstart || hit5->genomicend < hit3->genomicend) {
+    /* Scramble, which could occur with circular chromosomes */
+    debug9(printf("Scramble, possibly from a circular chromosome.  Not solving at this time\n"));
+
+  } else if (hit5->querylength - 1 - Stage3end_queryend(hit5) > 10 && Stage3end_querystart(hit3) > 10) {
+    /* Both insides need to be resolved.  Not solving at this time */
+    debug9(printf("Dual to be resolved on inside.  Not solving at this time\n"));
+
+  } else if (hit5->chrnum != 0 && (endlength = hit5->querylength - 1 - Stage3end_queryend(hit5)) > 10) { /* 5' case: more than 10 query positions follow Stage3end_queryend(hit5) */
+    chrstart = hit3->genomicstart - hit5->chroffset; /* Use hit5->chroffset in case hit3 is a transloc */
+    chrend = add_bounded(chrstart,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,hit3->chrhigh);
+    if (chrend > hit5->genomicend - hit5->chroffset) {
+      debug9(printf("Revising chrend\n"));
+      chrend = hit5->genomicend - hit5->chroffset;
+    }
+    queryend = Stage3end_queryend(hit5) + 1; /* start of the remaining tail: queryend + endlength == hit5->querylength */
+    debug9(printf("For ends, chrstart %u, chrend %u\n",chrstart,chrend));
+    if (chrstart < chrend && 
+	(all_stage2_ends = Stage2_compute_ends(
+#ifdef PMAP
+					       &(queryaaseq_ptr[queryend]),&(queryaaseq_ptr[queryend]),
+					       /*querylength*/endlength,/*query_offset*/0*3,
+#else
+					       &(queryuc_ptr_5[queryend]),&(queryuc_ptr_5[queryend]),
+					       /*querylength*/endlength,/*query_offset*/queryend,
+#endif
+					       chrstart,chrend,hit5->chroffset,hit5->chrhigh,/*plusp*/false,genestrand,
+					    
+					       oligoindices_minor,pairpool,diagpool,cellpool,
+					       /*localp should be false*/true,/*skip_repetitive_p*/false,
+					       /*favor_right_p*/false,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) {
+
+      debug9(printf("Got %d ends\n",List_length(all_stage2_ends)));
+      debug9(printf("5' end to be resolved on inside\n"));
+#ifdef DEBUG9
+      for (p = all_stage2_ends; p != NULL; p = List_next(p)) {
+	Pair_dump_list(List_head(p),true);
+      }
+#endif
+      stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit5,queryuc_ptr_5,querylength5,
+					       query5_compress_fwd,query5_compress_rev,
+					       /*chrlength*/hit5->chrhigh - hit5->chroffset,pairpool);
+      debug9(Pair_dump_list(stage2pairs,true));
+
+      knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset; /* taken from the head of the list before it is reversed */
+      stage2pairs = List_reverse(stage2pairs);
+      knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset; /* taken from the head of the reversed list */
+
+      if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) { /* sense hint is read from the mate (hit3), not from hit5 */
+	sense_try = +1;
+      } else if (sensedir == SENSE_ANTI) {
+	sense_try = -1;
+      } else {
+	sense_try = 0;
+      }
+
+      if (hit5->hittype == GMAP) { /* an existing GMAP hit keeps its recorded source */
+	gmap_source = hit5->gmap_source;
+      } else {
+	gmap_source = GMAP_VIA_SUBSTRINGS;
+      }
+
+      if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+				       &matches1,&nmatches_posttrim_1,&max_match_length_1,
+				       &ambig_end_length_5_1,&ambig_end_length_3_1,
+				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+				       &ambig_prob_5_1,&ambig_prob_3_1,
+				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
+				       
+				       &pairarray2,&pairs2,&npairs2,&goodness2,
+				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
+				       &ambig_end_length_5_2,&ambig_end_length_3_2,
+				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+				       &ambig_prob_5_2,&ambig_prob_3_2,
+				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
+
+				       stage2pairs,/*all_stage2_starts*/NULL,all_stage2_ends,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+				       cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+				       watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+				       /*queryseq_ptr*/queryuc_ptr_5,queryuc_ptr_5,querylength5,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+				       /*query_subseq_offset*/0,
+#else
+				       /*query_subseq_offset*/0,
+#endif
+				       hit5->chrnum,hit5->chroffset,hit5->chrhigh,
+				       knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+				       /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+				       sense_try,/*sense_filter*/0,
+				       oligoindices_minor,diagpool,cellpool)) == NULL) {
+	/* Stage3_compute returned NULL: the 5' hit is left unchanged */
+      } else if (pairarray2 != NULL) { /* two candidate pair arrays came back */
+	if (avg_splice_score_1 > avg_splice_score_2) { /* build the hit from candidate 1; candidate 2 is freed below */
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray1,npairs1);
+	  start = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])),
+			      /*plusterm*/Pair_querypos(&(pairarray1[0])),hit5->chrhigh);
+	  end = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+				 /*minusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chroffset);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+					ambig_end_length_5_1,ambig_end_length_3_1,
+					ambig_splicetype_5_1,ambig_splicetype_3_1,
+					avg_splice_score_1,goodness1,
+					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+					/*left*/end,/*genomiclength*/start - end + 1, /* end is passed as the left coordinate here */
+					/*plusp*/false,genestrand,
+					/*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength,
+					/*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray1); /* freed only on failure; on success it was passed to Stage3end_new_gmap and is not freed here */
+	  } else {
+	    if (*private5p == true) { /* the previous hit is freed only when flagged private */
+	      Stage3end_free(&(*oldhit5));
+	    }
+	    debug9(printf("5' resolved on inside\n"));
+	    *oldhit5 = hit;
+	    *private5p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray2);
+
+	} else { /* otherwise build the hit from candidate 2, labelled cdna_direction -1 and SENSE_ANTI */
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray2,npairs2);
+	  start = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[0])),
+			      /*plusterm*/Pair_querypos(&(pairarray2[0])),hit5->chrhigh);
+	  end = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+				 /*minusterm*/querylength5 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit5->chroffset);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+					ambig_end_length_5_2,ambig_end_length_3_2,
+					ambig_splicetype_5_2,ambig_splicetype_3_2,
+					avg_splice_score_2,goodness2,
+					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+					/*left*/end,/*genomiclength*/start - end + 1,
+					/*plusp*/false,genestrand,
+					/*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength,
+					/*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray2);
+	  } else {
+	    if (*private5p == true) {
+	      Stage3end_free(&(*oldhit5));
+	    }
+	    debug9(printf("5' resolved on inside\n"));
+	    *oldhit5 = hit;
+	    *private5p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray1);
+	}
+
+      } else { /* single candidate: pass on the cdna_direction and sensedir that Stage3_compute filled in */
+	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					 pairarray1,npairs1);
+	start = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])),
+			    /*plusterm*/Pair_querypos(&(pairarray1[0])),hit5->chrhigh);
+	end = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+			       /*minusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chroffset);
+
+	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+				      ambig_end_length_5_1,ambig_end_length_3_1,
+				      ambig_splicetype_5_1,ambig_splicetype_3_1,
+				      avg_splice_score_1,goodness1,
+				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+				      /*left*/end,/*genomiclength*/start - end + 1,
+				      /*plusp*/false,genestrand,
+				      /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength,
+				      cdna_direction,sensedir,/*sensedir_knownp*/true,
+				      gmap_source)) == NULL) {
+	  FREE_OUT(pairarray1);
+	} else {
+	  if (*private5p == true) {
+	    Stage3end_free(&(*oldhit5));
+	  }
+	  debug9(printf("5' resolved on inside\n"));
+	  *oldhit5 = hit;
+	  *private5p = true;
+	  changep = true;
+	}
+      }
+
+      List_free(&all_stage2_ends);
+    }
+
+  } else if (hit3->chrnum != 0 && (endlength = Stage3end_querystart(hit3)) > 10) { /* 3' case: Stage3end_querystart(hit3) exceeds 10 */
+    chrend = hit5->genomicend - hit3->chroffset; /* Use hit3->chroffset in case hit5 is a transloc */
+    chrstart = subtract_bounded(chrend,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,0);
+    if (chrstart < hit3->genomicstart - hit3->chroffset) {
+      debug9(printf("Revising chrstart\n"));
+      chrstart = hit3->genomicstart - hit3->chroffset;
+    }
+    debug9(printf("For starts, chrstart %u, chrend %u\n",chrstart,chrend));
+    if (chrstart < chrend && 
+	(all_stage2_starts = Stage2_compute_starts(
+#ifdef PMAP
+						   &(queryaaseq_ptr[0]),&(queryaaseq_ptr[0]),
+						   /*querylength*/endlength,/*query_offset*/0*3,
+#else
+						   &(queryuc_ptr_3[0]),&(queryuc_ptr_3[0]),
+						   /*querylength*/endlength,/*query_offset*/0,
+#endif
+						   chrstart,chrend,hit3->chroffset,hit3->chrhigh,/*plusp*/false,genestrand,
+					    
+						   oligoindices_minor,pairpool,diagpool,cellpool,
+						   /*localp should be false*/true,/*skip_repetitive_p*/false,
+						   /*favor_right_p*/true,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) {
+
+      debug9(printf("Got %d starts\n",List_length(all_stage2_starts)));
+      debug9(printf("3' start to be resolved on inside\n"));
+#ifdef DEBUG9
+      for (p = all_stage2_starts; p != NULL; p = List_next(p)) {
+	Pair_dump_list(List_head(p),true);
+      }
+#endif
+      stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit3,queryuc_ptr_3,querylength3,
+					       query3_compress_fwd,query3_compress_rev,
+					       /*chrlength*/hit3->chrhigh - hit3->chroffset,pairpool);
+      debug9(Pair_dump_list(stage2pairs,true));
+
+      knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset;
+      stage2pairs = List_reverse(stage2pairs);
+      knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset;
+
+      if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) { /* sense hint is read from the mate (hit5), not from hit3 */
+	sense_try = +1;
+      } else if (sensedir == SENSE_ANTI) {
+	sense_try = -1;
+      } else {
+	sense_try = 0;
+      }
+
+      if (hit3->hittype == GMAP) {
+	gmap_source = hit3->gmap_source;
+      } else {
+	gmap_source = GMAP_VIA_SUBSTRINGS;
+      }
+
+      if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1,
+				       &matches1,&nmatches_posttrim_1,&max_match_length_1,
+				       &ambig_end_length_5_1,&ambig_end_length_3_1,
+				       &ambig_splicetype_5_1,&ambig_splicetype_3_1,
+				       &ambig_prob_5_1,&ambig_prob_3_1,
+				       &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1,
+				       &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1,
+
+				       &pairarray2,&pairs2,&npairs2,&goodness2,
+				       &matches2,&nmatches_posttrim_2,&max_match_length_2,
+				       &ambig_end_length_5_2,&ambig_end_length_3_2,
+				       &ambig_splicetype_5_2,&ambig_splicetype_3_2,
+				       &ambig_prob_5_2,&ambig_prob_3_2,
+				       &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2,
+				       &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2,
+
+				       stage2pairs,all_stage2_starts,/*all_stage2_ends*/NULL,
+#ifdef END_KNOWNSPLICING_SHORTCUT
+				       cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc,
+				       watsonp ? query_compress_fwd : query_compress_rev,
+#endif
+				       /*queryseq_ptr*/queryuc_ptr_3,queryuc_ptr_3,querylength3,/*skiplength*/0,
+#ifdef EXTRACT_GENOMICSEG
+				       /*query_subseq_offset*/0,
+#else
+				       /*query_subseq_offset*/0,
+#endif
+				       hit3->chrnum,hit3->chroffset,hit3->chrhigh,
+				       knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand,
+				       /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR,
+				       sense_try,/*sense_filter*/0,
+				       oligoindices_minor,diagpool,cellpool)) == NULL) {
+	/* Stage3_compute returned NULL: the 3' hit is left unchanged */
+      } else if (pairarray2 != NULL) { /* two candidate pair arrays came back */
+	if (avg_splice_score_1 > avg_splice_score_2) { /* build the hit from candidate 1; candidate 2 is freed below */
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray1,npairs1);
+	  start = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])),
+			      /*plusterm*/Pair_querypos(&(pairarray1[0])),hit3->chrhigh);
+	  end = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+				 /*minusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chroffset);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+					ambig_end_length_5_1,ambig_end_length_3_1,
+					ambig_splicetype_5_1,ambig_splicetype_3_1,
+					avg_splice_score_1,goodness1,
+					pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+					/*left*/end,/*genomiclength*/start - end + 1,
+					/*plusp*/false,genestrand,
+					/*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength,
+					/*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray1);
+	  } else {
+	    if (*private3p == true) { /* the previous hit is freed only when flagged private */
+	      Stage3end_free(&(*oldhit3));
+	    }
+	    debug9(printf("3' resolved on inside\n"));
+	    *oldhit3 = hit;
+	    *private3p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray2);
+
+	} else { /* otherwise build the hit from candidate 2, labelled cdna_direction -1 and SENSE_ANTI */
+	  nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					   pairarray2,npairs2);
+	  start = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[0])),
+			      /*plusterm*/Pair_querypos(&(pairarray2[0])),hit3->chrhigh);
+	  end = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])),
+				 /*minusterm*/querylength3 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit3->chroffset);
+
+	  if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2,
+					ambig_end_length_5_2,ambig_end_length_3_2,
+					ambig_splicetype_5_2,ambig_splicetype_3_2,
+					avg_splice_score_2,goodness2,
+					pairarray2,npairs2,nsegments,nintrons,nindelbreaks,
+					/*left*/end,/*genomiclength*/start - end + 1,
+					/*plusp*/false,genestrand,
+					/*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength,
+					/*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false,
+					gmap_source)) == NULL) {
+	    FREE_OUT(pairarray2);
+	  } else {
+	    if (*private3p == true) {
+	      Stage3end_free(&(*oldhit3));
+	    }
+	    debug9(printf("3' resolved on inside\n"));
+	    *oldhit3 = hit;
+	    *private3p = true;
+	    changep = true;
+	  }
+	  FREE_OUT(pairarray1);
+	}
 
-  for (p = hitlist; p != NULL; p = p->rest) {
-    hit = (T) p->first;
-    if (hit->nmismatches_whole < min_nmismatches_whole) {
-      min_nmismatches_whole = hit->nmismatches_whole;
-    }
-  }
+      } else { /* single candidate: pass on the cdna_direction and sensedir that Stage3_compute filled in */
+	nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks,
+					 pairarray1,npairs1);
+	start = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])),
+			    /*plusterm*/Pair_querypos(&(pairarray1[0])),hit3->chrhigh);
+	end = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])),
+			       /*minusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chroffset);
+
+	if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1,
+				      ambig_end_length_5_1,ambig_end_length_3_1,
+				      ambig_splicetype_5_1,ambig_splicetype_3_1,
+				      avg_splice_score_1,goodness1,
+				      pairarray1,npairs1,nsegments,nintrons,nindelbreaks,
+				      /*left*/end,/*genomiclength*/start - end + 1,
+				      /*plusp*/false,genestrand,
+				      /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength,
+				      cdna_direction,sensedir,/*sensedir_knownp*/true,
+				      gmap_source)) == NULL) {
+	  FREE_OUT(pairarray1);
+	} else {
+	  if (*private3p == true) {
+	    Stage3end_free(&(*oldhit3));
+	  }
+	  debug9(printf("3' resolved on inside\n"));
+	  *oldhit3 = hit;
+	  *private3p = true;
+	  changep = true;
+	}
+      }
 
-  for (p = hitlist; p != NULL; p = p->rest) {
-    hit = (T) p->first;
-    if (hit->nmismatches_whole == min_nmismatches_whole) {
-      filtered = List_push(filtered,hit);
-    } else {
-      Stage3end_free(&hit);
+      List_free(&all_stage2_starts);
     }
   }
-  List_free(&hitlist);
 
-  return filtered;
+  return changep;
 }
-#endif
-
 
 
 static Chrpos_T
@@ -13569,22 +14806,317 @@ overlap3_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend
   Substring_T substring;
   List_T p;
 
-  debug10(printf("Entered overlap3_gmap_minus with gmap %d..%d\n",
-		 gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos));
-  for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) {
-    substring = (Substring_T) List_head(p);
-    if (Substring_ambiguous_p(substring) == false) {
-      *genomicstart = Substring_alignstart_chr(substring);
-      *genomicend = Substring_alignend_chr(substring);
-      if ((chrpos = Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
-						  *genomicstart,*genomicend)) > 0) {
-	return chrpos;
-      }
+  debug10(printf("Entered overlap3_gmap_minus with gmap %d..%d\n",
+		 gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos));
+  for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) {
+    substring = (Substring_T) List_head(p);
+    if (Substring_ambiguous_p(substring) == false) {
+      *genomicstart = Substring_alignstart_chr(substring);
+      *genomicend = Substring_alignend_chr(substring);
+      if ((chrpos = Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray,
+						  *genomicstart,*genomicend)) > 0) {
+	return chrpos;
+      }
+    }
+  }
+
+  return 0;
+}
+
+
+static int
+compute_insertlength (Stage3pair_T this) {
+  T hit5, hit3;
+  Chrpos_T chrstart, chrend, chrpos;
+  int querypos;
+  int querylength5, querylength3;
+  /* Returns an insert length for the pair, picking a formula from the hit types (GMAP or not) and strands of its two ends. */
+  hit5 = this->hit5;
+  hit3 = this->hit3;
+  querylength5 = hit5->querylength;
+  querylength3 = hit3->querylength;
+
+  debug10(printf("Computing insertlength on %u..%u to %u..%u\n",
+		 hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
+		 hit3->genomicend - hit3->chroffset,hit3->genomicstart - hit3->chroffset));
+
+  if (hit5->hittype == GMAP && hit3->hittype == GMAP) {
+    debug10(printf("Got hit5 and hit3 both of type GMAP\n"));
+
+    /* Do not try to resolve ambiguity on inside of concordant ends */
+    if (hit5->plusp == true && hit3->plusp == true) {
+      return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+    } else if (hit5->plusp == false && hit3->plusp == false) {
+      return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+    } else {
+      return pair_insert_length_unpaired(hit5,hit3);
+    }
+
+  } else if (hit5->hittype == GMAP) { /* only the 5' end is a GMAP hit (the both-GMAP case was handled above) */
+    debug10(printf("Got hit5 of type GMAP\n"));
+    if (hit5->plusp == true && hit3->plusp == true) {
+      /* Have 5-start..end and 3-start..end */
+      debug10(printf("1 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+		     hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
+
+      if (hit5->genomicend <= hit3->genomicstart) {
+	/* No overlap */
+	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+      } else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) { /* a result above 0 is used as an overlap position */
+	return /* end3 */ chrend - /* start5 */ (chrpos - querypos);
+      } else {
+	/* Still no overlap */
+	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+      }
+
+    } else if (hit5->plusp == false && hit3->plusp == false) {
+      /* Have 3-end..start and 5-end..start */
+      debug10(printf("2 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+		     hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
+
+      if (hit3->genomicstart <= hit5->genomicend) { /* No overlap */
+	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+      } else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
+	return /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1;
+      } else {
+	/* Still no overlap */
+	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+      }
+    } else {
+      return pair_insert_length_unpaired(hit5,hit3);
+    }
+
+  } else if (hit3->hittype == GMAP) { /* only the 3' end is a GMAP hit */
+    debug10(printf("Got hit3 of type GMAP\n"));
+    if (hit5->plusp == true && hit3->plusp == true) {
+      /* Have 5-start..end and 3-start..end */
+      debug10(printf("3 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
+		     hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
+
+      if (hit5->genomicend <= hit3->genomicstart) {
+	/* No overlap */
+	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+      } else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
+	return /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart;
+      } else {
+	/* Still no overlap */
+	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+      }
+
+    } else if (hit5->plusp == false && hit3->plusp == false) {
+      /* Have 3-end..start and 5-end..start */
+      debug10(printf("4 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
+		     hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
+      if (hit3->genomicstart <= hit5->genomicend) {
+	/* No overlap */
+	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+      } else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
+	return /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1;
+      } else {
+	/* Still no overlap */
+	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+      }
+    } else {
+      return pair_insert_length_unpaired(hit5,hit3);
+    }
+
+  } else if (hit5->plusp == true && hit3->plusp == false) { /* neither end is a GMAP hit from this branch onward */
+    /* Have 5-start..end and 3-end..start */
+    /*   or 3-end..start and 5-start..end */
+
+    if (hit5->genomicend < hit3->genomicend) {
+      return (hit3->genomicend - hit5->genomicend) + querylength5 + querylength3;
+    } else if (hit3->genomicstart < hit5->genomicstart) {
+      return (hit5->genomicstart - hit3->genomicstart) + querylength5 + querylength3;
+    } else {
+      return pair_insert_length_unpaired(hit5,hit3);
+    }
+
+  } else if (hit5->plusp == false && hit3->plusp == true) {
+    /* Have 5-end..start and 3-start..end */
+    /*   or 3-start..end and 5-end..start */
+
+    if (hit5->genomicstart < hit3->genomicstart) {
+      return (hit3->genomicstart - hit5->genomicstart) + querylength5 + querylength3;
+    } else if (hit3->genomicend < hit5->genomicend) {
+      return (hit5->genomicend - hit3->genomicend) + querylength5 + querylength3;
+    } else {
+      return pair_insert_length_unpaired(hit5,hit3);
+    }
+
+  } else if (hit5->plusp == true) {
+    /* Concordant directions on same chromosome (plus) */
+    debug10(printf("Concordant on plus strand\n"));
+    /* Have 5-start..end and 3-start..end */
+    if (hit5->genomicend < hit3->genomicstart) { /* NOTE(review): strict < here, whereas the GMAP branches above use <= */
+      /* No overlap */
+      return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
+    } else {
+      return pair_insert_length(hit5,hit3);
+    }
+
+
+  } else {
+    /* Concordant directions on same chromosome (minus) */
+    debug10(printf("Concordant on minus strand\n"));
+    /* Have 3-end..start and 5-end..start */
+    if (hit3->genomicstart < hit5->genomicend) { /* NOTE(review): strict < here, whereas the GMAP branches above use <= */
+      /* No overlap */
+      return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
+    } else {
+      return pair_insert_length(hit5,hit3);
+    }
+  }
+}
+
+
+List_T
+Stage3pair_resolve_insides (List_T hitpairlist, char *queryuc_ptr_5, char *queryuc_ptr_3,
+			    Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
+			    Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
+			    Pairpool_T pairpool,Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+			    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) {
+  List_T result = NULL, p;
+  Stage3pair_T stage3pair;
+  T hit5, hit3;
+  int querylength5, querylength3;
+  int genestrand;
+  bool changep;
+  /* Runs inside resolution on every pair of hitpairlist, updates changed pairs in place, and returns them in a new list. */
+  for (p = hitpairlist; p != NULL; p = List_next(p)) {
+    stage3pair = (Stage3pair_T) List_head(p);
+    hit5 = stage3pair->hit5;
+    hit3 = stage3pair->hit3;
+    querylength5 = hit5->querylength;
+    querylength3 = hit3->querylength;
+    genestrand = stage3pair->genestrand;
+    
+    if (hit5->plusp == true && hit3->plusp == true) { /* both ends on the plus strand */
+      changep = resolve_inside_general_splice_plus(&hit5,&hit3,&stage3pair->private5p,&stage3pair->private3p, /* the locals hit5/hit3 receive any replacement hit */
+						   query5_compress_fwd,query5_compress_rev,
+						   query3_compress_fwd,query3_compress_rev,
+						   queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3,
+						   genestrand,pairpool,dynprogL,dynprogM,dynprogR,
+						   oligoindices_minor,diagpool,cellpool);
+    } else if (hit5->plusp == false && hit3->plusp == false) { /* both ends on the minus strand */
+      changep = resolve_inside_general_splice_minus(&hit5,&hit3,&stage3pair->private5p,&stage3pair->private3p,
+						    query5_compress_fwd,query5_compress_rev,
+						    query3_compress_fwd,query3_compress_rev,
+						    queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3,
+						    genestrand,pairpool,dynprogL,dynprogM,dynprogR,
+						    oligoindices_minor,diagpool,cellpool);
+    } else { /* ends on different strands: no resolution is attempted */
+      changep = false;
+    }
+
+    if (changep == true) { /* store the replacement hit(s) and recompute the fields derived from them */
+      stage3pair->hit5 = hit5;
+      stage3pair->hit3 = hit3;
+      stage3pair->insertlength = compute_insertlength(stage3pair);
+
+      /* Rest of this code is taken from the bottom of Stage3pair_new */
+
+      stage3pair->score = hit5->score + hit3->score /* + unresolved_amb_length */;
+
+      stage3pair->nmatches_posttrim = hit5->nmatches_posttrim + hit3->nmatches_posttrim;
+      stage3pair->nmatches = hit5->nmatches + hit3->nmatches /*- unresolved_amb_length -- not available*/;
+      /* stage3pair->overlap_known_gene_p = false; -- initialized later when resolving multimappers */
+      stage3pair->tally = -1L;
+
+      stage3pair->low = (hit5->low < hit3->low) ? hit5->low : hit3->low;
+      stage3pair->high = (hit5->high > hit3->high) ? hit5->high : hit3->high;
+
+#if 0
+      if (stage3pair->low > stage3pair->high) {
+	fprintf(stderr,"stage3pair->low %u > stage3pair->high %u, hit5->chrnum %d\n",
+		stage3pair->low - stage3pair->chroffset,stage3pair->high - stage3pair->chroffset,hit5->chrnum);
+	abort();
+      }
+#endif
+
+      if (hit5->chrnum == 0 || hit3->chrnum == 0) { /* chrnum 0 on either end: fall back to the summed read lengths */
+	stage3pair->outerlength = querylength5 + querylength3;
+      } else {
+	stage3pair->outerlength = stage3pair->high - stage3pair->low;
+      }
+
+      stage3pair->nsplices = hit5->nsplices + hit3->nsplices;
+
+      debug0(printf("Revised new pair %p from %p and %p with private %d, %d\n",
+		    stage3pair,hit5,hit3,stage3pair->private5p,stage3pair->private3p));
+      debug0(printf("  hittypes %s and %s\n",hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
+      debug0(printf("  sensedirs %d and %d\n",hit5->sensedir,hit3->sensedir));
+      debug0(printf("  chrpos %u..%u and %u..%u\n",
+		    hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,
+		    hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset));
+
+      if (hit5->circularpos < 0 && hit3->circularpos < 0) { /* circularp is false only when both circularpos values are negative */
+	stage3pair->circularp = false;
+      } else {
+	stage3pair->circularp = true;
+      }
+
+      /* Fixing insertlength for circular pairs */
+      if (stage3pair->insertlength > hit5->chrlength) {
+	stage3pair->insertlength -= hit5->chrlength;
+      }
+      
+      /* Note: the new hit5 or hit3 is guaranteed to have private5p or private3p set to true, respectively */
+      if (hit5->circularalias == +1) {
+	debug0(printf("Unaliasing 5' end\n"));
+	if (stage3pair->private5p == false) { /* copy first so unalias_circular acts on a hit flagged private */
+	  stage3pair->hit5 = Stage3end_copy(hit5);
+	  stage3pair->private5p = true;
+	}
+	unalias_circular(stage3pair->hit5);
+      }
+
+      if (hit3->circularalias == +1) {
+	debug0(printf("Unaliasing 3' end\n"));
+	if (stage3pair->private3p == false) {
+	  stage3pair->hit3 = Stage3end_copy(hit3);
+	  stage3pair->private3p = true;
+	}
+	unalias_circular(stage3pair->hit3);
+      }
+    }
+
+    result = List_push(result,(void *) stage3pair); /* NOTE(review): if List_push prepends, result is in reverse order of hitpairlist -- confirm */
+  }
+    
+  List_free(&hitpairlist); /* the pairs themselves were pushed onto result and are returned to the caller */
+  return result;
+}
+
+
+
+#if 0 /* Stage3end_filter_bymatch is compiled out by this guard */
+List_T
+Stage3end_filter_bymatch (List_T hitlist) {
+  List_T filtered = NULL, p;
+  T hit;
+  int min_nmismatches_whole = 1000; /* NOTE(review): if every hit has more than 1000 mismatches, none matches the minimum and all are freed */
+  /* Keeps only the hits whose nmismatches_whole equals the minimum over hitlist; the others are freed. */
+  for (p = hitlist; p != NULL; p = p->rest) { /* first pass: find the minimum */
+    hit = (T) p->first;
+    if (hit->nmismatches_whole < min_nmismatches_whole) {
+      min_nmismatches_whole = hit->nmismatches_whole;
+    }
+  }
+
+  for (p = hitlist; p != NULL; p = p->rest) { /* second pass: keep hits at the minimum, free the rest */
+    hit = (T) p->first;
+    if (hit->nmismatches_whole == min_nmismatches_whole) {
+      filtered = List_push(filtered,hit);
+    } else {
+      Stage3end_free(&hit);
     }
   }
+  List_free(&hitlist);
 
-  return 0;
+  return filtered;
 }
+#endif
 
 
 /* Should not set ambiguous flag in substrings, because resolution of
@@ -13593,7 +15125,7 @@ overlap3_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend
 static void
 resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, int *amb_resolve_5, int *amb_resolve_3,
 				      int *amb_status_inside, T hit5, T hit3, int querylength5, int querylength3) {
-  int insertlength;
+  Chrpos_T insertlength;
   Univcoord_T genomicstart, genomicend;
   int nbingo, bingoi5, bingoi3;
   int nbest, besti5, besti3, i, j;
@@ -13607,18 +15139,24 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, int *amb_resol
 
   *unresolved_amb_length = 0;
 
+  debug9(printf("resolve plus: hit5 %s and hit3 %s\n",
+		hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
   if (hit5->hittype == GMAP) {
     substring5 = (Substring_T) NULL;
+    debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit5),Stage3end_gmap_queryend(hit5)));
   } else {
     substring5 = (Substring_T) List_head(hit5->substrings_Nto1);
+    debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit5),Stage3end_substrings_queryend(hit5)));
+    debug9(printf("hit5 ambiguous_p %d\n",Substring_ambiguous_p(substring5)));
   }
   if (hit3->hittype == GMAP) {
     substring3 = (Substring_T) NULL;
+    debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit3),Stage3end_gmap_queryend(hit3)));
   } else {
     substring3 = (Substring_T) List_head(hit3->substrings_1toN);
+    debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit3),Stage3end_substrings_queryend(hit3)));
+    debug9(printf("hit3 ambiguous_p %d\n",Substring_ambiguous_p(substring3)));
   }
-  debug9(printf("resolve plus: hit5 %s and hit3 %s\n",
-		hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
 
   if (substring5 != NULL && Substring_ambiguous_p(substring5) == true && 
       substring3 != NULL && Substring_ambiguous_p(substring3) == true) {
@@ -13801,7 +15339,7 @@ resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, int *amb_resol
 static void
 resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, int *amb_resolve_5, int *amb_resolve_3,
 				       int *amb_status_inside, T hit5, T hit3, int querylength5, int querylength3) {
-  int insertlength;
+  Chrpos_T insertlength;
   Univcoord_T genomicstart, genomicend;
   int nbingo, bingoi5, bingoi3;
   int nbest, besti5, besti3, i, j;
@@ -13812,20 +15350,25 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, int *amb_reso
   int *end_amb_nmismatches, *start_amb_nmismatches;
   int end_amb_length_5, start_amb_length_3;
 
+
   *unresolved_amb_length = 0;
 
   debug9(printf("resolve minus: hit5 %s and hit3 %s\n",
 		hittype_string(hit5->hittype),hittype_string(hit3->hittype)));
   if (hit5->hittype == GMAP) {
     substring5 = (Substring_T) NULL;
+    debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit5),Stage3end_gmap_queryend(hit5)));
   } else {
     substring5 = (Substring_T) List_head(hit5->substrings_Nto1);
+    debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit5),Stage3end_substrings_queryend(hit5)));
     debug9(printf("hit5 ambiguous_p %d\n",Substring_ambiguous_p(substring5)));
   }
   if (hit3->hittype == GMAP) {
     substring3 = (Substring_T) NULL;
+    debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit3),Stage3end_gmap_queryend(hit3)));
   } else {
     substring3 = (Substring_T) List_head(hit3->substrings_1toN);
+    debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit3),Stage3end_substrings_queryend(hit3)));
     debug9(printf("hit3 ambiguous_p %d\n",Substring_ambiguous_p(substring3)));
   }
 
@@ -14006,7 +15549,6 @@ resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, int *amb_reso
     debug9(printf("\n"));
   }
 
-
   return;
 }
 
@@ -14040,151 +15582,6 @@ alias_circular (T hit) {
   return;
 }
 
-
-static int
-compute_insertlength (Stage3pair_T this) {
-  T hit5, hit3;
-  Chrpos_T chrstart, chrend, chrpos;
-  int querypos;
-  int querylength5, querylength3;
-
-
-  hit5 = this->hit5;
-  hit3 = this->hit3;
-  querylength5 = hit5->querylength;
-  querylength3 = hit3->querylength;
-
-  if (hit5->hittype == GMAP && hit3->hittype == GMAP) {
-    debug10(printf("Got hit5 and hit3 both of type GMAP\n"));
-
-    /* Do not try to resolve ambiguity on inside of concordant ends */
-    if (hit5->plusp == true && hit3->plusp == true) {
-      return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
-    } else if (hit5->plusp == false && hit3->plusp == false) {
-      return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
-    } else {
-      return pair_insert_length_unpaired(hit5,hit3);
-    }
-
-  } else if (hit5->hittype == GMAP) {
-    debug10(printf("Got hit5 of type GMAP\n"));
-    if (hit5->plusp == true && hit3->plusp == true) {
-      /* Have 5-start..end and 3-start..end */
-      debug10(printf("1 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
-		     hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
-
-      if (hit5->genomicend <= hit3->genomicstart) {
-	/* No overlap */
-	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
-      } else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
-	return /* end3 */ chrend - /* start5 */ (chrpos - querypos);
-      } else {
-	/* Still no overlap */
-	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
-      }
-
-    } else if (hit5->plusp == false && hit3->plusp == false) {
-      /* Have 3-end..start and 5-end..start */
-      debug10(printf("2 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
-		     hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
-
-      if (hit3->genomicstart <= hit5->genomicend) {
-	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
-      } else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) {
-	return /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1;
-      } else {
-	/* Still no overlap */
-	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
-      }
-    } else {
-      return pair_insert_length_unpaired(hit5,hit3);
-    }
-
-  } else if (hit3->hittype == GMAP) {
-    debug10(printf("Got hit3 of type GMAP\n"));
-    if (hit5->plusp == true && hit3->plusp == true) {
-      /* Have 5-start..end and 3-start..end */
-      debug10(printf("3 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n",
-		     hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset));
-
-      if (hit5->genomicend <= hit3->genomicstart) {
-	/* No overlap */
-	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
-      } else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
-	return /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart;
-      } else {
-	/* Still no overlap */
-	return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
-      }
-
-    } else if (hit5->plusp == false && hit3->plusp == false) {
-      /* Have 3-end..start and 5-end..start */
-      debug10(printf("4 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n",
-		     hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset));
-      if (hit3->genomicstart <= hit5->genomicend) {
-	/* No overlap */
-	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
-      } else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) {
-	return /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1;
-      } else {
-	/* Still no overlap */
-	return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
-      }
-    } else {
-      return pair_insert_length_unpaired(hit5,hit3);
-    }
-
-  } else if (hit5->plusp == true && hit3->plusp == false) {
-    /* Have 5-start..end and 3-end..start */
-    /*   or 3-end..start and 5-start..end */
-
-    if (hit5->genomicend < hit3->genomicend) {
-      return (hit3->genomicend - hit5->genomicend) + querylength5 + querylength3;
-    } else if (hit3->genomicstart < hit5->genomicstart) {
-      return (hit5->genomicstart - hit3->genomicstart) + querylength5 + querylength3;
-    } else {
-      return pair_insert_length_unpaired(hit5,hit3);
-    }
-
-  } else if (hit5->plusp == false && hit3->plusp == true) {
-    /* Have 5-end..start and 3-start..end */
-    /*   or 3-start..end and 5-end..start */
-
-    if (hit5->genomicstart < hit3->genomicstart) {
-      return (hit3->genomicstart - hit5->genomicstart) + querylength5 + querylength3;
-    } else if (hit3->genomicend < hit5->genomicend) {
-      return (hit5->genomicend - hit3->genomicend) + querylength5 + querylength3;
-    } else {
-      return pair_insert_length_unpaired(hit5,hit3);
-    }
-
-  } else if (hit5->plusp == true) {
-    /* Concordant directions on same chromosome (plus) */
-    debug10(printf("Concordant on plus strand\n"));
-    /* Have 5-start..end and 3-start..end */
-    if (hit5->genomicend < hit3->genomicstart) {
-      /* No overlap */
-      return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3;
-    } else {
-      return pair_insert_length(hit5,hit3);
-    }
-
-
-  } else {
-    /* Concordant directions on same chromosome (minus) */
-    debug10(printf("Concordant on minus strand\n"));
-    /* Have 3-end..start and 5-end..start */
-    if (hit3->genomicstart < hit5->genomicend) {
-      /* No overlap */
-      return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3;
-    } else {
-      return pair_insert_length(hit5,hit3);
-    }
-  }
-}
-
-
-
 Stage3pair_T
 Stage3pair_new (T hit5, T hit3,	int genestrand,	Pairtype_T pairtype,
 		bool private5p, bool private3p, bool expect_concordant_p) {
@@ -14201,7 +15598,7 @@ Stage3pair_new (T hit5, T hit3,	int genestrand,	Pairtype_T pairtype,
   int unresolved_amb_length = 0;
   /* int found_score = 0; */
   bool overreach5p, overreach3p;
-  int pairmax;
+  Chrpos_T pairmax;
 
   int querylength5 = hit5->querylength;
   int querylength3 = hit3->querylength;
@@ -14876,7 +16273,7 @@ Stage3pair_new (T hit5, T hit3,	int genestrand,	Pairtype_T pairtype,
   }
 
   if (SENSE_CONSISTENT_P(hit5->sensedir,hit3->sensedir)) {
-    debug0(printf("senses are consistent\n"));
+    debug0(printf("senses %d and %d are consistent\n",hit5->sensedir,hit3->sensedir));
     new->sense_consistent_p = true;
 
   } else if (expect_concordant_p == true) {
@@ -14948,7 +16345,7 @@ Stage3pair_new (T hit5, T hit3,	int genestrand,	Pairtype_T pairtype,
   }
 
   /* Fixing insertlength for circular pairs */
-  if (new->insertlength > (int) hit5->chrlength) {
+  if (new->insertlength > hit5->chrlength) {
     new->insertlength -= hit5->chrlength;
   }
 
@@ -15011,22 +16408,23 @@ hitpair_sort_cmp (const void *a, const void *b) {
   Univcoord_T x_hit3_high, x_hit3_low, y_hit3_high, y_hit3_low;
   Univcoord_T x_low, x_high, y_low, y_high;
   
-  debug8(printf("  Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n",
+  debug8(printf("  Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d, GMAP splice prob %f+%f\n",
 		Pairtype_string(x->pairtype),hittype_string(x->hit5->hittype),
 		hittype_string(x->hit3->hittype),x,
 		x->hit5->low - x->hit5->chroffset,x->hit5->high - x->hit5->chroffset,
 		x->hit3->low - x->hit3->chroffset,x->hit3->high - x->hit3->chroffset,
 		x->dir,x->hit5->circularalias,x->hit3->circularalias,x->nmatches,x->nmatches_posttrim,
-		amb_length(x->hit5),amb_length(x->hit3),x->hit5->sensedir,x->hit3->sensedir));
+		amb_length(x->hit5),amb_length(x->hit3),x->hit5->sensedir,x->hit3->sensedir,
+		x->hit5->gmap_avg_splice_score,x->hit3->gmap_avg_splice_score));
 
-  debug8(printf("       with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n",
+  debug8(printf("       with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d, GMAP splice prob %f+%f\n",
 		Pairtype_string(y->pairtype),hittype_string(y->hit5->hittype),
 		hittype_string(y->hit3->hittype),y,
 		y->hit5->low - y->hit5->chroffset,y->hit5->high - y->hit5->chroffset,
 		y->hit3->low - y->hit3->chroffset,y->hit3->high - y->hit3->chroffset,
 		y->dir,y->hit5->circularalias,y->hit3->circularalias,y->nmatches,y->nmatches_posttrim,
-		amb_length(y->hit5),amb_length(y->hit3),y->hit5->sensedir,y->hit3->sensedir));
-
+		amb_length(y->hit5),amb_length(y->hit3),y->hit5->sensedir,y->hit3->sensedir,
+		y->hit5->gmap_avg_splice_score,y->hit3->gmap_avg_splice_score));
 
   x_hit5_low = normalize_coord(x->hit5->low,x->hit5->circularalias,x->hit5->chrlength);
   x_hit5_high = normalize_coord(x->hit5->high,x->hit5->circularalias,x->hit5->chrlength);
@@ -15194,6 +16592,7 @@ hitpair_sort_cmp (const void *a, const void *b) {
     return +1;
 #endif
 
+#if 0
   } else if (x->sense_consistent_p == true) {
     if ((x->hit5->sensedir != 0 || x->hit3->sensedir != 0) &&
 	(y->hit5->sensedir == 0 && y->hit3->sensedir == 0)) {
@@ -15204,6 +16603,23 @@ hitpair_sort_cmp (const void *a, const void *b) {
     } else {
       return 0;
     }
+#endif
+
+  } else if (x->hit5->gmap_avg_splice_score + x->hit3->gmap_avg_splice_score >
+	     y->hit5->gmap_avg_splice_score + y->hit3->gmap_avg_splice_score) {
+    return -1;
+
+  } else if (y->hit5->gmap_avg_splice_score + y->hit3->gmap_avg_splice_score >
+	     x->hit5->gmap_avg_splice_score + x->hit3->gmap_avg_splice_score) {
+    return +1;
+
+  } else if (x->hit5->splice_score + x->hit3->splice_score >
+	     y->hit5->splice_score + y->hit3->splice_score) {
+    return -1;
+
+  } else if (y->hit5->splice_score + y->hit3->splice_score >
+	     x->hit5->splice_score + x->hit3->splice_score) {
+    return +1;
 
   } else {
     return 0;
@@ -15657,6 +17073,8 @@ static int
 hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
 		      Stage3pair_T best_hitpair, bool finalp) {
   double prob1, prob2;
+  /* Chrpos_T total_querylength, best_total_querylength; */
+  double zscore, best_zscore;
 
 #if 0
   int hitpair_nmatches, best_hitpair_nmatches;
@@ -15971,15 +17389,20 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
       /* k is better */
       debug8(printf(" => wins by dual splice prob %f vs %f\n",prob1,prob2));
       return +1;
+    } else {
+      debug8(printf(" => neither wins\n"));
     }
 
+
+#if 0
     /* Overlapping ends worse than separate ends */
-    if (hitpair->insertlength <= hitpair->hit5->querylength + hitpair->hit3->querylength &&
-	best_hitpair->insertlength > best_hitpair->hit5->querylength + best_hitpair->hit3->querylength) {
+    total_querylength = (Chrpos_T) (hitpair->hit5->querylength + hitpair->hit3->querylength);
+    best_total_querylength = (Chrpos_T) (best_hitpair->hit5->querylength + best_hitpair->hit3->querylength);
+
+    if (hitpair->insertlength <= total_querylength && best_hitpair->insertlength > best_total_querylength) {
       debug8(printf(" => loses by being overlapping\n"));
       return -1;
-    } else if (hitpair->insertlength > hitpair->hit5->querylength + hitpair->hit3->querylength &&
-	       best_hitpair->insertlength <= best_hitpair->hit5->querylength + best_hitpair->hit3->querylength) {
+    } else if (hitpair->insertlength > total_querylength && best_hitpair->insertlength <= best_total_querylength) {
       debug8(printf(" => wins by being separate\n"));
       return +1;
 
@@ -16027,6 +17450,36 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair,
       *equalp = true;
       return 0;
     }
+#endif
+
+    /* Look at expected pairlength and pairlength deviation */
+    if (hitpair->insertlength < expected_pairlength) {
+      zscore = (double) (expected_pairlength - (Chrpos_T) hitpair->insertlength) / (double) pairlength_deviation;
+    } else {
+      zscore = (double) ((Chrpos_T) hitpair->insertlength - expected_pairlength) / (double) pairlength_deviation;
+    }
+    if (best_hitpair->insertlength < expected_pairlength) {
+      best_zscore = (double) (expected_pairlength - (Chrpos_T) best_hitpair->insertlength) / (double) pairlength_deviation;
+    } else {
+      best_zscore = (double) ((Chrpos_T) best_hitpair->insertlength - expected_pairlength) / (double) pairlength_deviation;
+    }
+    debug8(printf("expected_pairlength %u, pairlength_deviation %u\n",expected_pairlength,pairlength_deviation));
+    debug8(printf("Comparing insertlength %d (z score %f) with best_insertlength %d (zscore %f)\n",
+		  hitpair->insertlength,zscore,best_hitpair->insertlength,best_zscore));
+
+    if (zscore > best_zscore + 1.0) {
+      /* k is worse */
+      debug8(printf(" => loses by insertlength and zscore\n"));
+      return -1;
+    } else if (best_zscore > zscore + 1.0) {
+      /* k is better */
+      debug8(printf(" => wins by insertlength and zscore\n"));
+      return +1;
+    }
+    
+    debug8(printf("  => equal\n"));
+    *equalp = true;
+    return 0;
   }
 }
 
@@ -16229,13 +17682,24 @@ pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
   debug8(
 	 for (i = 0; i < n; i++) {
 	   hitpair = hitpairs[i];
-	   printf("  Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d and %d\n",
+	   printf("  Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d and %d.",
 		  i,Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype),
 		  hittype_string(hitpair->hit3->hittype),hitpair,
 		  hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
 		  hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
 		  hitpair->dir,hitpair->hit5->circularalias,hitpair->hit3->circularalias,hitpair->nmatches,hitpair->nmatches_posttrim,
 		  amb_length(hitpair->hit5),amb_length(hitpair->hit3),hitpair->hit5->sensedir,hitpair->hit3->sensedir);
+	   if (hitpair->hit5->hittype == GMAP) {
+	     printf("  5' GMAP splice prob %f",hitpair->hit5->gmap_avg_splice_score);
+	   } else if (hitpair->hit5->hittype == TRANSLOC_SPLICE) {
+	     printf("  5' TRANSLOC splice probs %f",hitpair->hit5->splice_score);
+	   }
+	   if (hitpair->hit3->hittype == GMAP) {
+	     printf("  3' GMAP splice prob %f",hitpair->hit3->gmap_avg_splice_score);
+	   } else if (hitpair->hit3->hittype == TRANSLOC_SPLICE) {
+	     printf("  3' TRANSLOC splice probs %f",hitpair->hit3->splice_score);
+	   }
+	   printf("\n");
 	 }
 	 );
 
@@ -16320,11 +17784,9 @@ List_T
 Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
   List_T unique_separate, unique_overlapping,
     separate = NULL, overlapping = NULL, p;
-  Stage3pair_T hitpair;
+  Stage3pair_T hitpair_separate, hitpair_overlapping, hitpair;
 
-  List_T indep_overlapping = NULL;
   Stage3pair_T *array_separate, *array_overlapping;
-  Stage3pair_T hitpair_overlapping;
   Univcoord_T low, high;
   bool subsumedp, equalp;
   int n_separate, n_overlapping, i, j;
@@ -16332,7 +17794,7 @@ Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
 
   for (p = hitpairlist; p != NULL; p = List_next(p)) {
     hitpair = (Stage3pair_T) List_head(p);
-    if (hitpair->insertlength <= hitpair->hit5->querylength + hitpair->hit3->querylength) {
+    if (hitpair->insertlength <= (Chrpos_T) (hitpair->hit5->querylength + hitpair->hit3->querylength)) {
       overlapping = List_push(overlapping,(void *) hitpair);
     } else {
       separate = List_push(separate,(void *) hitpair);
@@ -16358,7 +17820,6 @@ Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
     List_fill_array_and_free((void **) array_overlapping,&unique_overlapping);
 #else
     array_overlapping = (Stage3pair_T *) List_to_array(unique_overlapping,NULL);
-    List_free(&unique_overlapping);
 #endif
 
     n_separate = List_length(unique_separate);
@@ -16367,39 +17828,101 @@ Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
     List_fill_array((void **) array_separate,unique_separate);
 #else
     array_separate = (Stage3pair_T *) List_to_array(unique_separate,NULL);
-    /* List_free(&unique_separate); -- save for final result */
 #endif
 
     qsort(array_overlapping,n_overlapping,sizeof(Stage3pair_T),hitpair_position_cmp);
     qsort(array_separate,n_separate,sizeof(Stage3pair_T),hitpair_position_cmp);
 
+    /* 1.  First, favor overlapping (with smaller insertlengths) */
+    /* Keep unique_overlapping; rebuild unique_separate with only the separate pairs that are not subsumed */
+    List_free(&unique_separate);
+    unique_separate = (List_T) NULL;
+
     i = j = 0;
-    for (i = 0; i < n_overlapping; i++) {
-      hitpair_overlapping = array_overlapping[i];
-      low = hitpair_overlapping->low;
-      high = hitpair_overlapping->high;
-      while (j >= 0 && array_separate[j]->high >= low) {
+    for (i = 0; i < n_separate; i++) {
+      hitpair_separate = array_separate[i];
+      low = hitpair_separate->low;
+      high = hitpair_separate->high;
+      while (j >= 0 && array_overlapping[j]->high >= low) {
 	j--;
       }
       j += 1;
 
       subsumedp = false;
-      while (j < n_separate && subsumedp == false && array_separate[j]->low <= high) {
-	if (hitpair_goodness_cmp(&equalp,array_separate[j],
-				 hitpair_overlapping,finalp) > 0) {
-	  debug8(printf("separate pair %d better than overlapping pair %d\n",j,i));
-	  subsumedp = hitpair_subsumption(array_separate[j],hitpair_overlapping);
+      while (j < n_overlapping && subsumedp == false && array_overlapping[j]->low <= high) {
+	if (hitpair_goodness_cmp(&equalp,array_overlapping[j],
+				 hitpair_separate,finalp) > 0) {
+	  debug8(printf("overlapping pair %d better than separate pair %d\n",j,i));
+	  subsumedp = hitpair_subsumption(hitpair_separate,array_overlapping[j]);
 	  debug8(printf("  checking if separate pair %d subsumes overlapping pair %d => %d\n",
-			j,i,subsumedp));
+			i,j,subsumedp));
 	}
 	j++;
       }
       j -= 1;
 
       if (subsumedp == true) {
-	Stage3pair_free(&hitpair_overlapping);
+	Stage3pair_free(&hitpair_separate);
       } else {
-	indep_overlapping = List_push(indep_overlapping,(void *) hitpair_overlapping);
+        unique_separate = List_push(unique_separate,(void *) hitpair_separate);
+      }
+    }
+
+#ifdef USE_ALLOCA_FOR_HITS
+    FREEA(array_separate);
+#else
+    FREE(array_separate);
+#endif
+
+    if ((n_separate = List_length(unique_separate)) == 0) {
+#ifdef USE_ALLOCA_FOR_HITS
+      FREEA(array_overlapping);
+#else
+      FREE(array_overlapping);
+#endif
+      return unique_overlapping;
+
+    } else {
+#ifdef USE_ALLOCA_FOR_HITS
+      array_separate = (Stage3pair_T *) MALLOCA(n_separate * sizeof(Stage3pair_T));
+      List_fill_array((void **) array_separate,unique_separate);
+#else
+      array_separate = (Stage3pair_T *) List_to_array(unique_separate,NULL);
+#endif
+
+      /* 2.  Second, favor separate (with larger insertlengths) */
+      /* Keep the filtered unique_separate; rebuild unique_overlapping with only the overlapping pairs that are not subsumed */
+      List_free(&unique_overlapping);
+      unique_overlapping = (List_T) NULL;
+
+      i = j = 0;
+      for (i = 0; i < n_overlapping; i++) {
+	hitpair_overlapping = array_overlapping[i];
+	low = hitpair_overlapping->low;
+	high = hitpair_overlapping->high;
+	while (j >= 0 && array_separate[j]->high >= low) {
+	  j--;
+	}
+	j += 1;
+
+	subsumedp = false;
+	while (j < n_separate && subsumedp == false && array_separate[j]->low <= high) {
+	  if (hitpair_goodness_cmp(&equalp,array_separate[j],
+				   hitpair_overlapping,finalp) > 0) {
+	    debug8(printf("separate pair %d better than overlapping pair %d\n",j,i));
+	    subsumedp = hitpair_subsumption(array_separate[j],hitpair_overlapping);
+	    debug8(printf("  checking if separate pair %d subsumes overlapping pair %d => %d\n",
+			  j,i,subsumedp));
+	  }
+	  j++;
+	}
+	j -= 1;
+	
+	if (subsumedp == true) {
+	  Stage3pair_free(&hitpair_overlapping);
+	} else {
+	  unique_overlapping = List_push(unique_overlapping,(void *) hitpair_overlapping);
+	}
       }
     }
 
@@ -16411,7 +17934,7 @@ Stage3pair_remove_overlaps (List_T hitpairlist, bool translocp, bool finalp) {
     FREE(array_overlapping);
 #endif
 
-    return List_append(unique_separate,indep_overlapping);
+    return List_append(unique_overlapping,unique_separate);
   }
 }
 
@@ -16794,6 +18317,9 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
   int trim_left_5 = querylength5, trim_right_5 = querylength5,
     trim_left_3 = querylength3, trim_right_3 = querylength3, trim_left, trim_right;
   int nindelbreaks, nbadintrons;
+  int min_badintrons_5, min_badintrons_3;
+  int nleft;
+
 
 #if 0 /* DISTANT_SPLICE_SPECIAL */
   bool shortdistance_p = false;
@@ -16817,15 +18343,15 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
     hit5 = hitpair->hit5;
     hit3 = hitpair->hit3;
 
-    debug6(printf("hit5 %u..%u type %s, nsegments %d, trim_left: %d%s, trim_right %d%s, start_ambig %d, end_ambig %d.  hit3 %u..%u type %s, nsegments %d, trim_left %d%s, trim_right %d%s, start_ambig %d, end_ambig %d, sensedirs %d and %d.\n",
+    debug6(printf("hit5 %u..%u type %s, nsegments %d, nindels %d, trim_left: %d%s, trim_right %d%s, start_ambig %d, end_ambig %d.  hit3 %u..%u type %s, nsegments %d, nindels %d, trim_left %d%s, trim_right %d%s, start_ambig %d, end_ambig %d, sensedirs %d and %d, gmap_splice_scores %f and %f.\n",
 		  hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,hittype_string(hit5->hittype),
-		  hit5->nsegments,hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "",
+		  hit5->nsegments,hit5->nindels,hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "",
 		  hit5->trim_right,hit5->trim_right_splicep ? " (splice)" : "",
 		  start_amb_length(hit5),end_amb_length(hit5),
 		  hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,hittype_string(hit3->hittype),
-		  hit3->nsegments,hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "",
+		  hit3->nsegments,hit3->nindels,hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "",
 		  hit3->trim_right,hit3->trim_right_splicep ? " (splice)" : "",
-		  start_amb_length(hit3),end_amb_length(hit3),hit5->sensedir,hit3->sensedir));
+		  start_amb_length(hit3),end_amb_length(hit3),hit5->sensedir,hit3->sensedir,hit5->gmap_avg_splice_score,hit3->gmap_avg_splice_score));
 
     if (hit5->hittype == TERMINAL) {
       /* Don't allow terminals to set trims */
@@ -16895,6 +18421,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
   debug6(printf("overall 3': trim_left %d, trim_right %d\n",trim_left_3,trim_right_3));
 
 
+  min_badintrons_5 = querylength5;
+  min_badintrons_3 = querylength3;
   for (p = hitpairlist; p != NULL; p = p->rest) {
     hitpair = (Stage3pair_T) p->first;
     hit5 = hitpair->hit5;
@@ -16931,6 +18459,12 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
       debug6(printf("  add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit5->pairarray,hit5->npairs,
 								    trim_left_5,trim_right_5,start_amb_length(hit5),end_amb_length(hit5),
 								    hit5->querylength)));
+      debug6(printf("  nbadintrons %d.",nbadintrons));
+      hit5->gmap_nbadintrons = nbadintrons;
+      if (nbadintrons < min_badintrons_5) {
+	min_badintrons_5 = nbadintrons;
+      }
+
       if (start_amb_length(hit5) > 0) {
 	debug6(printf("  add penalty for start amb %d.",amb_penalty));
 	hit5->score_eventrim += amb_penalty;
@@ -16952,6 +18486,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
       
     } else {
       hit5->score_eventrim = 0;	/* was hit5->penalties */
+      min_badintrons_5 = 0;
       debug6(printf("score 5' OTHER:"));
 
       for (q = hit5->substrings_1toN; q != NULL; q = List_next(q)) {
@@ -17018,6 +18553,11 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
       debug6(printf("  add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit3->pairarray,hit3->npairs,
 								    trim_left_3,trim_right_3,start_amb_length(hit3),end_amb_length(hit3),
 								    hit3->querylength)));
+      debug6(printf("  nbadintrons %d.",nbadintrons));
+      hit3->gmap_nbadintrons = nbadintrons;
+      if (nbadintrons < min_badintrons_3) {
+	min_badintrons_3 = nbadintrons;
+      }
 
       if (start_amb_length(hit3) > 0) {
 	debug6(printf("  add penalty for start amb %d.",amb_penalty));
@@ -17051,6 +18591,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 
     } else {
       hit3->score_eventrim = 0;  /* was hit3->penalties */
+      min_badintrons_3 = 0;
       debug6(printf("score 3' OTHER:"));
 
       for (q = hit3->substrings_1toN; q != NULL; q = List_next(q)) {
@@ -17094,6 +18635,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
       minscore = hitpair->score_eventrim;
     }
   }
+  debug6(printf("MINSCORE: %d\n",minscore));
 
   if (finalp == false) {
     for (p = hitpairlist; p != NULL; p = p->rest) {
@@ -17148,7 +18690,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 
   } else {
 #ifdef DO_FINAL
-    /* Final: based on nmatches_posttrim + amb_length */
+    /* Final: based on score first, then within optimal score, based on nmatches_posttrim + amb_length */
     max_nmatches = 0;
     for (p = hitpairlist; p != NULL; p = p->rest) {
       hitpair = (Stage3pair_T) p->first;
@@ -17212,7 +18754,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 
 #ifdef DO_FINAL
       } else if (hitpair->nmatches_posttrim + amb_length(hitpair->hit5) + amb_length(hitpair->hit3) < cutoff_level) {
-	debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches_posttrim %d (%d+%d) + amb %d+%d < cutoff_level %d (finalp %d)\n",
+	debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u based on nmatches: nmatches_posttrim %d (%d+%d) + amb %d+%d < cutoff_level %d (finalp %d)\n",
 		      hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
 		      hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
 		      hitpair->nmatches_posttrim,hitpair->hit5->nmatches_posttrim,hitpair->hit3->nmatches_posttrim,
@@ -17238,8 +18780,44 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 	optimal = List_push(optimal,hitpair);
       }
     }
-  }
 
+    /* Filter GMAP hits with bad introns */
+    debug6(printf("Filtering GMAP hits with bad introns: mininum was %d and %d\n",
+		  min_badintrons_5,min_badintrons_3));
+
+    nleft = 0;
+    for (p = optimal; p != NULL; p = p->rest) {
+      hitpair = (Stage3pair_T) p->first;
+
+      if (hitpair->hit5->gmap_nbadintrons == min_badintrons_5 && hitpair->hit3->gmap_nbadintrons == min_badintrons_3) {
+	nleft += 1;
+      } else {
+	/* Candidate for elimination */
+      }
+    }
+
+    debug6(printf("If we eliminated based on bad introns, would have %d left\n",nleft));
+    if (nleft > 0) {
+      /* Proceed to eliminate based on bad introns */	
+      List_free(&hitpairlist);
+      hitpairlist = optimal;
+      optimal = (List_T) NULL;
+
+      for (p = hitpairlist; p != NULL; p = p->rest) {
+        hitpair = (Stage3pair_T) p->first;
+        if (hitpair->hit5->gmap_nbadintrons > min_badintrons_5 || hitpair->hit3->gmap_nbadintrons > min_badintrons_3) {
+	  debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nbadintrons %d+%d\n",
+	                hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
+		        hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
+       	                hitpair->hit5->gmap_nbadintrons,hitpair->hit3->gmap_nbadintrons));
+  	  *eliminatedp = true;
+	  Stage3pair_free(&hitpair);
+        } else {
+	  optimal = List_push(optimal,(void *) hitpair);
+        }
+      }
+    }
+  }
 
   List_free(&hitpairlist);
 
@@ -17278,7 +18856,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 	  *eliminatedp = true;
 	  Stage3pair_free(&hitpair);
 	} else {
-	  debug6(printf("Keeping hit pair %p with nsegments (%d)+%d\n",hitpair,hitpair->hit5->nsegments,hitpair->hit3->nsegments));
+	  debug6(printf("Keeping hit pair %p with nsegments (%d)+%d, nindels %d+%d\n",
+			hitpair,hitpair->hit5->nsegments,hitpair->hit3->nsegments,hitpair->hit5->nindels,hitpair->hit3->nindels));
 	  optimal = List_push(optimal,hitpair);
 	}
 
@@ -17288,7 +18867,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 	  *eliminatedp = true;
 	  Stage3pair_free(&hitpair);
 	} else {
-	  debug6(printf("Keeping hit pair %p with nsegments %d+(%d)\n",hitpair,hitpair->hit5->nsegments,hitpair->hit3->nsegments));
+	  debug6(printf("Keeping hit pair %p with nsegments %d+(%d), nindels %d+%d\n",
+			hitpair,hitpair->hit5->nsegments,hitpair->hit3->nsegments,hitpair->hit5->nindels,hitpair->hit3->nindels));
 	  optimal = List_push(optimal,hitpair);
 	}
 
@@ -17297,7 +18877,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 	*eliminatedp = true;
 	Stage3pair_free(&hitpair);
       } else {
-	debug6(printf("Keeping hit pair %p with nsegments %d+%d\n",hitpair,hitpair->hit5->nsegments,hitpair->hit3->nsegments));
+	debug6(printf("Keeping hit pair %p with nsegments %d+%d, nindels %d+%d\n",
+		      hitpair,hitpair->hit5->nsegments,hitpair->hit3->nsegments,hitpair->hit5->nindels,hitpair->hit3->nindels));
 	optimal = List_push(optimal,hitpair);
       }
     }
@@ -17622,6 +19203,7 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
 		  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,/*pairtype*/CONCORDANT_TRANSLOCATIONS,
 						   /*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
 		    *conc_transloc = List_push(*conc_transloc,(void *) stage3pair);
+		    /* *nconcordant += 1; */
 		  }
 
 		} else if (SENSE_INCONSISTENT_P(hit5->sensedir,hit3->sensedir)) {
@@ -17642,22 +19224,21 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
 				hit5->effective_chrnum,hit5->chrnum,hit3->chrnum));
 		  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,/*pairtype*/CONCORDANT,
 						   /*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
-
-		    debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
+		    debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",stage3pair->hit5->score,stage3pair->hit3->score,new_found_score));
 		    if (Stage3pair_max_trim(stage3pair) > 18) {
 		      /* Don't use terminals to set new_found_score */
 		      debug5(printf("Max trim is %d > 18, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
 		      *terminals = List_push(*terminals,(void *) stage3pair);
 
-		    } else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+		    } else if (stage3pair->hit5->hittype == GMAP || stage3pair->hit3->hittype == GMAP) {
 		      /* Don't use GMAP scores to set new_found_score */
 		      hitpairs = List_push(hitpairs,(void *) stage3pair);
 		      (*nconcordant)++;
 
-		    } else if (hit5->score + hit3->score < new_found_score) {
+		    } else if (stage3pair->hit5->score + stage3pair->hit3->score < new_found_score) {
 		      /* Don't use frontier_score here, which is the trimmed_score.  Use the full score, to motivate stage1hr to find longer alignments */
-		      new_found_score = hit5->score + hit3->score;
-		      debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,hit5->score,hit3->score));
+		      new_found_score = stage3pair->hit5->score + stage3pair->hit3->score;
+		      debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,stage3pair->hit5->score,stage3pair->hit3->score));
 		      hitpairs = List_push(hitpairs,(void *) stage3pair);
 		      (*nconcordant)++;
 
@@ -17742,6 +19323,7 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
 		  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,/*pairtype*/CONCORDANT_TRANSLOCATIONS,
 						   /*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
 		    *conc_transloc = List_push(*conc_transloc,(void *) stage3pair);
+		    /* *nconcordant += 1; */
 		  }
 
 		} else if (SENSE_INCONSISTENT_P(hit3->sensedir,hit5->sensedir)) {
@@ -17763,21 +19345,21 @@ pair_up_concordant_aux (bool *abort_pairing_p, int *found_score, int *nconcordan
 		  if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,/*pairtype*/CONCORDANT,
 						   /*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) {
 
-		    debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score));
+		    debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",stage3pair->hit5->score,stage3pair->hit3->score,new_found_score));
 		    if (Stage3pair_max_trim(stage3pair) > 18) {
 		      /* Don't use terminals to set new_found_score */
 		      debug5(printf("Max trim is %d > 18, so treating as terminals\n",Stage3pair_max_trim(stage3pair)));
 		      *terminals = List_push(*terminals,(void *) stage3pair);
 
-		    } else if (hit5->hittype == GMAP || hit3->hittype == GMAP) {
+		    } else if (stage3pair->hit5->hittype == GMAP || stage3pair->hit3->hittype == GMAP) {
 		      /* Don't use GMAP scores to set new_found_score */
 		      hitpairs = List_push(hitpairs,(void *) stage3pair);
 		      (*nconcordant)++;
 		      
-		    } else if (hit5->score + hit3->score < new_found_score) {
+		    } else if (stage3pair->hit5->score + stage3pair->hit3->score < new_found_score) {
 		      /* Don't use frontier_score here, which is the trimmed_score.  Use the full score, to motivate stage1hr to find longer alignments */
-		      new_found_score = hit5->score + hit3->score;
-		      debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,hit5->score,hit3->score));
+		      new_found_score = stage3pair->hit5->score + stage3pair->hit3->score;
+		      debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,stage3pair->hit5->score,stage3pair->hit3->score));
 		      hitpairs = List_push(hitpairs,(void *) stage3pair);
 		      (*nconcordant)++;
 
diff --git a/src/stage3hr.h b/src/stage3hr.h
index 5c20725..7254c95 100644
--- a/src/stage3hr.h
+++ b/src/stage3hr.h
@@ -1,7 +1,9 @@
-/* $Id: stage3hr.h 199475 2016-10-23 23:21:59Z twu $ */
+/* $Id: stage3hr.h 207311 2017-06-14 19:26:20Z twu $ */
 #ifndef STAGE3HR_INCLUDED
 #define STAGE3HR_INCLUDED
 
+typedef struct Stage3end_T *Stage3end_T;
+
 #include <stdio.h>
 #include "bool.h"
 #include "sense.h"
@@ -20,6 +22,12 @@
 #include "pair.h"
 #include "filestring.h"
 
+#include "dynprog.h"
+#include "diagpool.h"
+#include "cellpool.h"
+#include "oligoindex_hr.h"
+
+
 /* Should arrange in order of goodness, best to worst */
 typedef enum {EXACT, SUB, INSERTION, DELETION, SUBSTRINGS,
 	      HALFSPLICE_DONOR, HALFSPLICE_ACCEPTOR, SPLICE, SAMECHR_SPLICE, TRANSLOC_SPLICE,
@@ -27,7 +35,6 @@ typedef enum {EXACT, SUB, INSERTION, DELETION, SUBSTRINGS,
 	      GMAP, TERMINAL} Hittype_T;
 
 #define T Stage3end_T
-typedef struct T *T;
 
 typedef struct Stage3pair_T *Stage3pair_T;
 
@@ -38,14 +45,15 @@ Stage3hr_setup (bool invert_first_p_in, bool invert_second_p_in, Genome_T genome
 		IIT_T genes_iit_in, int *genes_divint_crosstable_in,
 		IIT_T tally_iit_in, int *tally_divint_crosstable_in,
 		IIT_T runlength_iit_in, int *runlength_divint_crosstable_in,
-		bool distances_observed_p, int pairmax_linear_in, int pairmax_circular_in,
-		Chrpos_T expected_pairlength, Chrpos_T pairlength_deviation,
+		bool distances_observed_p, Chrpos_T pairmax_linear_in, Chrpos_T pairmax_circular_in,
+		Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in, int maxpeelback_in,
 		int localsplicing_penalty_in, int indel_penalty_middle_in,
 		int antistranded_penalty_in, bool favor_multiexon_p_in,
 		int gmap_min_nconsecutive_in, int end_detail, int subopt_levels_in,
 		int max_middle_insertions_in, int max_middle_deletions_in,
 		bool novelsplicingp_in, Chrpos_T shortsplicedist_in, bool merge_samechr_p_in,
 		bool *circularp_in, bool *altlocp_in, Univcoord_T *alias_starts_in, Univcoord_T *alias_ends_in,
+		bool omit_concordant_uniq_p_in, bool omit_concordant_mult_p_in,
 		char *failedinput_root_in, bool print_m8_p_in, bool want_random_p_in);
 
 extern char *
@@ -69,6 +77,8 @@ extern Chrnum_T
 Stage3end_chrnum (T this);
 extern Chrnum_T
 Stage3end_effective_chrnum (T this);
+extern Chrnum_T
+Stage3end_other_chrnum (T this);
 extern Univcoord_T
 Stage3end_chroffset (T this);
 extern Univcoord_T
@@ -92,9 +102,9 @@ Stage3end_absmq_score (T this);
 extern int
 Stage3end_score (T this);
 extern int
+Stage3end_gmap_goodness (T this);
+extern int
 Stage3end_gmap_max_match_length (T this);
-extern double
-Stage3end_gmap_min_splice_prob (T this);
 extern int
 Stage3end_best_score (List_T hits);
 extern bool
@@ -397,7 +407,7 @@ extern T
 Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_length,
 		    int ambig_end_length_5, int ambig_end_length_3,
 		    Splicetype_T ambig_splicetype_5, Splicetype_T ambig_splicetype_3,
-		    double min_splice_prob,
+		    double avg_splice_score, int goodness,
 		    struct Pair_T *pairarray, int npairs, int nsegments, int nintrons, int nindelbreaks,
 		    Univcoord_T left, int genomiclength, bool plusp, int genestrand,
 		    char *accession, int querylength,
@@ -472,6 +482,13 @@ Stage3pair_print_end (Filestring_T fp, Filestring_T fp_failedinput,
 		      int maxpaths, bool quiet_if_excessive_p,
 		      bool invertp, int quality_shift);
 
+extern List_T
+Stage3pair_resolve_insides (List_T hitpairlist, char *queryuc_ptr_5, char *queryuc_ptr_3,
+			    Compress_T query5_compress_fwd, Compress_T query5_compress_rev,
+			    Compress_T query3_compress_fwd, Compress_T query3_compress_rev,
+			    Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR,
+			    Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool);
+
 extern Stage3pair_T
 Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype,
 		bool private5p, bool private3p, bool expect_concordant_p);
diff --git a/src/substring.c b/src/substring.c
index 90b4603..4733adc 100644
--- a/src/substring.c
+++ b/src/substring.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 199475 2016-10-23 23:21:59Z twu $";
+static char rcsid[] = "$Id: substring.c 207556 2017-06-21 00:52:32Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -331,7 +331,7 @@ struct T {
   int siteA_pos;
   double siteA_prob;
 
-  Univcoord_T splicecoord_N;	/* For DNA fusions */
+  /* Note: For DNA fusions, use both splicecoord_D and splicecoord_A */
   int siteN_pos;
 
 
@@ -352,12 +352,14 @@ Substring_alias_circular (T this) {
   if (this != NULL) {
     chrlength = this->chrlength;
 
-    if (this->left + this->querylength > chrlength) {
+    if (this->left + this->querylength > this->chroffset + chrlength) {
+      debug2(printf("For alias, this->left %u + this->querylength %d > offset %u + chrlength %u\n",
+		    this->left,this->querylength,this->chroffset,chrlength));
       if (this->plusp == true) {
-	this->mandatory_trim_right = chrlength - (this->left + this->querylength);
+	this->mandatory_trim_right = (this->chroffset + chrlength) - (this->left + this->querylength);
 	debug2(printf("For alias, setting mandatory_trim_right to be %d\n",this->mandatory_trim_right));
       } else {
-	this->mandatory_trim_left = chrlength - (this->left + this->querylength);
+	this->mandatory_trim_left = (this->chroffset + chrlength) - (this->left + this->querylength);
 	debug2(printf("For alias, setting mandatory_trim_left to be %d\n",this->mandatory_trim_left));
       }
     }
@@ -371,7 +373,6 @@ Substring_alias_circular (T this) {
     this->alignend_trim += chrlength;
     this->splicecoord_D += chrlength;
     this->splicecoord_A += chrlength;
-    this->splicecoord_N += chrlength;
   }
 
   return;
@@ -385,13 +386,15 @@ Substring_unalias_circular (T this) {
   if (this != NULL) {
     chrlength = this->chrlength;
 
-    if (this->left < chrlength) {
+    if (this->left < this->chroffset + chrlength) {
+      debug2(printf("For unalias, this->left %u < chroffset %u + chrlength %d\n",
+		    this->left,this->chroffset,chrlength));
       if (this->plusp == true) {
-	this->mandatory_trim_left = chrlength - this->left;
-	debug2(printf("For alias, setting mandatory_trim_left to be %d\n",this->mandatory_trim_left));
+	this->mandatory_trim_left = (this->chroffset + chrlength) - this->left;
+	debug2(printf("For unalias, setting mandatory_trim_left to be %d\n",this->mandatory_trim_left));
       } else {
-	this->mandatory_trim_right = chrlength - this->left;
-	debug2(printf("For alias, setting mandatory_trim_right to be %d\n",this->mandatory_trim_right));
+	this->mandatory_trim_right = (this->chroffset + chrlength) - this->left;
+	debug2(printf("For unalias, setting mandatory_trim_right to be %d\n",this->mandatory_trim_right));
       }
     }
 
@@ -404,7 +407,6 @@ Substring_unalias_circular (T this) {
     this->alignend_trim -= chrlength;
     this->splicecoord_D -= chrlength;
     this->splicecoord_A -= chrlength;
-    this->splicecoord_N -= chrlength;
   }
 
   return;
@@ -422,6 +424,16 @@ fill_w_dashes (char *string, int start, int end) {
   return;
 }
 
+/* Overwrites string[start] .. string[end-1] with '*'; does nothing when start >= end */
+static void
+fill_w_stars (char *string, int start, int end) {
+  int pos = start;
+
+  while (pos < end) {
+    string[pos++] = '*';
+  }
+}
+
 
 #if 0
 static bool
@@ -1236,6 +1248,9 @@ Substring_overlap_p (T substring1, T substring2) {
     low1 = substring1->alignend;
     high1 = substring1->alignstart;
   }
+  if (high1 > 0) {
+    high1 -= 1;
+  }
 
   if (substring2->plusp == true) {
     low2 = substring2->alignstart;
@@ -1244,6 +1259,9 @@ Substring_overlap_p (T substring1, T substring2) {
     low2 = substring2->alignend;
     high2 = substring2->alignstart;
   }
+  if (high2 > 0) {
+    high2 -= 1;
+  }
 
   debug3(printf("Checking overlap between %u..%u and %u..%u",low1,high1,low2,high2));
 
@@ -1289,10 +1307,16 @@ Substring_overlap_point_trimmed_p (T substring, Univcoord_T endpos) {
   if (substring->plusp == true) {
     low = substring->alignstart_trim;
     high = substring->alignend_trim;
+    if (high > 0) {
+      high -= 1;
+    }
     debug3(printf("Checking overlap between plus %u..%u and %u",low,high,endpos));
   } else {
     low = substring->alignend_trim;
     high = substring->alignstart_trim;
+    if (high > 0) {
+      high -= 1;
+    }
     debug3(printf("Checking overlap between minus %u..%u and %u",low,high,endpos));
   }
 
@@ -1322,6 +1346,9 @@ Substring_overlap_segment_trimmed (T substring1, T substring2) {
     low1 = substring1->alignend_trim;
     high1 = substring1->alignstart_trim;
   }
+  if (high1 > 0) {
+    high1 -= 1;
+  }
 
   if (substring2->plusp == true) {
     low2 = substring2->alignstart_trim;
@@ -1330,6 +1357,9 @@ Substring_overlap_segment_trimmed (T substring1, T substring2) {
     low2 = substring2->alignend_trim;
     high2 = substring2->alignstart_trim;
   }
+  if (high2 > 0) {
+    high2 -= 1;
+  }
 
   debug3(printf("Checking overlap between %u..%u and %u..%u",low1,high1,low2,high2));
 
@@ -1711,7 +1741,7 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
 
 static char *
 embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend, int querylength,
-		   int extraleft, int extraright, int genestrand) {
+		   int mandatory_trim_left, int mandatory_trim_right, int extraleft, int extraright, int genestrand) {
   char *result;
   int i, j, k;
 
@@ -1727,6 +1757,7 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
 
   /* Add aligned region with lower-case diffs, surrounded by dashes */
   fill_w_dashes(result,0,querystart);
+  fill_w_stars(result,0,mandatory_trim_left);
   debug1(printf("g1: %s (%d..%d) extraleft:%d extraright:%d\n",result,querystart,queryend,extraleft,extraright));
 
   /* Don't need to know adj anymore, because each substring has its own left */
@@ -1747,20 +1778,21 @@ embellish_genomic (char *genomic_diff, char *query, int querystart, int queryend
   }
 
   fill_w_dashes(result,queryend,querylength);
+  fill_w_stars(result,querylength - mandatory_trim_right,querylength);
   debug1(printf("g1: %s\n",result));
 
   /* Add terminal ends as lower-case */
   for (k = 0, i = querystart-1, j = querystart-1; k < extraleft && i >= 0 /*&& j >= 0*/; k++, i--, j--) {
     result[i] = (char) tolower(genomic_diff[j]);
     /* printf("k=%d i=%d result[i]=%c\n",k,i,result[i]); */
-    assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n');
+    assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n' || result[i] == '*');
   }
 
   for (k = 0, i = queryend, j = queryend; k < extraright && i < querylength /*&& j < genomiclength*/; k++, i++, j++) {
     result[i] = (char) tolower(genomic_diff[j]);
     /* printf("k=%d i=%d result[i]=%c\n",k,i,result[i]); */
 #if 0
-    assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n');
+    assert(result[i] == 'a' || result[i] == 'c' || result[i] == 'g' || result[i] == 't' || result[i] == 'n' || result[i] == '*');
 #endif
   }
   debug1(printf("g1: %s\n",result));
@@ -1985,7 +2017,7 @@ Substring_new (int nmismatches_whole, Chrnum_T chrnum, Univcoord_T chroffset,
   new->plusp = plusp;
   new->genestrand = genestrand;
 
-  new->splicecoord_D = new->splicecoord_A = new->splicecoord_N = 0;
+  new->splicecoord_D = new->splicecoord_A = 0;
   new->siteD_pos = new->siteA_pos = new->siteN_pos = 0;
 
   new->siteD_prob = new->siteA_prob = 0.0;
@@ -2484,6 +2516,7 @@ Substring_new_ambig_D (int querystart, int queryend, int splice_pos, int queryle
   new->siteA_prob = amb_common_prob;
 
   new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches);
+  debug2(printf("nmismatches_bothdiff due to amb_nmismatches is %d\n",new->nmismatches_bothdiff));
 
 #if 0
   if (plusp == true) {
@@ -2592,6 +2625,7 @@ Substring_new_ambig_A (int querystart, int queryend, int splice_pos, int queryle
   new->siteD_prob = amb_common_prob;
 
   new->nmismatches_bothdiff = new->nmismatches_whole = Intlist_min(amb_nmismatches);
+  debug2(printf("nmismatches_bothdiff due to amb_nmismatches is %d\n",new->nmismatches_bothdiff));
 
 #if 0
   if (plusp == true) {
@@ -2876,7 +2910,8 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
     }
     /* Need to perform embellish to put dashes in */
     this->genomic_bothdiff = embellish_genomic(genomic_diff,queryuc_ptr,this->querystart,this->queryend,
-					       querylength,extraleft,extraright,this->genestrand);
+					       querylength,this->mandatory_trim_left,this->mandatory_trim_right,
+					       extraleft,extraright,this->genestrand);
 
     if (snps_iit == NULL) {
       this->genomic_refdiff = this->genomic_bothdiff;
@@ -2894,7 +2929,8 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
 				 /*plusp*/true,this->genestrand);
       if (output_sam_p == false) {
 	this->genomic_refdiff = embellish_genomic(genomic_diff,queryuc_ptr,this->querystart,this->queryend,
-						  querylength,extraleft,extraright,this->genestrand);
+						  querylength,this->mandatory_trim_left,this->mandatory_trim_right,
+						  extraleft,extraright,this->genestrand);
       }
     }
 
@@ -2952,7 +2988,8 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
 
     /* Need to perform embellish to put dashes in */
     this->genomic_bothdiff = embellish_genomic(genomic_diff,/*not queryrc*/queryuc_ptr,this->querystart,this->queryend,
-					       querylength,extraleft,extraright,this->genestrand);
+					       querylength,this->mandatory_trim_left,this->mandatory_trim_right,
+					       extraleft,extraright,this->genestrand);
 
     if (snps_iit == NULL) {
       this->genomic_refdiff = this->genomic_bothdiff;
@@ -2972,7 +3009,8 @@ Substring_display_prep (T this, char *queryuc_ptr, int querylength,
       
       if (output_sam_p == false) {
 	this->genomic_refdiff = embellish_genomic(genomic_diff,/*not queryrc*/queryuc_ptr,this->querystart,this->queryend,
-						  querylength,extraleft,extraright,this->genestrand);
+						  querylength,this->mandatory_trim_left,this->mandatory_trim_right,
+						  extraleft,extraright,this->genestrand);
       }
     }
 
@@ -3015,14 +3053,28 @@ Substring_splicecoord_A (T this) {
   return this->splicecoord_A;
 }
 
+/* Called only by samprint.  Returns splicecoord_D relative to chroffset; for donor_strand '-' the result is one greater.  Any other strand character aborts. */
 Chrpos_T
-Substring_chr_splicecoord_D (T this) {
-  return (Chrpos_T) (this->splicecoord_D - this->chroffset);
+Substring_chr_splicecoord_D (T this, char donor_strand) {
+  if (donor_strand == '+') {
+    return (Chrpos_T) (this->splicecoord_D - this->chroffset);
+  } else if (donor_strand == '-') {
+    return (Chrpos_T) (this->splicecoord_D - this->chroffset + 1);
+  } else {
+    abort();
+  }
 }
 
+/* Called only by samprint.  Returns splicecoord_A relative to chroffset; for acceptor_strand '+' the result is one greater.  Any other strand character aborts. */
 Chrpos_T
-Substring_chr_splicecoord_A (T this) {
-  return (Chrpos_T) (this->splicecoord_A - this->chroffset);
+Substring_chr_splicecoord_A (T this, char acceptor_strand) {
+  if (acceptor_strand == '+') {
+    return (Chrpos_T) (this->splicecoord_A - this->chroffset + 1);
+  } else if (acceptor_strand == '-') {
+    return (Chrpos_T) (this->splicecoord_A - this->chroffset);
+  } else {
+    abort();
+  }
 }
 
 int
@@ -3660,7 +3712,6 @@ Substring_copy (T old) {
     new->siteA_pos = old->siteA_pos;
     new->siteA_prob = old->siteA_prob;
 
-    new->splicecoord_N = old->splicecoord_N;
     new->siteN_pos = old->siteN_pos;
 
     new->ambiguous_p = old->ambiguous_p;
@@ -3748,13 +3799,14 @@ Substring_new_startfrag (Univcoord_T startfrag_coord, int splice_pos, int nmisma
 
   debug2(printf("Making new startfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n",
 		startfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset));
-  new->splicecoord_N = startfrag_coord;
+  new->splicecoord_D = new->splicecoord_A = startfrag_coord;
+  new->splicesitesD_knowni = new->splicesitesA_knowni = -1;
   assert(startfrag_coord == left + splice_pos);
 
   if (plusp == true) {
-    new->siteN_pos = splice_pos;
+    new->siteD_pos = new->siteA_pos = new->siteN_pos = splice_pos;
   } else {
-    new->siteN_pos = querylength - splice_pos;
+    new->siteD_pos = new->siteA_pos = new->siteN_pos = querylength - splice_pos;
   }
 
   return new;
@@ -3820,13 +3872,14 @@ Substring_new_endfrag (Univcoord_T endfrag_coord, int splice_pos, int nmismatche
 
   debug2(printf("Making new endfrag with coord %u and left %u, plusp %d, query %d..%d, genome %u..%u\n",
 		endfrag_coord,left,plusp,querystart,queryend,alignstart - chroffset,alignend - chroffset));
-  new->splicecoord_N = endfrag_coord;
+  new->splicecoord_D = new->splicecoord_A = endfrag_coord;
+  new->splicesitesD_knowni = new->splicesitesA_knowni = -1;
   assert(endfrag_coord == left + splice_pos);
 
   if (plusp == true) {
-    new->siteN_pos = splice_pos;
+    new->siteD_pos = new->siteA_pos = new->siteN_pos = splice_pos;
   } else {
-    new->siteN_pos = querylength - splice_pos;
+    new->siteD_pos = new->siteA_pos = new->siteN_pos = querylength - splice_pos;
   }
 
   return new;
@@ -4908,41 +4961,63 @@ Substring_intragenic_splice_p (Genomicpos_T splicedistance, Chrnum_T chrnum,
 
 
 Chrpos_T
-Substring_compute_chrpos (T this, int hardclip_low, bool hide_soft_clips_p) {
-  Chrpos_T chrpos;
+Substring_compute_chrpos (T this, int hardclip_low, int hardclip_high, bool hide_soft_clips_p) { /* hardclip_high is referenced only by the commented-out and #if 0 code below */
+  Chrpos_T chrpos_low;		/* 1-based position relative to chroffset; this is the value returned */
 
   if (hide_soft_clips_p == true) {
     if (this->plusp == true) {
       /* Add 1 to report in 1-based coordinates */
-      chrpos = this->genomicstart - this->chroffset + 1U;
-      chrpos += hardclip_low;
+      chrpos_low = this->genomicstart - this->chroffset + 1U;
+      chrpos_low += hardclip_low; /* soft clips hidden: only the hard clip advances the position */
+      /* *chrpos_high = this->genomicend - this->chroffset + 1U; */
+      /* *chrpos_high -= hardclip_high; */
 
     } else {
       /* Add 1 to report in 1-based coordinates */
-      chrpos = this->genomicend - this->chroffset + 1U;
-      chrpos += hardclip_low;
+      chrpos_low = this->genomicend - this->chroffset + 1U; /* minus strand starts from genomicend */
+      chrpos_low += hardclip_low;
+      /* *chrpos_high = this->genomicstart - this->chroffset + 1U; */
+      /* *chrpos_high -= hardclip_high; */
     }
 
   } else {
     if (this->plusp == true) {
-      chrpos = this->genomicstart - this->chroffset + 1U;
+      chrpos_low = this->genomicstart - this->chroffset + 1U; /* then advanced by the larger of querystart and hardclip_low */
       if (this->querystart > hardclip_low) {
-	chrpos += this->querystart; /* not querystart_orig */
+	chrpos_low += this->querystart; /* not querystart_orig */
       } else {
-	chrpos += hardclip_low;
+	chrpos_low += hardclip_low;
       }
 
+#if 0
+      *chrpos_high = this->genomicend - this->chroffset + 1U;
+      if (this->querylength - this->queryend > hardclip_high) {
+	*chrpos_high -= this->querylength - this->queryend;
+      } else {
+	*chrpos_high -= hardclip_high;
+      }
+#endif
+	
     } else {
-      chrpos = this->genomicend - this->chroffset + 1U;
+      chrpos_low = this->genomicend - this->chroffset + 1U; /* then advanced by the larger of (querylength - queryend) and hardclip_low */
       if (this->querylength - this->queryend > hardclip_low) {
-	chrpos += this->querylength - this->queryend; /* not queryend_orig */
+	chrpos_low += this->querylength - this->queryend; /* not queryend_orig */
       } else {
-	chrpos += hardclip_low;
+	chrpos_low += hardclip_low;
       }
+
+#if 0
+      *chrpos_high = this->genomicstart - this->chroffset + 1U;
+      if (this->querystart > hardclip_high) {
+	*chrpos_high -= this->querystart;
+      } else {
+	*chrpos_high -= hardclip_high;
+      }
+#endif
     }
   }
     
-  return chrpos;
+  return chrpos_low;
 }
 
 
@@ -6244,8 +6319,134 @@ Substring_count_mismatches_region (T this, int trim_left, int trim_right,
  ************************************************************************/
 
 List_T
-Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortread_T queryseq,
-			    int hardclip_low, int hardclip_high, int queryseq_offset) {
+Substring_convert_to_pairs (List_T pairs, T substring, char *queryuc_ptr,
+			    Chrpos_T chrlength, Pairpool_T pairpool) {
+  int querystart, queryend, querypos, i;
+  Chrpos_T chrpos;
+  char genome;
+  /* Pushes one pair per query position in [querystart, queryend) onto pairs and returns the list; a NULL substring returns pairs unchanged */
+  if (substring == NULL) {
+    return pairs;
+  }
+
+  debug6(printf("*** Entered Substring_convert_to_pairs with querylength %d\n",substring->querylength));
+
+  if (substring->plusp == true) {
+    querystart = substring->querystart;
+    queryend = substring->queryend;
+
+    /* Pairs are all zero-based, so do not add 1 */
+#if 0
+    chrpos = substring->genomicstart_adj + querystart - substring->chroffset /*+ 1U*/;
+#else
+    chrpos = substring->genomicstart + querystart - substring->chroffset /*+ 1U*/;
+#endif
+
+    debug6(printf("plus conversion\n"));
+    debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp));
+    debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset,
+		  substring->alignend_trim - substring->chroffset));
+    debug6(printf("chrpos %u\n",chrpos));
+
+    if (substring->genomic_bothdiff == NULL) {
+      /* Exact match: no diff string, so the query character doubles as the genomic character */
+      for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) {
+	pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+			      queryuc_ptr[i],/*comp*/MATCH_COMP,queryuc_ptr[i],/*g_alt*/queryuc_ptr[i],/*dynprogindex*/0);
+      }
+    } else if (show_refdiff_p == true) {
+      /* Upper-case genomic characters are pushed as matches, all others as mismatches */
+      for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) {
+	if (isupper(genome = substring->genomic_refdiff[i])) {
+	  /* assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); -- Doesn't hold for SNPs */
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0);
+	} else {
+	  assert(queryuc_ptr[i] != toupper(genome));
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome),
+				/*dynprogindex*/0);
+	}
+      }
+    } else {
+      /* printf("querystart %d, queryend %d\n",querystart,queryend); */
+      /* printf("seq1   %s\n",queryuc_ptr); */
+      /* printf("genome %s\n",substring->genomic_bothdiff); */
+      for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) {
+	if (isupper(genome = substring->genomic_bothdiff[i])) {
+	  /* assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); -- Doesn't hold for SNPs */
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0);
+	} else {
+	  assert(queryuc_ptr[i] != toupper(genome));
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome),
+				/*dynprogindex*/0);
+	}
+      }
+    }
+
+  } else {
+    querystart = substring->querystart;
+    queryend = substring->queryend;
+
+    /* For minus, to get 0-based coordinates, subtract 1 */
+#if 0
+    chrpos = substring->genomicstart_adj - querystart - substring->chroffset - 1U;
+#else
+    chrpos = substring->genomicstart - querystart - substring->chroffset - 1U;
+    chrpos = chrlength - chrpos; /* re-express the position as an offset subtracted from chrlength */
+#endif
+
+    debug6(printf("minus conversion\n"));
+    debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp));
+    debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset,
+		  substring->alignend_trim - substring->chroffset));
+    debug6(printf("chrpos %u\n",chrpos));
+
+    if (substring->genomic_bothdiff == NULL) {
+      /* Exact match */
+      for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) {
+	pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+			      queryuc_ptr[i],/*comp*/MATCH_COMP,queryuc_ptr[i],/*g_alt*/queryuc_ptr[i],/*dynprogindex*/0);
+      }
+    } else if (show_refdiff_p == true) {
+      for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) {
+	if (isupper(genome = substring->genomic_refdiff[i])) {
+	  assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N');
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0);
+	} else {
+	  assert(queryuc_ptr[i] != toupper(genome));
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome),
+				/*dynprogindex*/0);
+	}
+      }
+    } else {
+      for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) {
+	if (isupper(genome = substring->genomic_bothdiff[i])) {
+	  /* assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); */
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0);
+	} else {
+	  /* assert(queryuc_ptr[i] != toupper(genome)); */
+	  pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++,
+				queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome),
+				/*dynprogindex*/0);
+	}
+      }
+    }
+  }
+
+  debug6(Pair_dump_list(pairs,true));
+  return pairs;
+}
+
+
+
+List_T
+Substring_convert_to_pairs_out (List_T pairs, T substring, int querylength, Shortread_T queryseq,
+				int hardclip_low, int hardclip_high, int queryseq_offset) {
   int querystart, queryend, querypos, i;
   Chrpos_T chrpos;
   char *seq1;
@@ -6383,9 +6584,159 @@ Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortrea
 
 
 List_T
-Substring_add_insertion (List_T pairs, T substringA, T substringB, int querylength,
-			 int insertionlength, Shortread_T queryseq,
-			 int hardclip_low, int hardclip_high, int queryseq_offset) {
+Substring_add_insertion (List_T pairs, T substringA, T substringB,
+			 int insertionlength, char *queryuc_ptr,
+			 Pairpool_T pairpool) {
+  int querystartA, queryendA, querystartB, queryendB, querypos, i;
+  Chrpos_T chrendA;
+  /* Pushes insertionlength INDEL_COMP pairs onto pairs: query chars come from queryuc_ptr starting at substringA->queryend, genome chars are ' ' */
+  /* Every pushed pair carries the same genomepos (chrendA); the updated pairs list is returned */
+  if (substringA->plusp == true) {
+    querystartA = substringA->querystart;
+    queryendA = substringA->queryend;
+    querystartB = substringB->querystart;
+    queryendB = substringB->queryend;
+
+    /* Pairs are all zero-based, so do not add 1 */
+#if 0
+    chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
+#else
+    chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/;
+#endif
+
+  } else {  /* plusp is false: query bounds are read the same way, only the chrendA formula differs */
+    querystartA = substringA->querystart;
+    queryendA = substringA->queryend;
+    querystartB = substringB->querystart;
+    queryendB = substringB->queryend;
+
+    /* Pairs are all zero-based, so subtract 1 */
+#if 0
+    chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
+#else
+    chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U;
+#endif
+  }
+
+  if (querystartA <= queryendA && querystartB <= queryendB) {  /* nothing is pushed if either substring has querystart > queryend */
+    querypos = queryendA /*+ queryseq_offset*/;
+    i = queryendA;  /* querypos and the queryuc_ptr index both start at queryendA and advance together */
+    while (--insertionlength >= 0) {  /* one pair per inserted base; zero iterations when insertionlength is not positive */
+      pairs = Pairpool_push(pairs,pairpool,querypos++,/*genomepos*/chrendA,
+			    queryuc_ptr[i++],/*comp*/INDEL_COMP,' ',/*g_alt*/' ',/*dynprogindex*/0);
+    }
+  }
+
+  return pairs;
+}
+
+List_T
+Substring_add_deletion (List_T pairs, T substringA, T substringB,
+			char *deletion, int deletionlength,
+			Pairpool_T pairpool) {
+  int querystartA, queryendA, querystartB, queryendB, querypos, k;
+  Chrpos_T chrendA;
+  /* Pushes deletionlength INDEL_COMP pairs onto pairs: query char ' ', genome char deletion[k], all at querypos substringA->queryend; returns the updated list */
+  if (substringA->plusp == true) {
+    querystartA = substringA->querystart;
+    queryendA = substringA->queryend;
+    querystartB = substringB->querystart;
+    queryendB = substringB->queryend;
+
+    /* Pairs are all zero-based, so do not add 1 */
+#if 0
+    chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
+#else
+    chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/;
+#endif
+
+    if (querystartA < queryendA && querystartB < queryendB) {  /* NOTE(review): strict '<' here, while the plusp-false branch below uses '<=' -- confirm the asymmetry is intended */
+      querypos = queryendA /*+ queryseq_offset*/;
+      for (k = 0; k < deletionlength; k++) {  /* querypos stays fixed; only genomepos advances */
+	pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrendA++,
+			      ' ',/*comp*/INDEL_COMP,deletion[k],/*g_alt*/deletion[k],
+			      /*dynprogindex*/0);
+      }
+    }
+
+  } else {
+    querystartA = substringA->querystart;
+    queryendA = substringA->queryend;
+    querystartB = substringB->querystart;
+    queryendB = substringB->queryend;
+
+    /* Pairs are all zero-based, so subtract 1 */
+#if 0
+    chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
+#else
+    chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U;
+#endif
+
+    if (querystartA <= queryendA && querystartB <= queryendB) {  /* NOTE(review): '<=' here versus strict '<' in the plusp branch above */
+      querypos = queryendA /*+ queryseq_offset*/;
+      for (k = 0; k < deletionlength; k++) {
+	pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrendA++,  /* NOTE(review): genomepos ascends here exactly as in the plusp branch -- confirm for plusp == false */
+			      ' ',/*comp*/INDEL_COMP,deletion[k],/*g_alt*/deletion[k],
+			      /*dynprogindex*/0);
+      }
+    }
+  }
+
+  return pairs;
+}
+
+List_T
+Substring_add_intron (List_T pairs, T substringA, T substringB, Pairpool_T pairpool) {
+  int querystartA, queryendA, querystartB, queryendB, querypos;
+  Chrpos_T chrendA;
+  /* Pushes a single gap-holder pair (blank query and genome chars) at querypos substringA->queryend and genomepos chrendA; returns the updated list */
+  if (substringA->plusp == true) {
+    querystartA = substringA->querystart;
+    queryendA = substringA->queryend;
+    querystartB = substringB->querystart;
+    queryendB = substringB->queryend;
+
+    /* Pairs are all zero-based, so do not add 1 */
+#if 0
+    chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/;
+#else
+    chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/;
+#endif
+
+  } else {  /* plusp is false: query bounds are read the same way, only the chrendA formula differs */
+    querystartA = substringA->querystart;
+    queryendA = substringA->queryend;
+    querystartB = substringB->querystart;
+    queryendB = substringB->queryend;
+
+
+    /* Pairs are all zero-based, so subtract 1 */
+#if 0
+    chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U;
+#else
+    chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U;
+#endif
+  }
+
+  if (querystartA <= queryendA && querystartB <= queryendB) {  /* nothing is pushed if either substring has querystart > queryend */
+    /* Add gapholder */
+    /* All we really need for Pair_print_sam is to set gapp to be true */
+    querypos = queryendA /*+ queryseq_offset*/;
+    pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrendA,
+			  ' ',/*comp*/FWD_CANONICAL_INTRON_COMP,' ',/*g_alt*/' ',  /* the comp value is the same whatever plusp is */
+			  /*dynprogindex*/0);
+  }
+
+  return pairs;
+}
+
+
+
+
+List_T
+Substring_add_insertion_out (List_T pairs, T substringA, T substringB, int querylength,
+			     int insertionlength, Shortread_T queryseq,
+			     int hardclip_low, int hardclip_high, int queryseq_offset) {
   int querystartA, queryendA, querystartB, queryendB, querypos, i;
   Chrpos_T chrendA;
   char *seq1;
@@ -6471,9 +6822,9 @@ Substring_add_insertion (List_T pairs, T substringA, T substringB, int queryleng
 
 
 List_T
-Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylength,
-			char *deletion, int deletionlength,
-			int hardclip_low, int hardclip_high, int queryseq_offset) {
+Substring_add_deletion_out (List_T pairs, T substringA, T substringB, int querylength,
+			    char *deletion, int deletionlength,
+			    int hardclip_low, int hardclip_high, int queryseq_offset) {
   int querystartA, queryendA, querystartB, queryendB, querypos, k;
   Chrpos_T chrendA;
 
@@ -6564,8 +6915,8 @@ Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylengt
 
 
 List_T
-Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength,
-		      int hardclip_low, int hardclip_high, int queryseq_offset) {
+Substring_add_intron_out (List_T pairs, T substringA, T substringB, int querylength,
+			  int hardclip_low, int hardclip_high, int queryseq_offset) {
   int querystartA, queryendA, querystartB, queryendB, querypos;
   Chrpos_T chrendA;
 
diff --git a/src/substring.h b/src/substring.h
index ce4f283..2c1da8c 100644
--- a/src/substring.h
+++ b/src/substring.h
@@ -1,7 +1,13 @@
-/* $Id: substring.h 199475 2016-10-23 23:21:59Z twu $ */
+/* $Id: substring.h 207145 2017-06-10 00:20:06Z twu $ */
 #ifndef SUBSTRING_INCLUDED
 #define SUBSTRING_INCLUDED
 
+typedef enum {GMAP_NOT_APPLICABLE, GMAP_VIA_SUBSTRINGS, GMAP_VIA_SEGMENTS, GMAP_VIA_REGION} GMAP_source_T;
+typedef enum {END, INS, DEL, FRAG, DON, ACC, AMB_DON, AMB_ACC, TERM} Endtype_T;
+typedef enum {NO_TRIM, PRE_TRIMMED, COMPUTE_TRIM} Trimaction_T;
+
+typedef struct Substring_T *Substring_T;
+
 #include <stdio.h>
 #include "mode.h"
 #include "genomicpos.h"
@@ -14,6 +20,7 @@
 #include "iit-read.h"
 #include "bool.h"
 #include "pairdef.h"
+#include "pairpool.h"
 #include "filestring.h"
 #include "junction.h"
 #include "intlist.h"
@@ -26,10 +33,6 @@
 #endif
 
 
-typedef enum {GMAP_NOT_APPLICABLE, GMAP_VIA_SUBSTRINGS, GMAP_VIA_SEGMENTS, GMAP_VIA_REGION} GMAP_source_T;
-typedef enum {END, INS, DEL, FRAG, DON, ACC, AMB_DON, AMB_ACC, TERM} Endtype_T;
-typedef enum {NO_TRIM, PRE_TRIMMED, COMPUTE_TRIM} Trimaction_T;
-
 extern char *
 Endtype_string (Endtype_T endtype);
 
@@ -46,7 +49,6 @@ Substring_setup (bool print_nsnpdiffs_p_in, bool print_snplabels_p_in,
 		 bool output_sam_p_in, Mode_T mode_in, Univcoord_T genomelength_in);
 
 #define T Substring_T
-typedef struct T *T;
 
 extern void
 Substring_alias_circular (T this);
@@ -127,9 +129,9 @@ Substring_splicecoord_A (T this);
 extern Univcoord_T
 Substring_splicecoord_D (T this);
 extern Chrpos_T
-Substring_chr_splicecoord_D (T this);
+Substring_chr_splicecoord_D (T this, char donor_strand);
 extern Chrpos_T
-Substring_chr_splicecoord_A (T this);
+Substring_chr_splicecoord_A (T this, char acceptor_strand);
 extern int
 Substring_splicesitesD_knowni (T this);
 extern int
@@ -323,7 +325,7 @@ Substring_sort_siteN_halves (List_T hitlist, bool ascendingp);
 
 
 extern Chrpos_T
-Substring_compute_chrpos (T this, int hardclip_low, bool hide_soft_clips_p);
+Substring_compute_chrpos (T this, int hardclip_low, int hardclip_high, bool hide_soft_clips_p);
 
 extern double
 Substring_evalue (T substring);
@@ -359,21 +361,36 @@ Substring_runlength_p (T this, IIT_T runlength_iit, int *runlength_divint_crosst
 extern int
 Substring_count_mismatches_region (T this, int trim_left, int trim_right,
 				   Compress_T query_compress_fwd, Compress_T query_compress_rev);
-
 extern List_T
-Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortread_T queryseq,
-			    int hardclip_low, int hardclip_high, int queryseq_offset);
+Substring_convert_to_pairs (List_T pairs, T substring, char *queryuc_ptr,
+			    Chrpos_T chrlength, Pairpool_T pairpool);
 extern List_T
-Substring_add_insertion (List_T pairs, T substringA, T substringB, int querylength,
-			 int insertionlength, Shortread_T queryseq,
-			 int hardclip_low, int hardclip_high, int queryseq_offset);
+Substring_convert_to_pairs_out (List_T pairs, T substring, int querylength, Shortread_T queryseq,
+				int hardclip_low, int hardclip_high, int queryseq_offset);
+
 extern List_T
-Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylength,
+Substring_add_insertion (List_T pairs, T substringA, T substringB,
+			 int insertionlength, char *queryuc_ptr,
+			 Pairpool_T pairpool);
+extern List_T
+Substring_add_deletion (List_T pairs, T substringA, T substringB,
 			char *deletion, int deletionlength,
-			int hardclip_low, int hardclip_high, int queryseq_offset);
+			Pairpool_T pairpool);
+extern List_T
+Substring_add_intron (List_T pairs, T substringA, T substringB,
+		      Pairpool_T pairpool);
+
+extern List_T
+Substring_add_insertion_out (List_T pairs, T substringA, T substringB, int querylength,
+			     int insertionlength, Shortread_T queryseq,
+			     int hardclip_low, int hardclip_high, int queryseq_offset);
+extern List_T
+Substring_add_deletion_out (List_T pairs, T substringA, T substringB, int querylength,
+			    char *deletion, int deletionlength,
+			    int hardclip_low, int hardclip_high, int queryseq_offset);
 extern List_T
-Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength,
-		      int hardclip_low, int hardclip_high, int queryseq_offset);
+Substring_add_intron_out (List_T pairs, T substringA, T substringB, int querylength,
+			  int hardclip_low, int hardclip_high, int queryseq_offset);
 
 #undef T
 #endif
diff --git a/src/table.c b/src/table.c
index 45ac503..9f9613d 100644
--- a/src/table.c
+++ b/src/table.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: table.c 182426 2016-01-15 22:06:05Z twu $";
+static char rcsid[] = "$Id: table.c 207856 2017-06-29 20:35:00Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -195,6 +195,32 @@ Table_remove (T table, const void *key) {
   return NULL;
 }
 
+void *
+Table_remove_old_key (T table, const void *key) {
+  void *old_key;
+  int i;
+  struct binding **pp;
+  /* Unlinks the first binding in key's bucket whose key compares equal (cmp returns 0) and returns that binding's stored key pointer; returns NULL when nothing matches */
+  assert(table);
+  /* assert(key); -- Doesn't hold for atomic 0 */
+  table->timestamp++;  /* incremented before the search, so it changes even when no binding is removed */
+  i = (*table->hash)(key)%table->size;  /* bucket index for this key */
+  for (pp = &table->buckets[i]; *pp; pp = &(*pp)->link) {  /* pp points at the link slot, so unlinking needs no separate "previous" pointer */
+    if ((*table->cmp)(key, (*pp)->key) == 0) {
+      struct binding *p = *pp;
+      /* void *value = p->value; */
+      old_key = (void *) p->key;  /* the key pointer held by the table, which may differ from the caller's key pointer */
+      *pp = p->link;
+      FREE(p);  /* the binding's value is not handed back to the caller (see the commented-out lines) */
+      table->length--;
+      /* return value; */
+      return old_key;
+    }
+  }
+  return NULL;
+}
+
+
 void **
 Table_keys (T table, void *end) {
   void **keyarray;
diff --git a/src/table.h b/src/table.h
index 653f7b0..8023c32 100644
--- a/src/table.h
+++ b/src/table.h
@@ -1,4 +1,4 @@
-/* $Id: table.h 115432 2013-11-18 18:21:03Z twu $ */
+/* $Id: table.h 207856 2017-06-29 20:35:00Z twu $ */
 #ifndef TABLE_INCLUDED
 #define TABLE_INCLUDED
 
@@ -26,6 +26,8 @@ extern void *
 Table_get (T table, const void *key);
 extern void *
 Table_remove (T table, const void *key);
+extern void *
+Table_remove_old_key (T table, const void *key);
 extern void   
 Table_map (T table,
 	   void (*apply)(const void *key, void **value, void *cl),
diff --git a/src/uniqscan.c b/src/uniqscan.c
index e6c1d92..121d20b 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 200234 2016-11-08 00:56:52Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 207328 2017-06-14 19:47:20Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1292,13 +1292,15 @@ main (int argc, char *argv[]) {
 		   /*snpp*/snps_iit ? true : false,amb_closest_p,/*amb_clip_p*/true,min_shortend);
   spansize = Spanningelt_setup(index1part,index1interval);
   Indel_setup(min_indel_end_matches,indel_penalty_middle);
-  Stage1hr_setup(/*use_sarray_p*/false,/*use_only_sarray_p*/false,index1part,index1interval,
+  Stage1hr_setup(/*use_sarray_p*/false,/*use_only_sarray_p*/false,/*require_completeset_p*/false,
+		 index1part,index1interval,
 		 spansize,/*max_anchors*/10,chromosome_iit,nchromosomes,
 		 genome,genomealt,mode,/*maxpaths_search*/10,
 		 splicesites,splicetypes,splicedists,nsplicesites,
 		 novelsplicingp,knownsplicingp,/*find_dna_chimeras_p*/false,distances_observed_p,
 		 subopt_levels,min_indel_end_matches,max_middle_insertions,max_middle_deletions,
-		 shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,min_intronlength,
+		 shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,
+		 min_intronlength,expected_pairlength,pairlength_deviation,
 		 min_distantsplicing_end_matches,min_distantsplicing_identity,
 		 nullgap,maxpeelback,maxpeelback_distalmedial,
 		 extramaterial_end,extramaterial_paired,gmap_mode,
@@ -1338,11 +1340,13 @@ main (int argc, char *argv[]) {
 		 chromosome_iit,nchromosomes,circular_typeint,genes_iit,genes_divint_crosstable,
 		 /*tally_iit*/NULL,/*tally_divint_crosstable*/NULL,
 		 /*runlength_iit*/NULL,/*runlength_divint_crosstable*/NULL,
-		 distances_observed_p,pairmax_linear,pairmax_circular,expected_pairlength,pairlength_deviation,
+		 distances_observed_p,pairmax_linear,pairmax_circular,
+		 expected_pairlength,pairlength_deviation,maxpeelback,
 		 localsplicing_penalty,indel_penalty_middle,antistranded_penalty,
 		 favor_multiexon_p,gmap_min_nconsecutive,/*end_detail*/1,subopt_levels,
 		 max_middle_insertions,max_middle_deletions,
 		 novelsplicingp,shortsplicedist,/*merge_samechr_p*/false,circularp,altlocp,alias_starts,alias_ends,
+		 /*omit_concordant_uniq_p*/false,/*omit_concordant_mult_p*/false,
 		 /*failedinput_root*/NULL,/*print_m8_p*/false,/*want_random_p*/true);
 
   uniqueness_scan(from_right_p);
@@ -1415,6 +1419,8 @@ main (int argc, char *argv[]) {
     Univ_IIT_free(&chromosome_iit);
   }
 
+  Access_controlled_cleanup();
+
   return 0;
 }
 
diff --git a/tests/Makefile.in b/tests/Makefile.in
index 35e66ea..d5f4096 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -383,6 +383,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBGMAP_SO_VERSION = @LIBGMAP_SO_VERSION@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -395,9 +396,6 @@ MAKEINFO = @MAKEINFO@
 MANIFEST_TOOL = @MANIFEST_TOOL@
 MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
 MKDIR_P = @MKDIR_P@
-MPICC = @MPICC@
-MPILIBS = @MPILIBS@
-MPI_CFLAGS = @MPI_CFLAGS@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -422,6 +420,7 @@ SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@
+SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@
 SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@
 SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@
 SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@
diff --git a/util/Makefile.am b/util/Makefile.am
index acf6477..8d0d4e0 100644
--- a/util/Makefile.am
+++ b/util/Makefile.am
@@ -4,14 +4,14 @@ if FULLDIST
 bin_SCRIPTS = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
               psl_splicesites psl_introns psl_genes \
               ensembl_genes \
-              gtf_splicesites gtf_introns gtf_genes \
+              gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
               gff3_splicesites gff3_introns gff3_genes \
               dbsnp_iit gvf_iit vcf_iit sam_merge sam_restore
 else
 bin_SCRIPTS = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
               psl_splicesites psl_introns psl_genes \
               ensembl_genes \
-              gtf_splicesites gtf_introns gtf_genes \
+              gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
               gff3_splicesites gff3_introns gff3_genes \
               dbsnp_iit gvf_iit vcf_iit
 endif
@@ -77,6 +77,10 @@ gtf_genes: gtf_genes.pl
 	cp gtf_genes.pl gtf_genes
 	chmod +x gtf_genes
 
+gtf_transcript_splicesites: gtf_transcript_splicesites.pl
+	cp gtf_transcript_splicesites.pl gtf_transcript_splicesites
+	chmod +x gtf_transcript_splicesites
+
 gff3_splicesites: gff3_splicesites.pl
 	cp gff3_splicesites.pl gff3_splicesites
 	chmod +x gff3_splicesites
@@ -115,14 +119,14 @@ if FULLDIST
 CLEANFILES = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
              psl_splicesites psl_introns psl_genes \
              ensembl_genes \
-             gtf_splicesites gtf_introns gtf_genes \
+             gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
              gff3_splicesites gff3_introns gff3_genes \
              dbsnp_iit gvf_iit vcf_iit sam_merge sam_restore
 else
 CLEANFILES = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
              psl_splicesites psl_introns psl_genes \
              ensembl_genes \
-             gtf_splicesites gtf_introns gtf_genes \
+             gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
              gff3_splicesites gff3_introns gff3_genes \
              dbsnp_iit gvf_iit vcf_iit
 endif
diff --git a/util/Makefile.in b/util/Makefile.in
index fbee2bd..518ea47 100644
--- a/util/Makefile.in
+++ b/util/Makefile.in
@@ -122,9 +122,10 @@ CONFIG_HEADER = $(top_builddir)/src/config.h
 CONFIG_CLEAN_FILES = gmap_compress.pl gmap_uncompress.pl \
 	gmap_process.pl gmap_build.pl gmap_reassemble.pl md_coords.pl \
 	fa_coords.pl psl_splicesites.pl psl_introns.pl psl_genes.pl \
-	ensembl_genes.pl gtf_splicesites.pl gtf_introns.pl \
-	gtf_genes.pl gff3_splicesites.pl gff3_introns.pl gff3_genes.pl \
-	dbsnp_iit.pl gvf_iit.pl vcf_iit.pl
+	ensembl_genes.pl gtf_splicesites.pl \
+	gtf_transcript_splicesites.pl gtf_introns.pl gtf_genes.pl \
+	gff3_splicesites.pl gff3_introns.pl gff3_genes.pl dbsnp_iit.pl \
+	gvf_iit.pl vcf_iit.pl
 CONFIG_CLEAN_VPATH_FILES =
 am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
 am__vpath_adj = case $$p in \
@@ -183,6 +184,7 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/dbsnp_iit.pl.in \
 	$(srcdir)/gmap_reassemble.pl.in \
 	$(srcdir)/gmap_uncompress.pl.in $(srcdir)/gtf_genes.pl.in \
 	$(srcdir)/gtf_introns.pl.in $(srcdir)/gtf_splicesites.pl.in \
+	$(srcdir)/gtf_transcript_splicesites.pl.in \
 	$(srcdir)/gvf_iit.pl.in $(srcdir)/md_coords.pl.in \
 	$(srcdir)/psl_genes.pl.in $(srcdir)/psl_introns.pl.in \
 	$(srcdir)/psl_splicesites.pl.in $(srcdir)/vcf_iit.pl.in
@@ -224,6 +226,7 @@ INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBGMAP_SO_VERSION = @LIBGMAP_SO_VERSION@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -236,9 +239,6 @@ MAKEINFO = @MAKEINFO@
 MANIFEST_TOOL = @MANIFEST_TOOL@
 MAX_STACK_READLENGTH = @MAX_STACK_READLENGTH@
 MKDIR_P = @MKDIR_P@
-MPICC = @MPICC@
-MPILIBS = @MPILIBS@
-MPI_CFLAGS = @MPI_CFLAGS@
 NM = @NM@
 NMEDIT = @NMEDIT@
 OBJDUMP = @OBJDUMP@
@@ -263,6 +263,7 @@ SED = @SED@
 SET_MAKE = @SET_MAKE@
 SHELL = @SHELL@
 SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@
+SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@
 SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@
 SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@
 SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@
@@ -330,28 +331,28 @@ top_srcdir = @top_srcdir@
 @FULLDIST_FALSE@bin_SCRIPTS = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
 @FULLDIST_FALSE@              psl_splicesites psl_introns psl_genes \
 @FULLDIST_FALSE@              ensembl_genes \
-@FULLDIST_FALSE@              gtf_splicesites gtf_introns gtf_genes \
+@FULLDIST_FALSE@              gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
 @FULLDIST_FALSE@              gff3_splicesites gff3_introns gff3_genes \
 @FULLDIST_FALSE@              dbsnp_iit gvf_iit vcf_iit
 
 @FULLDIST_TRUE@bin_SCRIPTS = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
 @FULLDIST_TRUE@              psl_splicesites psl_introns psl_genes \
 @FULLDIST_TRUE@              ensembl_genes \
-@FULLDIST_TRUE@              gtf_splicesites gtf_introns gtf_genes \
+@FULLDIST_TRUE@              gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
 @FULLDIST_TRUE@              gff3_splicesites gff3_introns gff3_genes \
 @FULLDIST_TRUE@              dbsnp_iit gvf_iit vcf_iit sam_merge sam_restore
 
 @FULLDIST_FALSE@CLEANFILES = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
 @FULLDIST_FALSE@             psl_splicesites psl_introns psl_genes \
 @FULLDIST_FALSE@             ensembl_genes \
-@FULLDIST_FALSE@             gtf_splicesites gtf_introns gtf_genes \
+@FULLDIST_FALSE@             gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
 @FULLDIST_FALSE@             gff3_splicesites gff3_introns gff3_genes \
 @FULLDIST_FALSE@             dbsnp_iit gvf_iit vcf_iit
 
 @FULLDIST_TRUE@CLEANFILES = gmap_compress gmap_uncompress gmap_process gmap_build gmap_reassemble md_coords fa_coords \
 @FULLDIST_TRUE@             psl_splicesites psl_introns psl_genes \
 @FULLDIST_TRUE@             ensembl_genes \
-@FULLDIST_TRUE@             gtf_splicesites gtf_introns gtf_genes \
+@FULLDIST_TRUE@             gtf_splicesites gtf_introns gtf_genes gtf_transcript_splicesites \
 @FULLDIST_TRUE@             gff3_splicesites gff3_introns gff3_genes \
 @FULLDIST_TRUE@             dbsnp_iit gvf_iit vcf_iit sam_merge sam_restore
 
@@ -411,6 +412,8 @@ ensembl_genes.pl: $(top_builddir)/config.status $(srcdir)/ensembl_genes.pl.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 gtf_splicesites.pl: $(top_builddir)/config.status $(srcdir)/gtf_splicesites.pl.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+gtf_transcript_splicesites.pl: $(top_builddir)/config.status $(srcdir)/gtf_transcript_splicesites.pl.in
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 gtf_introns.pl: $(top_builddir)/config.status $(srcdir)/gtf_introns.pl.in
 	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 gtf_genes.pl: $(top_builddir)/config.status $(srcdir)/gtf_genes.pl.in
@@ -687,6 +690,10 @@ gtf_genes: gtf_genes.pl
 	cp gtf_genes.pl gtf_genes
 	chmod +x gtf_genes
 
+gtf_transcript_splicesites: gtf_transcript_splicesites.pl
+	cp gtf_transcript_splicesites.pl gtf_transcript_splicesites
+	chmod +x gtf_transcript_splicesites
+
 gff3_splicesites: gff3_splicesites.pl
 	cp gff3_splicesites.pl gff3_splicesites
 	chmod +x gff3_splicesites
diff --git a/util/gmap_build.pl.in b/util/gmap_build.pl.in
index 8867fb5..0ded540 100644
--- a/util/gmap_build.pl.in
+++ b/util/gmap_build.pl.in
@@ -1,5 +1,5 @@
 #! @PERL@
-# $Id: gmap_build.pl.in 184524 2016-02-18 17:51:39Z twu $
+# $Id: gmap_build.pl.in 203541 2017-02-15 00:49:44Z twu $
 
 use warnings;	
 
@@ -45,7 +45,9 @@ GetOptions(
     'c|circular=s' => \$circular,    # Circular chromosomes
     '2|altscaffold=s' => \$altscaffold,  # File with altscaffold info
 
-    'e|nmessages=s' => \$nmessages  # Max number of warnings or messages to print
+    'e|nmessages=s' => \$nmessages,  # Max number of warnings or messages to print
+
+    'p|part=s' => \$part	# Build in parts
     );
 
 
@@ -149,6 +151,9 @@ if (defined($contigs_mapped_p)) {
 #}
 #my $genome_fasta = join(" ",@quoted);
 
+$dbdir = create_db($destdir,$dbname);
+$genomecompfile = "$dbdir/$dbname.genomecomp";
+
 my $coordsfile = "$destdir/$dbname.coords";
 my $fasta_sources = "$destdir/$dbname.sources";
 
@@ -163,42 +168,49 @@ close($FP);
 
 check_compiler_assumptions();
 
-$dbdir = create_db($destdir,$dbname);
+if (!defined($part) || $part == 1) {
 
-create_genome_version($dbdir,$dbname);
+    create_genome_version($dbdir,$dbname);
 
-create_coords($mdfile,$fasta_pipe,$gunzip_flag,$circular_flag,$altscaffold_flag,$contigs_mapped_flag,$chrnamefile,
-	      $bindir,$coordsfile,$fasta_sources);
-if (!(-s "$coordsfile")) {
-    die "ERROR: $coordsfile not found";
-} else {
-    $gmap_process_pipe = make_gmap_process_pipe($fasta_pipe,$gunzip_flag,$bindir,$coordsfile,$fasta_sources);
-}
+    create_coords($mdfile,$fasta_pipe,$gunzip_flag,$circular_flag,$altscaffold_flag,$contigs_mapped_flag,$chrnamefile,
+		  $bindir,$coordsfile,$fasta_sources);
+    if (!(-s "$coordsfile")) {
+	die "ERROR: $coordsfile not found";
+    } else {
+	$gmap_process_pipe = make_gmap_process_pipe($fasta_pipe,$gunzip_flag,$bindir,$coordsfile,$fasta_sources);
+    }
 
-make_contig($nmessages_flag,$chr_order_flag,
-	    $bindir,$dbdir,$dbname,$gmap_process_pipe);
+    make_contig($nmessages_flag,$chr_order_flag,
+		$bindir,$dbdir,$dbname,$gmap_process_pipe);
 
-$genomecompfile = compress_genome($nmessages_flag,$bindir,$dbdir,$dbname,$gmap_process_pipe);
+    compress_genome($nmessages_flag,$bindir,$dbdir,$dbname,$gmap_process_pipe);
 
-unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile);
+    unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile);
+}
 
-$index_cmd = "\"$bindir/gmapindex\" -k $kmersize -q $sampling $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\"";
+if (!defined($part) || $part == 2) {
+    $index_cmd = "\"$bindir/gmapindex\" -k $kmersize -q $sampling $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\"";
 
-if (count_index_offsets($index_cmd,$genomecompfile) == 1) {
-    $index_cmd .= " -H";
-}
+    if (count_index_offsets($index_cmd,$genomecompfile) == 1) {
+	$index_cmd .= " -H";
+    }
 
-create_index_offsets($index_cmd,$compression_flag,$genomecompfile);
+    create_index_offsets($index_cmd,$compression_flag,$genomecompfile);
 
-create_index_positions($index_cmd,$genomecompfile);
+    create_index_positions($index_cmd,$genomecompfile);
+}
 
-if ($sarrayp == 1) {
-    make_enhanced_suffix_array($bindir,$dbdir,$dbname);
+if (!defined($part) || $part == 3) {
+    if ($sarrayp == 1) {
+	make_enhanced_suffix_array($bindir,$dbdir,$dbname);
+    }
 }
 
+if (!defined($part) || $part == 4) {
 # install_db($sarrayp);
-system("rm -f \"$fasta_sources\"");
-system("rm -f \"$coordsfile\"");
+    system("rm -f \"$fasta_sources\"");
+    system("rm -f \"$coordsfile\"");
+}
 
 exit;
 
@@ -216,6 +228,7 @@ sub create_db {
     my ($destdir, $dbname) = @_;
 
     print STDERR "Creating files in directory $destdir/$dbname\n";
+    system("mkdir -p \"$destdir\"");
     system("mkdir -p \"$destdir/$dbname\"");
     system("mkdir -p \"$destdir/$dbname/$dbname.maps\"");
     system("chmod 755 \"$destdir/$dbname/$dbname.maps\"");
@@ -289,7 +302,6 @@ sub make_contig {
 
 sub compress_genome {
     my ($nmessages_flag, $bindir, $dbdir, $dbname, $gmap_process_pipe) = @_;
-    my $genomecompfile = "$dbdir/$dbname.genomecomp";
     my ($cmd, $rc);
 
     $cmd = "$gmap_process_pipe | \"$bindir/gmapindex\" $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\" -G";
@@ -298,7 +310,7 @@ sub compress_genome {
 	die "$cmd failed with return code $rc";
     }
     sleep($sleeptime);
-    return $genomecompfile;
+    return;
 }
 
 sub unshuffle_genome {
diff --git a/util/gtf_transcript_splicesites.pl.in b/util/gtf_transcript_splicesites.pl.in
new file mode 100644
index 0000000..54695b2
--- /dev/null
+++ b/util/gtf_transcript_splicesites.pl.in
@@ -0,0 +1,490 @@
+#! @PERL@
+
+use warnings;
+
+use IO::File;
+use Getopt::Std;
+undef $opt_C;			# If provided, will keep only canonical splice sites.  Requires -d flag.
+undef $opt_R;			# If provided, will report only non-canonical splice sites to stdout.  Requires -d flag.
+undef $opt_2;			# If provided, will print dinucleotides at splice sites.  Requires -d flag.
+undef $opt_D;			# Genome directory (passed to get-genome as -D)
+undef $opt_d;			# Genome index (passed to get-genome as -d)
+undef $opt_E;			# Use exon_number field to determine exon ordering
+getopts("D:d:CR2E");		# -D and -d take a value; -C, -R, -2 and -E are switches
+
+
+# First pass, run only when -d is usable: buffer the input in @lines (it is
+# read from <> and replayed later by get_line) and pipe the coordinates of
+# every intron's flanking dinucleotides to get-genome.  The answers are saved
+# in get-genome.out, which is then reopened on $FP for the second pass.
+if (defined($opt_d)) {
+    if (!defined($opt_C) && !defined($opt_R) && !defined($opt_2)) {
+	print STDERR "-d flag useful only with -C, -R, or -2 flags.  Ignoring -d flag\n";
+	undef $opt_d;
+    } else {
+	if (0) {
+	    # Debugging aid: send the queries to stdout instead of get-genome
+	    $FP = new IO::File(">&STDOUT");
+	} elsif (defined($opt_D)) {
+	    $FP = new IO::File("| @BINDIR@/get-genome -D $opt_D -d $opt_d > get-genome.out");
+	} else {
+	    $FP = new IO::File("| @BINDIR@/get-genome -d $opt_d > get-genome.out");
+	}
+	defined($FP) or die "Cannot start get-genome";
+
+	@exons = ();
+	$sortp = 0;
+	$last_transcript_id = "";
+	while (defined($line = <>)) {
+	    if ($line =~ /^\#/) {
+		# Skip
+	    } else {
+		$line =~ s/\r\n/\n/;
+		push @lines,$line;
+		# chomp, not chop: a final line without a newline must keep its last character
+		chomp $line;
+		@fields = split /\t/,$line;
+
+		if ($fields[2] eq "exon") {
+		    @info = ();
+		    parse_info($fields[8]);
+		    $transcript_id = get_info(\@info,"transcript_id");
+		    if ($transcript_id ne $last_transcript_id) {
+			# A new transcript begins: emit the queries for the previous one
+			if ($last_transcript_id =~ /\S/) {
+			    query_dinucleotides(\@exons,$chr,$strand,$FP,$sortp);
+			}
+			@exons = ();
+			$sortp = 0;
+			$last_transcript_id = $transcript_id;
+			$chr = $fields[0];
+			$strand = $fields[6];
+		    }
+		    if (defined($opt_E) && defined($exon_number = get_info_optional(\@info,"exon_number"))) {
+			$exons[$exon_number-1] = "$fields[3] $fields[4]";
+		    } else {
+			# No usable exon_number: exons must be sorted by coordinate later
+			$sortp = 1;
+			push @exons,"$fields[3] $fields[4]";
+		    }
+		}
+	    }
+	}
+
+	# The steps below belong to the -d pass only.  They used to run even
+	# after -d had been ignored above, closing an undefined $FP and dying
+	# when no get-genome.out existed.
+	if ($last_transcript_id =~ /\S/) {
+	    query_dinucleotides(\@exons,$chr,$strand,$FP,$sortp);
+	}
+
+	close($FP);
+
+	$FP = new IO::File("get-genome.out") or die "Cannot open get-genome.out";
+    }
+
+} else {
+    if (defined($opt_C)) {
+	print STDERR "-C flag requires you to specify -d flag.  Ignoring -C flag\n";
+	undef $opt_C;
+    }
+    if (defined($opt_R)) {
+	print STDERR "-R flag requires you to specify -d flag.  Ignoring -R flag\n";
+	undef $opt_R;
+    }
+    if (defined($opt_2)) {
+	print STDERR "-2 flag requires you to specify -d flag.  Ignoring -2 flag\n";
+	undef $opt_2;
+    }
+}
+
+
+
+# Second pass: group consecutive exon lines by transcript_id and print the
+# splice sites of each transcript once all of its exons have been collected.
+@exons = ();
+$sortp = 0;
+$last_transcript_id = "";
+while (defined($line = get_line())) {
+    if ($line =~ /^\#/) {
+	# Skip
+    } else {
+	# Same line-ending handling as the -d pass; chomp keeps the last
+	# character of a final line that has no newline
+	$line =~ s/\r\n/\n/;
+	chomp $line;
+	@fields = split /\t/,$line;
+
+	if ($fields[2] eq "exon") {
+	    @info = ();
+	    parse_info($fields[8]);
+	    $transcript_id = get_info(\@info,"transcript_id");
+	    if ($transcript_id ne $last_transcript_id) {
+		# A new transcript begins: report the previous one
+		if ($last_transcript_id =~ /\S/) {
+		    print_exons(\@exons,$gene_name,$last_transcript_id,$chr,$strand,$FP,$sortp);
+		}
+		@exons = ();
+		$sortp = 0;
+		$gene_name = get_info(\@info,"gene_id","gene_name");
+		$last_transcript_id = $transcript_id;
+		$chr = $fields[0];
+		$strand = $fields[6];
+	    }
+
+	    if (defined($opt_E) && defined($exon_number = get_info_optional(\@info,"exon_number"))) {
+		$exons[$exon_number-1] = "$fields[3] $fields[4]";
+	    } else {
+		# No usable exon_number: exons must be sorted by coordinate later
+		$sortp = 1;
+		push @exons,"$fields[3] $fields[4]";
+	    }
+	}
+    }
+}
+
+# Report the final transcript
+if ($last_transcript_id =~ /\S/) {
+    print_exons(\@exons,$gene_name,$last_transcript_id,$chr,$strand,$FP,$sortp);
+}
+
+if (defined($opt_d)) {
+    close($FP);
+}
+
+exit;
+
+
+# Returns the next input line for the second pass, or nothing at end of input.
+# With -d the lines buffered in @lines by the first pass are replayed;
+# otherwise lines are read directly from <>.
+sub get_line {
+    my $next = defined($opt_d) ? shift(@lines) : <>;
+
+    return defined($next) ? $next : ();
+}
+
+
+# Parses a GTF attribute string such as
+#   gene_id "G1"; transcript_id "T1"; exon_number 2;
+# and appends one "key value" string per attribute to the global @info.
+# Values may be double-quoted or bare.  Dies when the remaining text cannot
+# be parsed.
+sub parse_info {
+    my ($list) = @_;
+
+    while (defined($list) && $list =~ /\S/) {
+	# Both patterns are anchored so attributes are consumed strictly left
+	# to right.  Unanchored, the quoted pattern could skip a bare attribute
+	# and pair its value with the next quoted one, e.g.
+	# 'exon_number 2; gene_name "N"' gave key "2;".  The bare-value pattern
+	# also excludes the trailing semicolon from the value.
+	if ($list =~ /^[\s;]*(\S+)\s+"([^"]+)"\s*;?(.*)/) {
+	    push @info,"$1 $2";
+	    $list = $3;
+	} elsif ($list =~ /^[\s;]*(\S+)\s+([^\s;]+)\s*;?(.*)/) {
+	    push @info,"$1 $2";
+	    $list = $3;
+	} else {
+	    die "Cannot parse $list";
+	}
+    }
+
+    return;
+}
+
+
+
+# Looks up an attribute in a list of "key value" strings.
+# $info is a reference to that list; the remaining arguments are candidate
+# keys in order of preference.  Returns the value of the first candidate key
+# that is present; otherwise prints a message to stderr and returns "NA".
+sub get_info {
+    my $info = shift @_;
+    my @desired_keys = @_;
+
+    foreach my $desired_key (@desired_keys) {
+	foreach my $item (@ {$info}) {
+	    my ($key,$value) = $item =~ /(\S+) (.+)/;
+	    if (defined($key) && $key eq $desired_key) {
+		return $value;
+	    }
+	}
+    }
+
+    print STDERR "Cannot find " . join(" or ",@desired_keys) . " in " . join("; ",@ {$info}) . "\n";
+    return "NA";
+}
+
+# Silent attribute lookup: scans the "key value" strings in @{$info} in order
+# and returns the value of the first one whose key equals any of the
+# requested keys.  Returns nothing when no item matches.
+sub get_info_optional {
+    my ($info, @desired_keys) = @_;
+
+    foreach my $entry (@ {$info}) {
+	my ($name, $setting) = $entry =~ /(\S+) (.+)/;
+	return $setting if grep { $name eq $_ } @desired_keys;
+    }
+
+    return;
+}
+
+
+# Reads the answer for one query from $FP and returns its dinucleotide.
+# Lines are skipped until "# Query: <query>" is found; the last line seen
+# before the following "# End" line is taken as the sequence line, and its
+# first two adjacent non-blank characters are returned.
+# Dies when an "# End" or another "# Query:" line shows the answers are out
+# of step with the requests, when the file ends early, or when no
+# dinucleotide is found.
+sub get_dinucleotide {
+    my ($query, $FP) = @_;
+    my $dinucl;
+    my $line;
+    my $lastline;
+
+    # \Q...\E: the query holds a chromosome name and ".." and must match literally
+    while (defined($line = <$FP>) && $line !~ /^\# Query: \Q$query\E\s*$/) {
+	if ($line =~ /^\# End\s*$/) {
+	    print STDERR "line is $line\n";
+	    die "Could not find query $query";
+	}
+    }
+
+    while (defined($line = <$FP>) && $line !~ /^\# End\s*$/) {
+	if ($line =~ /^\# Query: /) {
+	    die "Could not find query $query";
+	}
+	$lastline = $line;
+    }
+
+    if (!defined($line)) {
+	die "File ended while looking for query $query";
+    }
+
+    if (!defined($lastline)) {
+	die "Could not find dinucl: no sequence line for query $query";
+    }
+
+    ($dinucl) = $lastline =~ /(\S\S)/;
+    if (!defined($dinucl)) {
+	# Report the line that was searched, not the "# End" line held in $line
+	die "Could not find dinucl in lastline $lastline for query $query";
+    }
+
+    return $dinucl;
+}
+
+
+# sort() comparator for "start end" strings: orders by the numeric start.
+sub ascending_cmp {
+    my ($left) = $a =~ /(\d+) \d+/;
+    my ($right) = $b =~ /(\d+) \d+/;
+
+    return $left <=> $right;
+}
+
+# Intron boundaries for a plus-strand transcript.
+# $exons is a reference to a list of "start end" strings; when $sortp is 1
+# they are first sorted by ascending start, otherwise used in the given order.
+# Returns four array references, each with one entry per intron:
+#   querystarts - cumulative exon length before each exon except the first
+#   queryends   - cumulative exon length after each exon except the last
+#   starts      - start of each exon except the first
+#   ends        - end of each exon except the last
+sub get_bounds_plus {
+    my ($exons, $sortp) = @_;
+    my (@querystarts, @queryends, @starts, @ends);
+    my $offset = 0;
+
+    my @ordered = ($sortp == 1) ? (sort ascending_cmp (@ {$exons})) : @ {$exons};
+    foreach my $span (@ordered) {
+	my ($lo, $hi) = $span =~ /(\d+) (\d+)/;
+	push @starts, $lo;
+	push @ends, $hi;
+	push @querystarts, $offset;
+	$offset += $hi - $lo + 1;
+	push @queryends, $offset;
+    }
+
+    # Keep only the values that flank an intron
+    shift @querystarts;
+    pop @queryends;
+    shift @starts;
+    pop @ends;
+
+    return (\@querystarts,\@queryends,\@starts,\@ends);
+}
+
+# Intron boundaries for a minus-strand transcript.
+# $exons is a reference to a list of "start end" strings; when $sortp is 1
+# they are first sorted by descending start, otherwise used in the given order.
+# Returns four array references, each with one entry per intron:
+#   querystarts - cumulative exon length before each exon except the first
+#   queryends   - cumulative exon length after each exon except the last
+#   starts      - start of each exon except the last
+#   ends        - end of each exon except the first
+sub get_bounds_minus {
+    my ($exons, $sortp) = @_;
+    my (@querystarts, @queryends, @starts, @ends);
+    my $offset = 0;
+
+    my @ordered = ($sortp == 1) ? (reverse sort ascending_cmp (@ {$exons})) : @ {$exons};
+    foreach my $span (@ordered) {
+	my ($lo, $hi) = $span =~ /(\d+) (\d+)/;
+	push @starts, $lo;
+	push @ends, $hi;
+	push @querystarts, $offset;
+	$offset += $hi - $lo + 1;
+	push @queryends, $offset;
+    }
+
+    # Keep only the values that flank an intron
+    shift @querystarts;
+    pop @queryends;
+    pop @starts;
+    shift @ends;
+
+    return (\@querystarts,\@queryends,\@starts,\@ends);
+}
+
+
+# Writes to $FP, for every intron of one transcript, a "chr:pos..pos" query
+# line for the donor dinucleotide followed by one for the acceptor
+# dinucleotide.  For the minus strand the coordinates are written in
+# descending order.  Nothing is written for other strand values.
+# $exons is a reference to a list of "start end" strings and $sortp is passed
+# through to get_bounds_plus/get_bounds_minus.
+sub query_dinucleotides {
+    my ($exons, $chr, $strand, $FP, $sortp) = @_;
+    my $nexons = $#{$exons} + 1;
+    my ($starts, $ends, $query);
+
+    # The bounds come back as array references and must be dereferenced.
+    # Indexing $ends[$i] / $starts[$i] would read unrelated, empty globals.
+    if ($strand eq "+") {
+	(undef,undef,$starts,$ends) = get_bounds_plus($exons,$sortp);
+	for (my $i = 0; $i < $nexons - 1; $i++) {
+	    $query = sprintf("%s:%u..%u",$chr,$ {$ends}[$i]+1,$ {$ends}[$i]+2);
+	    print $FP $query . "\n";
+
+	    $query = sprintf("%s:%u..%u",$chr,$ {$starts}[$i]-2,$ {$starts}[$i]-1);
+	    print $FP $query . "\n";
+	}
+
+    } elsif ($strand eq "-") {
+	(undef,undef,$starts,$ends) = get_bounds_minus($exons,$sortp);
+	for (my $i = 0; $i < $nexons - 1; $i++) {
+	    $query = sprintf("%s:%u..%u",$chr,$ {$starts}[$i]-1,$ {$starts}[$i]-2);
+	    print $FP $query . "\n";
+
+	    $query = sprintf("%s:%u..%u",$chr,$ {$ends}[$i]+2,$ {$ends}[$i]+1);
+	    print $FP $query . "\n";
+	}
+    }
+
+    return;
+}
+
+
+
+# Returns 1 when the donor dinucleotide is GT or GC, or is AT paired with an
+# AC acceptor; returns 0 otherwise.
+sub donor_okay_p {
+    my ($donor_dinucl, $acceptor_dinucl) = @_;
+    my $accepted = $donor_dinucl eq "GT"
+	|| $donor_dinucl eq "GC"
+	|| ($donor_dinucl eq "AT" && $acceptor_dinucl eq "AC");
+
+    return $accepted ? 1 : 0;
+}
+
+# Returns 1 when the acceptor dinucleotide is AG, or is AC paired with an AT
+# donor; returns 0 otherwise.
+sub acceptor_okay_p {
+    my ($donor_dinucl, $acceptor_dinucl) = @_;
+    my $accepted = $acceptor_dinucl eq "AG"
+	|| ($donor_dinucl eq "AT" && $acceptor_dinucl eq "AC");
+
+    return $accepted ? 1 : 0;
+}
+
+
+# Prints to stdout the donor and acceptor splice site of every intron of one
+# transcript, one line per site:
+#   >gene.transcript.exonN/M transcript:pos..pos donor|acceptor 0 [dinucl] <strand>chr@pos
+# $exons is a reference to a list of "start end" strings; $sortp is passed
+# through to get_bounds_plus/get_bounds_minus.  Nothing is printed for strand
+# values other than "+" and "-".
+# Without -d the sites are printed unconditionally.  With -d the donor and
+# acceptor dinucleotides are first read from $FP with get_dinucleotide, using
+# the same query strings that query_dinucleotides builds, and then:
+#   -C  non-canonical sites are skipped with a message on stderr
+#   -R  only non-canonical sites are printed, with their dinucleotide
+#   otherwise all sites are printed, with the dinucleotide when -2 is given
+sub print_exons {
+    my ($exons, $gene_name, $transcript_id, $chr, $strand, $FP, $sortp) = @_;
+    my $nexons = $#{$exons} + 1;
+    my ($querystarts, $queryends, $starts, $ends);
+    my ($intron_length, $query, $donor_dinucl, $acceptor_dinucl);
+
+    if ($strand eq "+") {
+	($querystarts,$queryends,$starts,$ends) = get_bounds_plus($exons,$sortp);
+	for (my $i = 0; $i < $nexons - 1; $i++) {
+	    $intron_length = $ {$starts}[$i] - $ {$ends}[$i] - 1;
+	    if (!defined($opt_d)) {
+		printf ">%s.%s.exon%d/%d %s:%d..%d donor 0",$gene_name,$transcript_id,$i+1,$nexons,$transcript_id,$ {$queryends}[$i],$ {$queryends}[$i]+1;
+		printf " +%s@%u\n",$chr,$ {$ends}[$i];
+		printf ">%s.%s.exon%d/%d %s:%d..%d acceptor 0",$gene_name,$transcript_id,$i+2,$nexons,$transcript_id,$ {$querystarts}[$i],$ {$querystarts}[$i]+1;
+		printf " +%s@%u\n",$chr,$ {$starts}[$i];
+	    } else {
+		# $starts and $ends are array references; $ends[$i] would read
+		# an unrelated, empty global array
+		$query = sprintf("%s:%u..%u",$chr,$ {$ends}[$i]+1,$ {$ends}[$i]+2);
+		$donor_dinucl = get_dinucleotide($query,$FP);
+
+		$query = sprintf("%s:%u..%u",$chr,$ {$starts}[$i]-2,$ {$starts}[$i]-1);
+		$acceptor_dinucl = get_dinucleotide($query,$FP);
+
+		if (defined($opt_C) && donor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+		    printf STDERR "Skipping non-canonical donor $donor_dinucl, intron length %d for %s.%s.exon%d/%d on plus strand\n",
+		    $intron_length,$gene_name,$transcript_id,$i+1,$nexons;
+		} elsif (defined($opt_R)) {
+		    if (donor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+			printf ">%s.%s.exon%d/%d %s:%d..%d donor 0",$gene_name,$transcript_id,$i+1,$nexons,$transcript_id,$ {$queryends}[$i],$ {$queryends}[$i]+1;
+			print " $donor_dinucl";
+			printf " +%s@%u\n",$chr,$ {$ends}[$i];
+		    }
+		} else {
+		    printf ">%s.%s.exon%d/%d %s:%d..%d donor 0",$gene_name,$transcript_id,$i+1,$nexons,$transcript_id,$ {$queryends}[$i],$ {$queryends}[$i]+1;
+		    if (defined($opt_2)) {
+			print " $donor_dinucl";
+		    }
+		    printf " +%s@%u\n",$chr,$ {$ends}[$i];
+		}
+
+		if (defined($opt_C) && acceptor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+		    printf STDERR "Skipping non-canonical acceptor $acceptor_dinucl, intron length %d for %s.%s.exon%d/%d on plus strand\n",
+		    $intron_length,$gene_name,$transcript_id,$i+2,$nexons;
+		} elsif (defined($opt_R)) {
+		    if (acceptor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+			printf ">%s.%s.exon%d/%d %s:%d..%d acceptor 0",$gene_name,$transcript_id,$i+2,$nexons,$transcript_id,$ {$querystarts}[$i],$ {$querystarts}[$i]+1;
+			print " $acceptor_dinucl";
+			# printf, not print: the string is a format
+			printf " +%s@%u\n",$chr,$ {$starts}[$i];
+		    }
+		} else {
+		    printf ">%s.%s.exon%d/%d %s:%d..%d acceptor 0",$gene_name,$transcript_id,$i+2,$nexons,$transcript_id,$ {$querystarts}[$i],$ {$querystarts}[$i]+1;
+		    if (defined($opt_2)) {
+			print " $acceptor_dinucl";
+		    }
+		    # printf, not print: the string is a format
+		    printf " +%s@%u\n",$chr,$ {$starts}[$i];
+		}
+	    }
+	}
+
+    } elsif ($strand eq "-") {
+	($querystarts,$queryends,$starts,$ends) = get_bounds_minus($exons,$sortp);
+	for (my $i = 0; $i < $nexons - 1; $i++) {
+	    $intron_length = $ {$starts}[$i] - $ {$ends}[$i] - 1;
+	    if (!defined($opt_d)) {
+		printf ">%s.%s.exon%d/%d %s:%d..%d donor 0",$gene_name,$transcript_id,$i+1,$nexons,$transcript_id,$ {$queryends}[$i],$ {$queryends}[$i]+1;
+		printf " -%s@%u\n",$chr,$ {$starts}[$i];
+		printf ">%s.%s.exon%d/%d %s:%d..%d acceptor 0",$gene_name,$transcript_id,$i+2,$nexons,$transcript_id,$ {$querystarts}[$i],$ {$querystarts}[$i]+1;
+		printf " -%s@%u\n",$chr,$ {$ends}[$i];
+	    } else {
+		# Dereference the bounds here too (see the plus-strand branch)
+		$query = sprintf("%s:%u..%u",$chr,$ {$starts}[$i]-1,$ {$starts}[$i]-2);
+		$donor_dinucl = get_dinucleotide($query,$FP);
+
+		$query = sprintf("%s:%u..%u",$chr,$ {$ends}[$i]+2,$ {$ends}[$i]+1);
+		$acceptor_dinucl = get_dinucleotide($query,$FP);
+
+		if (defined($opt_C) && donor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+		    printf STDERR "Skipping non-canonical donor $donor_dinucl, intron length %d for %s.%s.exon%d/%d on minus strand\n",
+		    $intron_length,$gene_name,$transcript_id,$i+1,$nexons;
+		} elsif (defined($opt_R)) {
+		    if (donor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+			printf ">%s.%s.exon%d/%d %s:%d..%d donor 0",$gene_name,$transcript_id,$i+1,$nexons,$transcript_id,$ {$queryends}[$i],$ {$queryends}[$i]+1;
+			print " $donor_dinucl";
+			printf " -%s@%u\n",$chr,$ {$starts}[$i];
+		    }
+		} else {
+		    printf ">%s.%s.exon%d/%d %s:%d..%d donor 0",$gene_name,$transcript_id,$i+1,$nexons,$transcript_id,$ {$queryends}[$i],$ {$queryends}[$i]+1;
+		    if (defined($opt_2)) {
+			print " $donor_dinucl";
+		    }
+		    printf " -%s@%u\n",$chr,$ {$starts}[$i];
+		}
+
+		if (defined($opt_C) && acceptor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+		    printf STDERR "Skipping non-canonical acceptor $acceptor_dinucl, intron length %d for %s.%s.exon%d/%d on minus strand\n",
+		    $intron_length,$gene_name,$transcript_id,$i+2,$nexons;
+		} elsif (defined($opt_R)) {
+		    if (acceptor_okay_p($donor_dinucl,$acceptor_dinucl) == 0) {
+			printf ">%s.%s.exon%d/%d %s:%d..%d acceptor 0",$gene_name,$transcript_id,$i+2,$nexons,$transcript_id,$ {$querystarts}[$i],$ {$querystarts}[$i]+1;
+			print " $acceptor_dinucl";
+			printf " -%s@%u\n",$chr,$ {$ends}[$i];
+		    }
+		} else {
+		    printf ">%s.%s.exon%d/%d %s:%d..%d acceptor 0",$gene_name,$transcript_id,$i+2,$nexons,$transcript_id,$ {$querystarts}[$i],$ {$querystarts}[$i]+1;
+		    if (defined($opt_2)) {
+			print " $acceptor_dinucl";
+		    }
+		    printf " -%s@%u\n",$chr,$ {$ends}[$i];
+		}
+	    }
+	}
+    }
+
+    return;
+}
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git



More information about the debian-med-commit mailing list