[med-svn] [bowtie] 01/04: New upstream version 1.2.2~beta+dfsg

Alex Mestiashvili malex-guest at moszumanska.debian.org
Mon Nov 27 13:13:41 UTC 2017


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository bowtie.

commit fef87b40a4bf367be58776ea0401cf98a86924f7
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Mon Nov 27 13:26:30 2017 +0100

    New upstream version 1.2.2~beta+dfsg
---
 .gitignore                   |    4 +-
 LICENSE                      |  179 +++----
 MANUAL.markdown              |   62 +--
 Makefile                     |   99 ++--
 aligner.h                    |   31 +-
 annot.cpp                    |   30 --
 annot.h                      |   63 ---
 bitset.h                     |  197 ++++----
 blockwise_sa.h               |    7 +-
 bowtie_inspect.cpp           |    2 -
 btypes.h                     |    7 +-
 doc/website/recent_news.ssi  |    3 +-
 ebwt.h                       |    9 -
 ebwt_search.cpp              | 1088 ++++++++++++++++++++++++++++--------------
 filebuf.h                    |   15 +-
 genomes/.cvsignore           |    1 -
 hit.cpp                      |   43 +-
 hit.h                        |  469 +++++-------------
 hit_set.h                    |   10 -
 pat.cpp                      |   49 +-
 pat.h                        |  130 ++---
 refmap.cpp                   |   64 ---
 refmap.h                     |   65 ---
 row_chaser.h                 |    2 +-
 sam.cpp                      |   19 +-
 sam.h                        |   52 +-
 scripts/test/simple_tests.pl |   16 +
 sstring.h                    |    9 +
 threading.h                  |   10 +
 29 files changed, 1276 insertions(+), 1459 deletions(-)

diff --git a/.gitignore b/.gitignore
index 39ae9d8..793b7d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,6 @@ bowtie-inspect-s
 bowtie-inspect-s-debug
 .tmp*
 *.dSYM
-*.xcodeproj
\ No newline at end of file
+*.xcodeproj
+reads_*
+.simple_tests*
diff --git a/LICENSE b/LICENSE
index 7cb8b7b..d31db89 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,114 +1,73 @@
-The Artistic License
+Artistic License 2.0
+Copyright (c) 2000-2006, The Perl Foundation.
+
+Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
 
 Preamble
+This license establishes the terms under which a given free software Package may be copied, modified, distributed, and/or redistributed. The intent is that the Copyright Holder maintains some artistic control over the development of that Package while still keeping the Package available as open source and free software.
+
+You are always permitted to make arrangements wholly outside of this license directly with the Copyright Holder of a given Package. If the terms of this license do not permit the full use that you propose to make of the Package, you should contact the Copyright Holder and seek a different licensing arrangement.
+
+Definitions
+"Copyright Holder" means the individual(s) or organization(s) named in the copyright notice for the entire Package.
+
+"Contributor" means any party that has contributed code or other material to the Package, in accordance with the Copyright Holder's procedures.
+
+"You" and "your" means any person who would like to copy, distribute, or modify the Package.
+
+"Package" means the collection of files distributed by the Copyright Holder, and derivatives of that collection and/or of those files. A given Package may consist of either the Standard Version, or a Modified Version.
+
+"Distribute" means providing a copy of the Package or making it accessible to anyone else, or in the case of a company or organization, to others outside of your company or organization.
+
+"Distributor Fee" means any fee that you charge for Distributing this Package or providing support for this Package to another party. It does not mean licensing fees.
+
+"Standard Version" refers to the Package if it has not been modified, or has been modified only in ways explicitly requested by the Copyright Holder.
+
+"Modified Version" means the Package, if it has been changed, and such changes were not explicitly requested by the Copyright Holder.
+
+"Original License" means this Artistic License as Distributed with the Standard Version of the Package, in its current version or as it may be modified by The Perl Foundation in the future.
+
+"Source" form means the source code, documentation source, and configuration files for the Package.
+
+"Compiled" form means the compiled bytecode, object code, binary, or any other form resulting from mechanical transformation or translation of the Source form.
+
+Permission for Use and Modification Without Distribution
+(1) You are permitted to use the Standard Version and create and use Modified Versions for any purpose without restriction, provided that you do not Distribute the Modified Version.
+
+Permissions for Redistribution of the Standard Version
+(2) You may Distribute verbatim copies of the Source form of the Standard Version of this Package in any medium without restriction, either gratis or for a Distributor Fee, provided that you duplicate all of the original copyright notices and associated disclaimers. At your discretion, such verbatim copies may or may not include a Compiled form of the Package.
+
+(3) You may apply any bug fixes, portability changes, and other modifications made available from the Copyright Holder. The resulting Package will still be considered the Standard Version, and as such will be subject to the Original License.
+
+Distribution of Modified Versions of the Package as Source
+(4) You may Distribute your Modified Version as Source (either gratis or for a Distributor Fee, and with or without a Compiled form of the Modified Version) provided that you clearly document how it differs from the Standard Version, including, but not limited to, documenting any non-standard features, executables, or modules, and provided that you do at least ONE of the following:
+
+(a) make the Modified Version available to the Copyright Holder of the Standard Version, under the Original License, so that the Copyright Holder may include your modifications in the Standard Version.
+(b) ensure that installation of your Modified Version does not prevent the user installing or running the Standard Version. In addition, the Modified Version must bear a name that is different from the name of the Standard Version.
+(c) allow anyone who receives a copy of the Modified Version to make the Source form of the Modified Version available to others under
+(i) the Original License or
+(ii) a license that permits the licensee to freely copy, modify and redistribute the Modified Version using the same licensing terms that apply to the copy that the licensee received, and requires that the Source form of the Modified Version, and of any works derived from it, be made freely available in that license fees are prohibited but Distributor Fees are allowed.
+
+Distribution of Compiled Forms of the Standard Version or Modified Versions without the Source
+(5) You may Distribute Compiled forms of the Standard Version without the Source, provided that you include complete instructions on how to get the Source of the Standard Version. Such instructions must be valid at the time of your distribution. If these instructions, at any time while you are carrying out such distribution, become invalid, you must provide new instructions on demand or cease further distribution. If you provide valid instructions or cease distribution within thirty days [...]
+
+(6) You may Distribute a Modified Version in Compiled form without the Source, provided that you comply with Section 4 with respect to the Source of the Modified Version.
+
+Aggregating or Linking the Package
+(7) You may aggregate the Package (either the Standard Version or Modified Version) with other packages and Distribute the resulting aggregation provided that you do not charge a licensing fee for the Package. Distributor Fees are permitted, and licensing fees for other components in the aggregation are permitted. The terms of this license apply to the use and Distribution of the Standard or Modified Versions as included in the aggregation.
+
+(8) You are permitted to link Modified and Standard Versions with other works, to embed the Package in a larger work of your own, or to build stand-alone binary or bytecode versions of applications that include the Package, and Distribute the result without restriction, provided the result does not expose a direct interface to the Package.
+
+Items That are Not Considered Part of a Modified Version
+(9) Works (including, but not limited to, modules and scripts) that merely extend or make use of the Package, do not, by themselves, cause the Package to be a Modified Version. In addition, such works are not considered parts of the Package itself, and are not subject to the terms of this license.
+
+General Provisions
+(10) Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license.
+
+(11) If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license.
+
+(12) This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder.
 
-The intent of this document is to state the conditions under which a
-Package may be copied, such that the Copyright Holder maintains some
-semblance of artistic control over the development of the package,
-while giving the users of the package the right to use and distribute
-the Package in a more-or-less customary fashion, plus the right to
-make reasonable modifications.
-
-Definitions:
-    * "Package" refers to the collection of files distributed by the
-      Copyright Holder, and derivatives of that collection of files
-      created through textual modification.
-    * "Standard Version" refers to such a Package if it has not been
-      modified, or has been modified in accordance with the wishes of
-      the Copyright Holder.
-    * "Copyright Holder" is whoever is named in the copyright or
-      copyrights for the package.
-    * "You" is you, if you're thinking about copying or distributing
-      this Package.
-    * "Reasonable copying fee" is whatever you can justify on the
-      basis of media cost, duplication charges, time of people
-      involved, and so on. (You will not be required to justify it to
-      the Copyright Holder, but only to the computing community at
-      large as a market that must bear the fee.)
-    * "Freely Available" means that no fee is charged for the item
-      itself, though there may be fees involved in handling the
-      item. It also means that recipients of the item may redistribute
-      it under the same conditions they received it.
-
-1. You may make and give away verbatim copies of the source form of
-   the Standard Version of this Package without restriction, provided
-   that you duplicate all of the original copyright notices and
-   associated disclaimers.
-
-2. You may apply bug fixes, portability fixes and other modifications
-   derived from the Public Domain or from the Copyright Holder. A
-   Package modified in such a way shall still be considered the
-   Standard Version.
-
-3. You may otherwise modify your copy of this Package in any way,
-   provided that you insert a prominent notice in each changed file
-   stating how and when you changed that file, and provided that you
-   do at least ONE of the following:
-
-    a) place your modifications in the Public Domain or otherwise make
-    them Freely Available, such as by posting said modifications to
-    Usenet or an equivalent medium, or placing the modifications on a
-    major archive site such as ftp.uu.net, or by allowing the
-    Copyright Holder to include your modifications in the Standard
-    Version of the Package.
-
-    b) use the modified Package only within your corporation or
-    organization.
-
-    c) rename any non-standard executables so the names do not
-    conflict with standard executables, which must also be provided,
-    and provide a separate manual page for each non-standard
-    executable that clearly documents how it differs from the Standard
-    Version.
-
-    d) make other distribution arrangements with the Copyright Holder.
-
-4. You may distribute the programs of this Package in object code or
-   executable form, provided that you do at least ONE of the
-   following:
-
-    a) distribute a Standard Version of the executables and library
-    files, together with instructions (in the manual page or
-    equivalent) on where to get the Standard Version.
-
-    b) accompany the distribution with the machine-readable source of
-    the Package with your modifications.
-
-    c) accompany any non-standard executables with their corresponding
-    Standard Version executables, giving the non-standard executables
-    non-standard names, and clearly documenting the differences in
-    manual pages (or equivalent), together with instructions on where
-    to get the Standard Version.
-
-    d) make other distribution arrangements with the Copyright Holder.
-
-5. You may charge a reasonable copying fee for any distribution of
-   this Package. You may charge any fee you choose for support of this
-   Package. You may not charge a fee for this Package itself. However,
-   you may distribute this Package in aggregate with other (possibly
-   commercial) programs as part of a larger (possibly commercial)
-   software distribution provided that you do not advertise this
-   Package as a product of your own.
-
-6. The scripts and library files supplied as input to or produced as
-   output from the programs of this Package do not automatically fall
-   under the copyright of this Package, but belong to whomever
-   generated them, and may be sold commercially, and may be aggregated
-   with this Package.
-
-7. C or perl subroutines supplied by you and linked into this Package
-   shall not be considered part of this Package.
-
-8. The name of the Copyright Holder may not be used to endorse or
-   promote products derived from this software without specific prior
-   written permission.
-
-9. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
-   WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES
-   OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-
-The End
-This license is approved by the Open Source Initiative
-(www.opensource.org) for certifying software as OSI Certified Open
-Source.
+(13) This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic Licen [...]
 
+(14) Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, E [...]
diff --git a/MANUAL.markdown b/MANUAL.markdown
index 44a6272..a202a91 100644
--- a/MANUAL.markdown
+++ b/MANUAL.markdown
@@ -1627,7 +1627,7 @@ the `ID` and `SM` fields must both be among them to make the `@RG` line
 legal according to the [SAM Spec][SAM].  `--sam-RG` is ignored unless
 [`-S`/`--sam`] is also specified.
 
-<tr><td id="bowtie-options-no-unal">
+</td></tr><tr><td id="bowtie-options-no-unal">
 
 [`--no-unal`]: #bowtie-options-no-unal
 
@@ -1941,63 +1941,63 @@ right, the fields are:
     `bowtie` outputs some of these optional fields for each alignment,
     depending on the type of the alignment:
 
-    <table><tr><td>
+<table><tr><td>
 
         NM:i:<N>
 
-    </td><td>
+</td><td>
 
-    Aligned read has an edit distance of `<N>`.
+Aligned read has an edit distance of `<N>`.
 
-    </td></tr><tr><td>
+</td></tr><tr><td>
 
         CM:i:<N>
 
-    </td><td>
+</td><td>
 
-    Aligned read has an edit distance of `<N>` in colorspace.  This
-    field is present in addition to the `NM` field in [`-C`/`--color`]
-    mode, but is omitted otherwise.
+Aligned read has an edit distance of `<N>` in colorspace.  This
+field is present in addition to the `NM` field in [`-C`/`--color`]
+mode, but is omitted otherwise.
 
-    </td></tr><tr><td>
+</td></tr><tr><td>
 
         MD:Z:<S>
 
-    </td><td>
+</td><td>
 
-    For aligned reads, `<S>` is a string representation of the
-    mismatched reference bases in the alignment.  See [SAM] format
-    specification for details.  For colorspace alignments, `<S>`
-    describes the decoded *nucleotide* alignment, not the colorspace
-    alignment.
+For aligned reads, `<S>` is a string representation of the
+mismatched reference bases in the alignment.  See [SAM] format
+specification for details.  For colorspace alignments, `<S>`
+describes the decoded *nucleotide* alignment, not the colorspace
+alignment.
 
-    </td></tr><tr><td>
+</td></tr><tr><td>
 
         XA:i:<N>
 
-    </td><td>
+</td><td>
 
-    Aligned read belongs to stratum `<N>`.  See [Strata] for definition.
+Aligned read belongs to stratum `<N>`.  See [Strata] for definition.
 
 [Strata]: #strata
 
-    </td></tr><tr><td>
+</td></tr><tr><td>
 
         XM:i:<N>
 
-    </td><td>
+</td><td>
 
-    For a read with no reported alignments, `<N>` is 0 if the read had
-    no alignments.  If [`-m`] was specified and the read's alignments
-    were supressed because the [`-m`] ceiling was exceeded, `<N>` equals
-    the [`-m`] ceiling + 1, to indicate that there were at least that
-    many valid alignments (but all were suppressed).  In [`-M`] mode, if
-    the alignment was randomly selected because the [`-M`] ceiling was
-    exceeded, `<N>` equals the [`-M`] ceiling + 1, to indicate that there
-    were at least that many valid alignments (of which one was reported
-    at random).
+For a read with no reported alignments, `<N>` is 0 if the read had
+no alignments.  If [`-m`] was specified and the read's alignments
+were suppressed because the [`-m`] ceiling was exceeded, `<N>` equals
+the [`-m`] ceiling + 1, to indicate that there were at least that
+many valid alignments (but all were suppressed).  In [`-M`] mode, if
+the alignment was randomly selected because the [`-M`] ceiling was
+exceeded, `<N>` equals the [`-M`] ceiling + 1, to indicate that there
+were at least that many valid alignments (of which one was reported
+at random).
 
-    </td></tr></table>
+</td></tr></table>
 
 [SAM format specification]: http://samtools.sf.net/SAM1.pdf
 [FASTQ]: http://en.wikipedia.org/wiki/FASTQ_format
diff --git a/Makefile b/Makefile
index 86ecb57..d3eef5d 100644
--- a/Makefile
+++ b/Makefile
@@ -5,13 +5,14 @@
 prefix = /usr/local
 bindir = $(prefix)/bin
 
-SEQAN_DIR = SeqAn-1.1
-SEQAN_INC = -I $(SEQAN_DIR)
-INC = $(SEQAN_INC) -I third_party
-CPP = g++ -w
+SEQAN_DIR = ./SeqAn-1.1
+# treat SeqAn as a sysdir to suppress warnings
+SEQAN_INC = -isystem $(SEQAN_DIR)
+INC = $(if $(RELEASE_BUILD),-I$(CURDIR)/.include) $(SEQAN_INC) -I third_party
+CPP = g++
 CXX = $(CPP)
 CC = gcc
-LIBS = $(LDFLAGS) -lz
+LIBS = $(LDFLAGS) $(if $(RELEASE_BUILD),-L$(CURDIR)/.lib) -lz
 HEADERS = $(wildcard *.h)
 BOWTIE_MM = 1
 BOWTIE_SHARED_MEM = 1
@@ -20,11 +21,16 @@ EXTRA_CFLAGS =
 EXTRA_CXXFLAGS =
 CFLAGS += $(EXTRA_CFLAGS)
 CXXFLAGS += $(EXTRA_CXXFLAGS)
+WARNING_FLAGS = -Wall -Wno-unused-private-field \
+                -Wno-unused-parameter -Wno-reorder \
+				-Wno-unused-local-typedefs
+
+RELEASE_DEPENDENCIES = $(if $(RELEASE_BUILD),static-libs)
 
 # Detect Cygwin or MinGW
-WINDOWS = 0
-CYGWIN = 0
-MINGW = 0
+WINDOWS =
+CYGWIN =
+MINGW =
 ifneq (,$(findstring CYGWIN,$(shell uname)))
     WINDOWS = 1
     CYGWIN = 1
@@ -41,7 +47,7 @@ else
     endif
 endif
 
-MACOS = 0
+MACOS =
 ifneq (,$(findstring Darwin,$(shell uname)))
     MACOS = 1
 	ifneq (,$(findstring 13,$(shell uname -r)))
@@ -49,14 +55,12 @@ ifneq (,$(findstring Darwin,$(shell uname)))
 		CC = clang
 		override EXTRA_FLAGS += -stdlib=libstdc++
 	endif
-	ifneq (,$(findstring 14,$(shell uname -r)))
-		CPP = clang++
-		CC = clang
-		override EXTRA_FLAGS += -stdlib=libstdc++
+	ifeq (1, $(RELEASE_BUILD))
+		EXTRA_FLAGS += -mmacosx-version-min=10.9
 	endif
 endif
 
-LINUX = 0
+LINUX =
 ifneq (,$(findstring Linux,$(shell uname)))
     LINUX = 1
     override EXTRA_FLAGS += -Wl,--hash-style=both
@@ -92,11 +96,7 @@ endif
 
 ifneq (1,$(NO_TBB))
 	LIBS += $(PTHREAD_LIB) -ltbb
-	ifeq (1, $(RELEASE_BIN))
-		LIBS += -ltbbmalloc
-	else
-		LIBS += -ltbbmalloc_proxy
-	endif
+	LIBS += -ltbbmalloc$(if $(RELEASE_BUILD),,_proxy)
 	override EXTRA_FLAGS += -DWITH_TBB
 else
 	LIBS += $(PTHREAD_LIB)
@@ -150,7 +150,7 @@ ifeq (1,$(NO_TBB))
 endif
 
 SEARCH_CPPS = qual.cpp pat.cpp ebwt_search_util.cpp ref_aligner.cpp \
-              log.cpp hit_set.cpp refmap.cpp annot.cpp sam.cpp \
+              log.cpp hit_set.cpp sam.cpp \
               color.cpp color_dec.cpp hit.cpp
 SEARCH_CPPS_MAIN = $(SEARCH_CPPS) bowtie_main.cpp
 
@@ -263,7 +263,7 @@ RELEASE_DEFS = -DCOMPILER_OPTIONS="\"$(RELEASE_FLAGS) $(ALL_FLAGS)\""
 
 bowtie-build-s: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 	$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(ALL_FLAGS)  \
-		$(DEFS) $(NOASSERT_FLAGS) -Wall \
+		$(DEFS) $(NOASSERT_FLAGS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(BUILD_CPPS_MAIN) \
@@ -271,7 +271,7 @@ bowtie-build-s: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 
 bowtie-build-l: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 	$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(ALL_FLAGS)  \
-		$(DEFS) -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) -Wall \
+		$(DEFS) -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(BUILD_CPPS_MAIN) \
@@ -279,7 +279,7 @@ bowtie-build-l: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 
 bowtie-build_prof: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 	$(CXX) $(RELEASE_FLAGS) -pg -p -g3 $(RELEASE_DEFS) $(ALL_FLAGS) \
-		$(DEFS) $(NOASSERT_FLAGS) -Wall \
+		$(DEFS) $(NOASSERT_FLAGS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(BUILD_CPPS_MAIN) \
@@ -287,7 +287,7 @@ bowtie-build_prof: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 
 bowtie-build-s-debug: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 	$(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -Wall \
+		$(DEFS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(BUILD_CPPS_MAIN) \
@@ -295,7 +295,7 @@ bowtie-build-s-debug: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 
 bowtie-build-l-debug: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 	$(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -DBOWTIE_64BIT_INDEX -Wall \
+		$(DEFS) -DBOWTIE_64BIT_INDEX $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(BUILD_CPPS_MAIN) \
@@ -307,7 +307,7 @@ bowtie-build-l-debug: ebwt_build.cpp $(OTHER_CPPS) $(HEADERS)
 
 bowtie-align-s: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
 	$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(ALL_FLAGS) \
-		$(DEFS) $(NOASSERT_FLAGS) -Wall \
+		$(DEFS) $(NOASSERT_FLAGS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -315,7 +315,7 @@ bowtie-align-s: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH
 
 bowtie-align-l: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
 	$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(ALL_FLAGS) \
-		$(DEFS) $(NOASSERT_FLAGS) -DBOWTIE_64BIT_INDEX -Wall \
+		$(DEFS) $(NOASSERT_FLAGS) -DBOWTIE_64BIT_INDEX $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -324,7 +324,7 @@ bowtie-align-l: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH
 bowtie_prof: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
 	$(CXX) $(RELEASE_FLAGS) \
 		$(RELEASE_DEFS) -pg -p -g3 $(ALL_FLAGS) \
-		$(DEFS) $(NOASSERT_FLAGS) -Wall \
+		$(DEFS) $(NOASSERT_FLAGS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -333,7 +333,7 @@ bowtie_prof: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH_FR
 bowtie-align-s-debug: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
 	$(CXX) $(DEBUG_FLAGS) \
 		$(DEBUG_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -Wall \
+		$(DEFS) $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -342,7 +342,7 @@ bowtie-align-s-debug: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(
 bowtie-align-l-debug: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
 	$(CXX) $(DEBUG_FLAGS) \
 		$(DEBUG_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -DBOWTIE_64BIT_INDEX -Wall \
+		$(DEFS) -DBOWTIE_64BIT_INDEX $(WARNING_FLAGS) \
 		$(INC) \
 		-o $@ $< \
 		$(OTHER_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -355,7 +355,7 @@ bowtie-align-l-debug: ebwt_search.cpp $(SEARCH_CPPS) $(OTHER_CPPS) $(HEADERS) $(
 bowtie-inspect-s: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS)
 	$(CXX) $(RELEASE_FLAGS) \
 		$(RELEASE_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -Wall \
+		$(DEFS) $(WARNING_FLAGS) \
 		$(INC) -I . \
 		-o $@ $< \
 		$(OTHER_CPPS) \
@@ -364,7 +364,7 @@ bowtie-inspect-s: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS)
 bowtie-inspect-l: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS)
 	$(CXX) $(RELEASE_FLAGS) \
 		$(RELEASE_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -DBOWTIE_64BIT_INDEX -Wall \
+		$(DEFS) -DBOWTIE_64BIT_INDEX $(WARNING_FLAGS) \
 		$(INC) -I . \
 		-o $@ $< \
 		$(OTHER_CPPS) \
@@ -373,7 +373,7 @@ bowtie-inspect-l: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS)
 bowtie-inspect-s-debug: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS) 
 	$(CXX) $(DEBUG_FLAGS) \
 		$(DEBUG_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -Wall \
+		$(DEFS) $(WARNING_FLAGS) \
 		$(INC) -I . \
 		-o $@ $< \
 		$(OTHER_CPPS) \
@@ -382,7 +382,7 @@ bowtie-inspect-s-debug: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS)
 bowtie-inspect-l-debug: bowtie_inspect.cpp $(HEADERS) $(OTHER_CPPS) 
 	$(CXX) $(DEBUG_FLAGS) \
 		$(DEBUG_DEFS) $(ALL_FLAGS) \
-		$(DEFS) -DBOWTIE_64BIT_INDEX -Wall \
+		$(DEFS) -DBOWTIE_64BIT_INDEX $(WARNING_FLAGS) \
 		$(INC) -I . \
 		-o $@ $< \
 		$(OTHER_CPPS) \
@@ -399,20 +399,21 @@ bowtie-src.zip: $(SRC_PKG_LIST)
 	cp .src.tmp/$@ .
 	rm -rf .src.tmp
 
-bowtie-bin.zip: $(BIN_PKG_LIST) $(BIN_LIST) $(BIN_LIST_AUX) 
+bowtie-bin.zip: $(RELEASE_DEPENDENCIES) $(BIN_PKG_LIST) $(BIN_LIST) $(BIN_LIST_AUX) 
+	$(eval PKG_DIR=bowtie-$(VERSION)-$(if $(MACOS),macos,$(if $(MINGW),mingw,linux))-x86_64)
 	chmod a+x scripts/*.sh scripts/*.pl
 	rm -rf .bin.tmp
 	mkdir .bin.tmp
-	mkdir .bin.tmp/bowtie-$(VERSION)
+	mkdir -p .bin.tmp/$(PKG_DIR)
 	if [ -f bowtie-align-s.exe ] ; then \
 		zip tmp.zip $(BIN_PKG_LIST) $(addsuffix .exe,$(BIN_LIST) $(BIN_LIST_AUX)) ; \
 	else \
 		zip tmp.zip $(BIN_PKG_LIST) $(BIN_LIST) $(BIN_LIST_AUX) ; \
 	fi
-	mv tmp.zip .bin.tmp/bowtie-$(VERSION)
-	cd .bin.tmp/bowtie-$(VERSION) ; unzip tmp.zip ; rm -f tmp.zip
-	cd .bin.tmp ; zip -r $@ bowtie-$(VERSION)
-	cp .bin.tmp/$@ .
+	mv tmp.zip .bin.tmp/$(PKG_DIR)
+	cd .bin.tmp/$(PKG_DIR) ; unzip tmp.zip ; rm -f tmp.zip
+	cd .bin.tmp ; zip -r $(PKG_DIR).zip $(PKG_DIR)
+	cp .bin.tmp/$(PKG_DIR).zip .
 	rm -rf .bin.tmp
 
 .PHONY: doc
@@ -455,6 +456,23 @@ perl-deps:
 		cpanm --force Math::Random Clone Test::Deep Sys::Info -n --quiet; \
 	fi
 
+# static-libs: fetch and build static zlib and TBB into .lib/.include (prerequisite of bowtie-bin.zip when RELEASE_BUILD is set)
+static-libs:
+	if [ ! -d $(CURDIR)/.lib ] || [ ! -d $(CURDIR)/.include ]; then \
+		mkdir -p $(CURDIR)/.lib $(CURDIR)/.include ; \
+	fi ; \
+	if [ "`uname`" = "Darwin" ]; then \
+		export CFLAGS=-mmacosx-version-min=10.9 ; \
+		export CXXFLAGS=-mmacosx-version-min=10.9 ; \
+	fi ; \
+	DL=$$([ `which wget` ] && echo "wget --no-check-certificate" || echo "curl -LO") ; \
+	cd /tmp ; \
+	$$DL https://zlib.net/zlib-1.2.11.tar.gz && tar xzf zlib-1.2.11.tar.gz && cd zlib-1.2.11 ; \
+	$(if $(MINGW), mingw32-make -f win32/Makefile.gcc, ./configure --static && make) && cp libz.a $(CURDIR)/.lib && cp zconf.h zlib.h $(CURDIR)/.include ; \
+	cd .. ; \
+	$$DL https://github.com/01org/tbb/archive/2017_U8.tar.gz && tar xzf 2017_U8.tar.gz && cd tbb-2017_U8; \
+	$(if $(MINGW), mingw32-make compiler=gcc arch=ia64 runtime=mingw, make) extra_inc=big_iron.inc -j4 \
+	&& cp -r include/tbb $(CURDIR)/.include && cp build/*_release/*.a $(CURDIR)/.lib
 
 .PHONY: clean
 clean:
@@ -464,3 +482,4 @@ clean:
 	bowtie-src.zip bowtie-bin.zip
 	rm -f core.*
 	rm -f bowtie-align-s-master* bowtie-align-s-no-io* 
+	rm -rf .lib .include
diff --git a/aligner.h b/aligner.h
index 2e5ac30..3c2be50 100644
--- a/aligner.h
+++ b/aligner.h
@@ -170,6 +170,8 @@ public:
 					saw_last_read = ret.second;
 					if(ret.first && (*patsrcs_)[i]->rdid() < qUpto_) {
 						(*aligners_)[i]->setQuery((*patsrcs_)[i]);
+					} else if (ret.first) {
+						saw_last_read = true;
 					}
 				}
 			}
@@ -291,6 +293,8 @@ public:
 							al = (*alignersSE_)[0];
 							seOrPe_[0] = true; // true = unpaired
 						}
+					} else if (ret.first) {
+						break;
 					}
 				}
 				first = false;
@@ -326,7 +330,7 @@ public:
 						// Get a new read
 						pair<bool, bool> ret = ps->nextReadPair();
 						saw_last_read = ret.second;
-						if(ps->rdid() < qUpto_ && ret.first) {
+						if (ret.first && ps->rdid() < qUpto_) {
 							if(ps->paired()) {
 								// Read currently in buffer is paired-end
 								(*alignersPE_)[i]->setQuery(ps);
@@ -336,6 +340,8 @@ public:
 								(*alignersSE_)[i]->setQuery(ps);
 								seOrPe_[i] = true; // true = unpaired
 							}
+						} else if (ret.first) {
+							break;
 						}
 					}
 				}
@@ -475,7 +481,6 @@ public:
 				colorExEnds,
 				snpPhred,
 				refs_,
-				ra.ebwt->rmap(),
 				ebwtFw,
 				ra.mms,                   // mismatch positions
 				ra.refcs,                 // reference characters for mms
@@ -861,8 +866,7 @@ protected:
 	            TIndexOffU tlen, // length of ref
 	            bool pairFw,   // whether the pair is being mapped to fw strand
 	            bool ebwtFwL,
-	            bool ebwtFwR,
-	            const ReferenceMap* rmap)
+	            bool ebwtFwR)
 	{
 		assert(gAllowMateContainment || upstreamOff < dnstreamOff);
 		TIndexOffU spreadL = rL.bot - rL.top;
@@ -889,7 +893,6 @@ protected:
 				colorExEnds,
 				snpPhred,
 				refs_,
-				rmap,
 				ebwtFwL,
 				rL.mms,                       // mismatch positions
 				rL.refcs,                     // reference characters for mms
@@ -924,7 +927,6 @@ protected:
 				colorExEnds,
 				snpPhred,
 				refs_,
-				rmap,
 				ebwtFwR,
 				rR.mms,                       // mismatch positions
 				rR.refcs,                     // reference characters for mms
@@ -951,12 +953,11 @@ protected:
 	            TIndexOffU upstreamOff, // offset for upstream mate
 	            TIndexOffU dnstreamOff, // offset for downstream mate
 	            TIndexOffU tlen, // length of ref
-	            bool pairFw,   // whether the pair is being mapped to fw strand
-	            const ReferenceMap* rmap)
+	            bool pairFw)   // whether the pair is being mapped to fw strand
 	{
 		return report(rL, rR, first, upstreamOff,
 		              dnstreamOff, tlen,
-		              pairFw, rL.ebwt->fw(), rR.ebwt->fw(), rmap);
+		              pairFw, rL.ebwt->fw(), rR.ebwt->fw());
 	}
 
 	/**
@@ -1098,8 +1099,7 @@ protected:
 				tlen,       // length of ref
 				!doneFw_,   // whether the pair is being mapped to fw strand
 				ebwtLFw,
-				ebwtRFw,
-				range.ebwt->rmap())) return true;
+				ebwtRFw)) return true;
 		}
 		return false;
 	}
@@ -1739,8 +1739,7 @@ protected:
 	            TIndexOffU tlen, // length of ref
 	            bool pairFw,   // whether the pair is being mapped to fw strand
 	            bool ebwtFwL,
-	            bool ebwtFwR,
-	            const ReferenceMap *rmap)
+	            bool ebwtFwR)
 	{
 		assert(gAllowMateContainment || upstreamOff < dnstreamOff);
 		TIndexOffU spreadL = rL.bot - rL.top;
@@ -1767,7 +1766,6 @@ protected:
 				colorExEnds,
 				snpPhred,
 				refs_,
-				rmap,
 				ebwtFwL,
 				rL.mms,                       // mismatch positions
 				rL.refcs,                     // reference characters for mms
@@ -1802,7 +1800,6 @@ protected:
 				colorExEnds,
 				snpPhred,
 				refs_,
-				rmap,
 				ebwtFwR,
 				rR.mms,                       // mismatch positions
 				rR.refcs,                     // reference characters for mms
@@ -1849,7 +1846,6 @@ protected:
 			colorExEnds,
 			snpPhred,
 			refs_,
-			r.ebwt->rmap(),
 			ebwtFw,
 			r.mms,                   // mismatch positions
 			r.refcs,                 // reference characters for mms
@@ -2034,8 +2030,7 @@ protected:
 				tlen,       // length of ref
 				pairFw,     // whether the pair is being mapped to fw strand
 				ebwtLFw,
-				ebwtRFw,
-				range.ebwt->rmap())) return true;
+				ebwtRFw)) return true;
 		}
 		return false;
 	}
diff --git a/annot.cpp b/annot.cpp
deleted file mode 100644
index fd88c7e..0000000
--- a/annot.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * annot.cpp
- *
- *  Created on: Aug 3, 2009
- *      Author: Ben Langmead
- */
-
-#include <stdexcept>
-#include "annot.h"
-
-using namespace std;
-
-/**
- * Parse an annotation-map file.
- */
-void AnnotationMap::parse() {
-	ifstream in(fname_);
-	if(!in.good() && in.is_open()) {
-		cerr << "Could not open annotation file " << fname_ << endl;
-		throw 1;
-	}
-	while(in.peek() != EOF) {
-		UPair pos;
-		CharPair an;
-		in >> pos.first >> pos.second >> an.first >> an.second;
-		map_[pos] = an;
-		while(isspace(in.peek())) in.get();
-	}
-	in.close();
-}
diff --git a/annot.h b/annot.h
deleted file mode 100644
index ab2c79a..0000000
--- a/annot.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * annot.h
- *
- *  Created on: Aug 3, 2009
- *      Author: Ben Langmead
- */
-
-#ifndef ANNOT_H_
-#define ANNOT_H_
-
-#include <stdint.h>
-#include <map>
-#include <iostream>
-#include <fstream>
-#include "btypes.h"
-
-/**
- * Encapsulates a sorted list of reference positions that are annotated
- * somehow (e.g. as a SNP).
- */
-class AnnotationMap {
-public:
-	typedef std::pair<TIndexOffU, TIndexOffU> UPair;
-	typedef std::pair<char, char> CharPair;
-	typedef std::map<UPair, CharPair> AnnotMap;
-	typedef std::map<UPair, CharPair>::const_iterator Iter;
-
-	AnnotationMap(const char *fname) {
-		fname_ = fname;
-		parse();
-	}
-
-	/**
-	 * Give a reference coordinate in the index, translate it into a
-	 * new reference coordinate via the reference map supplied by the
-	 * user.
-	 */
-	Iter lower_bound(const UPair& h) const {
-		return map_.lower_bound(h);
-	}
-
-	Iter begin() const {
-		return map_.begin();
-	}
-
-	Iter end() const {
-		return map_.end();
-	}
-
-protected:
-
-	/**
-	 * Parse an annotation-map file.
-	 */
-	void parse();
-
-	/// filename of file containing the annotation map
-	const char *fname_;
-	/// maps reference positions to character annotations
-	AnnotMap map_;
-};
-
-#endif /* ANNOT_H_ */
diff --git a/bitset.h b/bitset.h
index 569a33d..c062b0b 100644
--- a/bitset.h
+++ b/bitset.h
@@ -8,6 +8,7 @@
 #include <stdexcept>
 #include "assert_helpers.h"
 #include "threading.h"
+#include "btypes.h"
 
 /**
  * Given a words array and a size, allocate a new, larger array, moving
@@ -15,20 +16,20 @@
  * words to 0.  Return the new, larger array, which can be substituted
  * for the old one.  The new array is larger than the old by about 50%.
  */
-static inline uint32_t*
-bitsetRealloc(uint32_t& sz, uint32_t* words, const char *errmsg = NULL) {
-	uint32_t oldsz = sz;
+static inline TIndexOffU*
+bitsetRealloc(TIndexOffU& sz, TIndexOffU* words, const char *errmsg = NULL) {
+	TIndexOffU oldsz = sz;
 	if(sz > 0) {
-		sz += (sz >> 1) + 31; // Add 50% more elements, plus a bit
-		sz &= ~31;            // Make sure it's 32-aligned
+		sz += (sz >> 1) + BITSET_MASK; // Add 50% more elements, plus a bit
+		sz &= ~BITSET_MASK;            // Make sure it's WORD_SIZE-aligned
 	} else {
 		sz = 1024; // Start off at 1024 bits to avoid many expansions
 	}
 	assert_gt(sz, oldsz);
-	assert_eq(0, (sz & 31));
-	uint32_t *newwords;
+	assert_eq(0, (sz & BITSET_MASK));
+	TIndexOffU *newwords;
 	try {
-		newwords = new uint32_t[sz >> 5 /* convert to words */];
+		newwords = new TIndexOffU[sz / WORD_SIZE /* convert to words */];
 	} catch(std::bad_alloc& ba) {
 		if(errmsg != NULL) {
 			// Output given error message
@@ -41,7 +42,7 @@ bitsetRealloc(uint32_t& sz, uint32_t* words, const char *errmsg = NULL) {
 		memcpy(newwords, words, oldsz >> 3 /* convert to bytes */);
 	}
 	// Initialize all new words to 0
-	memset(newwords + (oldsz >> 5 /*convert to words*/), 0,
+	memset(newwords + (oldsz / WORD_SIZE /*convert to words*/), 0,
 	       (sz - oldsz) >> 3 /* convert to bytes */);
 	return newwords; // return new array
 }
@@ -56,10 +57,10 @@ public:
 	 * Allocate enough words to accommodate 'sz' bits.  Output the given
 	 * error message and quit if allocation fails.
 	 */
-	SyncBitset(uint32_t sz, const char *errmsg = NULL) : _errmsg(errmsg) {
-		uint32_t nwords = (sz >> 5)+1; // divide by 32 and add 1
+	SyncBitset(TIndexOffU sz, const char *errmsg = NULL) : _errmsg(errmsg) {
+		TIndexOffU nwords = (sz / WORD_SIZE)+1; // divide by WORD_SIZE and add 1
 		try {
-			_words = new uint32_t[nwords];
+			_words = new TIndexOffU[nwords];
 		} catch(std::bad_alloc& ba) {
 			if(_errmsg != NULL) {
 				std::cerr << _errmsg;
@@ -67,8 +68,8 @@ public:
 			throw 1;
 		}
 		assert(_words != NULL);
-		memset(_words, 0, nwords * 4 /* words to bytes */);
-		_sz = nwords << 5 /* words to bits */;
+		memset(_words, 0, nwords * OFF_SIZE /* words to bytes */);
+		_sz = nwords * WORD_SIZE /* words to bits */;
 	}
 
 	/**
@@ -81,9 +82,9 @@ public:
 	/**
 	 * Test whether the given bit is set in an unsynchronized manner.
 	 */
-	bool testUnsync(uint32_t i) {
+	bool testUnsync(TIndexOffU i) {
 		if(i < _sz) {
-			return ((_words[i >> 5] >> (i & 0x1f)) & 1) != 0;
+			return ((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) != 0;
 		}
 		return false;
 	}
@@ -91,7 +92,7 @@ public:
 	/**
 	 * Test whether the given bit is set in a synchronized manner.
 	 */
-	bool test(uint32_t i) {
+	bool test(TIndexOffU i) {
 		bool ret;
 		ThreadSafe _ts(&mutex_m);
 		ret = testUnsync(i);
@@ -102,39 +103,39 @@ public:
 	 * Set a bit in the vector that hasn't been set before.  Assert if
 	 * it has been set.  Uses synchronization.
 	 */
-	void set(uint32_t i) {
+	void set(TIndexOffU i) {
 		ThreadSafe _ts(&mutex_m);
 		while(i >= _sz) {
 			// Slow path: bitset needs to be expanded before the
 			// specified bit can be set
-			ASSERT_ONLY(uint32_t oldsz = _sz);
+			ASSERT_ONLY(TIndexOffU oldsz = _sz);
 			expand();
 			assert_gt(_sz, oldsz);
 		}
 		// Fast path
 		assert_lt(i, _sz);
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0);
-		_words[i >> 5] |= (1 << (i & 0x1f));
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0);
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
 	/**
 	 * Set a bit in the vector that might have already been set.  Uses
 	 * synchronization.
 	 */
-	void setOver(uint32_t i) {
+	void setOver(TIndexOffU i) {
 		ThreadSafe _ts(&mutex_m);
 		while(i >= _sz) {
 			// Slow path: bitset needs to be expanded before the
 			// specified bit can be set
-			ASSERT_ONLY(uint32_t oldsz = _sz);
+			ASSERT_ONLY(TIndexOffU oldsz = _sz);
 			expand();
 			assert_gt(_sz, oldsz);
 		}
 		// Fast path
 		assert_lt(i, _sz);
-		_words[i >> 5] |= (1 << (i & 0x1f));
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
 
@@ -145,15 +146,15 @@ private:
 	 * bits.
 	 */
 	void expand() {
-		uint32_t *newwords = bitsetRealloc(_sz, _words, _errmsg);
+		TIndexOffU *newwords = bitsetRealloc(_sz, _words, _errmsg);
 		delete[] _words;   // delete old array
 		_words = newwords; // install new array
 	}
 
 	const char *_errmsg; // error message if an allocation fails
-	uint32_t _sz;        // size as # of bits
+	TIndexOffU _sz;        // size as # of bits
 	MUTEX_T mutex_m;       // mutex
-	uint32_t *_words;    // storage
+	TIndexOffU *_words;    // storage
 };
 
 /**
@@ -162,10 +163,10 @@ private:
 class Bitset {
 
 public:
-	Bitset(uint32_t sz, const char *errmsg = NULL) : _errmsg(errmsg) {
-		uint32_t nwords = (sz >> 5)+1;
+	Bitset(TIndexOffU sz, const char *errmsg = NULL) : _errmsg(errmsg) {
+		TIndexOffU nwords = (sz / WORD_SIZE)+1;
 		try {
-			_words = new uint32_t[nwords];
+			_words = new TIndexOffU[nwords];
 		} catch(std::bad_alloc& ba) {
 			if(_errmsg != NULL) {
 				std::cerr << _errmsg;
@@ -173,8 +174,8 @@ public:
 			throw 1;
 		}
 		assert(_words != NULL);
-		memset(_words, 0, nwords * 4);
-		_sz = nwords << 5;
+		memset(_words, 0, nwords * OFF_SIZE);
+		_sz = nwords * WORD_SIZE;
 		_cnt = 0;
 	}
 
@@ -189,10 +190,10 @@ public:
 	/**
 	 * Test whether the given bit is set.
 	 */
-	bool test(uint32_t i) const {
+	bool test(TIndexOffU i) const {
 		bool ret = false;
 		if(i < _sz) {
-			ret = ((_words[i >> 5] >> (i & 0x1f)) & 1) != 0;
+			ret = ((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) != 0;
 		}
 		return ret;
 	}
@@ -201,43 +202,43 @@ public:
 	 * Set a bit in the vector that hasn't been set before.  Assert if
 	 * it has been set.
 	 */
-	void set(uint32_t i) {
+	void set(TIndexOffU i) {
 		while(i >= _sz) {
 			// Slow path: bitset needs to be expanded before the
 			// specified bit can be set
-			ASSERT_ONLY(uint32_t oldsz = _sz);
+			ASSERT_ONLY(TIndexOffU oldsz = _sz);
 			expand();
 			assert_gt(_sz, oldsz);
 		}
 		// Fast path
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0);
 		_cnt++;
-		_words[i >> 5] |= (1 << (i & 0x1f));
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
 	/**
 	 * Set a bit in the vector that might have already been set.
 	 */
-	void setOver(uint32_t i) {
+	void setOver(TIndexOffU i) {
 		while(i >= _sz) {
 			// Slow path: bitset needs to be expanded before the
 			// specified bit can be set
-			ASSERT_ONLY(uint32_t oldsz = _sz);
+			ASSERT_ONLY(TIndexOffU oldsz = _sz);
 			expand();
 			assert_gt(_sz, oldsz);
 		}
 		// Fast path
-		if(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0) _cnt++;
-		_words[i >> 5] |= (1 << (i & 0x1f));
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		if(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0) _cnt++;
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
 	/**
 	 * Unset all entries.  Don't adjust size.
 	 */
 	void clear() {
-		for(size_t i = 0; i < ((_sz+31)>>5); i++) {
+		for(size_t i = 0; i < ((_sz+BITSET_MASK) / WORD_SIZE); i++) {
 			_words[i] = 0;
 		}
 		_cnt = 0;
@@ -246,7 +247,7 @@ public:
 	/**
 	 * Return the number of set bits.
 	 */
-	uint32_t count() const {
+	TIndexOffU count() const {
 		return _cnt;
 	}
 
@@ -265,8 +266,8 @@ public:
 		_sz = o._sz;
 		_cnt = o._cnt;
 		if(_words != NULL) delete[] _words;
-		_words = new uint32_t[(_sz+31)>>5];
-		for(size_t i = 0; i < (_sz+31)>>5; i++) {
+		_words = new TIndexOffU[(_sz+BITSET_MASK) / WORD_SIZE];
+		for(size_t i = 0; i < (_sz+BITSET_MASK) / WORD_SIZE; i++) {
 			_words[i] = o._words[i];
 		}
 		return *this;
@@ -279,15 +280,15 @@ private:
 	 * bits.
 	 */
 	void expand() {
-		uint32_t *newwords = bitsetRealloc(_sz, _words, _errmsg);
+		TIndexOffU *newwords = bitsetRealloc(_sz, _words, _errmsg);
 		delete[] _words;   // delete old array
 		_words = newwords; // install new array
 	}
 
-	uint32_t _cnt;       // number of set bits
+	TIndexOffU _cnt;       // number of set bits
 	const char *_errmsg; // error message if an allocation fails
-	uint32_t _sz;        // size as # of bits
-	uint32_t *_words;    // storage
+	TIndexOffU _sz;        // size as # of bits
+	TIndexOffU *_words;    // storage
 };
 
 /**
@@ -298,65 +299,65 @@ class FixedBitset {
 
 public:
 	FixedBitset() : _cnt(0), _size(0) {
-		memset(_words, 0, ((LEN>>5)+1) * 4);
+		memset(_words, 0, ((LEN / WORD_SIZE)+1) * OFF_SIZE);
 	}
 
 	/**
 	 * Unset all bits.
 	 */
 	void clear() {
-		memset(_words, 0, ((LEN>>5)+1) * 4);
+		memset(_words, 0, ((LEN / WORD_SIZE)+1) * OFF_SIZE);
 	}
 
 	/**
 	 * Return true iff the bit at offset i has been set.
 	 */
-	bool test(uint32_t i) const {
+	bool test(TIndexOffU i) const {
 		bool ret = false;
 		assert_lt(i, LEN);
-		ret = ((_words[i >> 5] >> (i & 0x1f)) & 1) != 0;
+		ret = ((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) != 0;
 		return ret;
 	}
 
 	/**
 	 * Set the bit at offset i.  Assert if the bit was already set.
 	 */
-	void set(uint32_t i) {
+	void set(TIndexOffU i) {
 		// Fast path
 		assert_lt(i, LEN);
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0);
-		_words[i >> 5] |= (1 << (i & 0x1f));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0);
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
 		_cnt++;
 		if(i >= _size) {
 			_size = i+1;
 		}
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
 	/**
 	 * Set the bit at offset i.  Do not assert if the bit was already
 	 * set.
 	 */
-	void setOver(uint32_t i) {
+	void setOver(TIndexOffU i) {
 		// Fast path
 		assert_lt(i, LEN);
-		_words[i >> 5] |= (1 << (i & 0x1f));
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
 		_cnt++;
 		if(i >= _size) {
 			_size = i+1;
 		}
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
-	uint32_t count() const { return _cnt; }
-	uint32_t size() const  { return _size; }
+	TIndexOffU count() const { return _cnt; }
+	TIndexOffU size() const  { return _size; }
 
 	/**
 	 * Return true iff this FixedBitset has the same bits set as
 	 * FixedBitset 'that'.
 	 */
 	bool operator== (const FixedBitset<LEN>& that) const {
-		for(uint32_t i = 0; i < (LEN>>5)+1; i++) {
+		for(TIndexOffU i = 0; i < (LEN / WORD_SIZE)+1; i++) {
 			if(_words[i] != that._words[i]) {
 				return false;
 			}
@@ -369,7 +370,7 @@ public:
 	 * as FixedBitset 'that'.
 	 */
 	bool operator!= (const FixedBitset<LEN>& that) const {
-		for(uint32_t i = 0; i < (LEN>>5)+1; i++) {
+		for(TIndexOffU i = 0; i < (LEN / WORD_SIZE)+1; i++) {
 			if(_words[i] != that._words[i]) {
 				return true;
 			}
@@ -389,9 +390,9 @@ public:
 	}
 
 private:
-	uint32_t _cnt;
-	uint32_t _size;
-	uint32_t _words[(LEN>>5)+1]; // storage
+	TIndexOffU _cnt;
+	TIndexOffU _size;
+	TIndexOffU _words[(LEN / WORD_SIZE)+1]; // storage
 };
 
 /**
@@ -400,9 +401,9 @@ private:
 class FixedBitset2 {
 
 public:
-	FixedBitset2(uint32_t len) : len_(len), _cnt(0), _size(0) {
-		_words = new uint32_t[((len_ >> 5)+1)];
-		memset(_words, 0, ((len_ >> 5)+1) * 4);
+	FixedBitset2(TIndexOffU len) : len_(len), _cnt(0), _size(0) {
+		_words = new TIndexOffU[((len_ / WORD_SIZE)+1)];
+		memset(_words, 0, ((len_ / WORD_SIZE)+1) * OFF_SIZE);
 	}
 
 	~FixedBitset2() { delete[] _words; }
@@ -411,7 +412,7 @@ public:
 	 * Unset all bits.
 	 */
 	void clear() {
-		memset(_words, 0, ((len_ >> 5)+1) * 4);
+		memset(_words, 0, ((len_ / WORD_SIZE)+1) * OFF_SIZE);
 		_cnt = 0;
 		_size = 0;
 	}
@@ -419,69 +420,69 @@ public:
 	/**
 	 * Return true iff the bit at offset i has been set.
 	 */
-	bool test(uint32_t i) const {
+	bool test(TIndexOffU i) const {
 		bool ret = false;
 		assert_lt(i, len_);
-		ret = ((_words[i >> 5] >> (i & 0x1f)) & 1) != 0;
+		ret = ((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) != 0;
 		return ret;
 	}
 
 	/**
 	 * Set the bit at offset i.  Assert if the bit was already set.
 	 */
-	void set(uint32_t i) {
+	void set(TIndexOffU i) {
 		// Fast path
 		assert_lt(i, len_);
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0);
-		_words[i >> 5] |= (1 << (i & 0x1f));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0);
+		_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
 		_cnt++;
 		if(i >= _size) {
 			_size = i+1;
 		}
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
 	/**
 	 * Clear the bit at offset i.  Assert if the bit was not already set.
 	 */
-	void clear(uint32_t i) {
+	void clear(TIndexOffU i) {
 		// Fast path
 		assert_lt(i, len_);
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
-		_words[i >> 5] &= ~(1 << (i & 0x1f));
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
+		_words[i / WORD_SIZE] &= ~((TIndexOffU)1 << (i & BITSET_MASK));
 		_cnt--;
 		if(i >= _size) {
 			_size = i+1;
 		}
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0);
 	}
 
 	/**
 	 * Set the bit at offset i.  Do not assert if the bit was already
 	 * set.
 	 */
-	void setOver(uint32_t i) {
+	void setOver(TIndexOffU i) {
 		// Fast path
 		assert_lt(i, len_);
-		if(((_words[i >> 5] >> (i & 0x1f)) & 1) == 0) {
-			_words[i >> 5] |= (1 << (i & 0x1f));
+		if(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 0) {
+			_words[i / WORD_SIZE] |= ((TIndexOffU)1 << (i & BITSET_MASK));
 			_cnt++;
 		}
 		if(i >= _size) {
 			_size = i+1;
 		}
-		assert(((_words[i >> 5] >> (i & 0x1f)) & 1) == 1);
+		assert(((_words[i / WORD_SIZE] >> (i & BITSET_MASK)) & 1) == 1);
 	}
 
-	uint32_t count() const { return _cnt; }
-	uint32_t size() const  { return _size; }
+	TIndexOffU count() const { return _cnt; }
+	TIndexOffU size() const  { return _size; }
 
 	/**
 	 * Return true iff this FixedBitset has the same bits set as
 	 * FixedBitset 'that'.
 	 */
 	bool operator== (const FixedBitset2& that) const {
-		for(uint32_t i = 0; i < (len_>>5)+1; i++) {
+		for(TIndexOffU i = 0; i < (len_ / WORD_SIZE)+1; i++) {
 			if(_words[i] != that._words[i]) {
 				return false;
 			}
@@ -494,7 +495,7 @@ public:
 	 * as FixedBitset 'that'.
 	 */
 	bool operator!= (const FixedBitset2& that) const {
-		for(uint32_t i = 0; i < (len_>>5)+1; i++) {
+		for(TIndexOffU i = 0; i < (len_ / WORD_SIZE)+1; i++) {
 			if(_words[i] != that._words[i]) {
 				return true;
 			}
@@ -514,10 +515,10 @@ public:
 	}
 
 private:
-	const uint32_t len_;
-	uint32_t _cnt;
-	uint32_t _size;
-	uint32_t *_words; // storage
+	const TIndexOffU len_;
+	TIndexOffU _cnt;
+	TIndexOffU _size;
+	TIndexOffU *_words; // storage
 };
 
 #endif /* BITSET_H_ */
diff --git a/blockwise_sa.h b/blockwise_sa.h
index 2ca7b7c..ce3b903 100644
--- a/blockwise_sa.h
+++ b/blockwise_sa.h
@@ -330,12 +330,7 @@ class KarkkainenBlockwiseSA : public InorderBlockwiseSA<TStr> {
 					_cur++;
 				} else {
 					while(!_done[this->_itrBucketIdx]) {
-#if defined(_TTHREAD_WIN32_)
-						Sleep(1);
-#else
-						const static timespec ts = {0, 1000000};  // 1 millisecond
-						nanosleep(&ts, NULL);
-#endif
+						SLEEP(1);
 					}
 					// Read suffixes from a file
 					std::ostringstream number; number << this->_itrBucketIdx;
diff --git a/bowtie_inspect.cpp b/bowtie_inspect.cpp
index 908c300..3e1eb88 100644
--- a/bowtie_inspect.cpp
+++ b/bowtie_inspect.cpp
@@ -373,7 +373,6 @@ void print_index_summary(
 		false,                // sweep memory-mapped memory
 		true,                 // load names?
 		//false,                // load SA sample?
-		NULL,                 // no reference map
 		verbose,              // be talkative?
 		verbose,              // be talkative at startup?
 		false,                // pass up memory exceptions?
@@ -450,7 +449,6 @@ static void driver(
 			false,                // sweep memory-mapped memory
 			true,                 // load names?
 			//true,                 // load SA sample?
-			NULL,                 // no reference map
 			verbose,              // be talkative?
 			verbose,              // be talkative at startup?
 			false,                // pass up memory exceptions?
diff --git a/btypes.h b/btypes.h
index 564b6b1..bebcf46 100644
--- a/btypes.h
+++ b/btypes.h
@@ -1,5 +1,3 @@
-
-
 #ifndef BOWTIE_INDEX_TYPES_H
 #define	BOWTIE_INDEX_TYPES_H
 
@@ -9,6 +7,8 @@
 #define LS_SIZE 0x100000000000000
 #define OFF_SIZE 8
 #define CACHE_WRAPPER_BIT 0x8000000000000000
+#define WORD_SIZE 64
+#define BITSET_MASK 0x000000000000003f
 
 typedef uint64_t TIndexOffU;
 typedef int64_t TIndexOff;
@@ -19,6 +19,8 @@ typedef int64_t TIndexOff;
 #define LS_SIZE 0x10000000
 #define OFF_SIZE 4
 #define CACHE_WRAPPER_BIT 0x80000000
+#define WORD_SIZE 32
+#define BITSET_MASK 0x0000001f
 
 typedef uint32_t TIndexOffU;
 typedef int TIndexOff;
@@ -28,4 +30,3 @@ typedef int TIndexOff;
 extern const std::string gEbwt_ext;
 
 #endif	/* BOWTIE_INDEX_TYPES_H */
-
diff --git a/doc/website/recent_news.ssi b/doc/website/recent_news.ssi
index c9f561a..8904cb6 100644
--- a/doc/website/recent_news.ssi
+++ b/doc/website/recent_news.ssi
@@ -5,7 +5,8 @@
 
 <h2>1.2.1 - 06/12/2017</h2>
 <p>Please note that Bowtie will be switching to the Artistic 2.0 license in the next release.<br/>
-Pre-build binaries now include statically linked TBB and zlib libraries no longer requiring</p>
+Pre-built binaries now include statically linked TBB and zlib libraries no longer requiring that
+these libraries be pre-installed.</p>
 <ul>
     <li>Fixed an issue which caused Bowtie to hang during parallell index building when running an optimized binary</li>
     <li>Deprecated <tt>--refout</tt> option.  It will be fully removed in the next release</li>
diff --git a/ebwt.h b/ebwt.h
index 0f8877f..fe6300c 100644
--- a/ebwt.h
+++ b/ebwt.h
@@ -35,7 +35,6 @@
 #include "str_util.h"
 #include "mm.h"
 #include "timer.h"
-#include "refmap.h"
 #include "color_dec.h"
 #include "reference.h"
 
@@ -378,7 +377,6 @@ public:
 	    _useMm(false), \
 	    useShmem_(false), \
 	    _refnames(), \
-	    rmap_(NULL), \
 	    mmFile1_(NULL), \
 	    mmFile2_(NULL)
 
@@ -404,7 +402,6 @@ public:
 	     bool useShmem = false,
 	     bool mmSweep = false,
 	     bool loadNames = false,
-	     const ReferenceMap* rmap = NULL,
 	     bool verbose = false,
 	     bool startVerbose = false,
 	     bool passMemExc = false,
@@ -417,7 +414,6 @@ public:
         ProcessorSupport ps; 
         _usePOPCNTinstruction = ps.POPCNTenabled(); 
 #endif 
-		rmap_ = rmap;
 		_useMm = useMm;
 		useShmem_ = useShmem;
 		_in1Str = in + ".1." + gEbwt_ext;
@@ -870,7 +866,6 @@ public:
 	TIndexOffU*   plen() const         { return _plen; }
 	TIndexOffU*   rstarts() const      { return _rstarts; }
 	uint8_t*    ebwt() const         { return _ebwt; }
-	const ReferenceMap* rmap() const { return rmap_; }
 	bool        toBe() const         { return _toBigEndian; }
 	bool        verbose() const      { return _verbose; }
 	bool        sanityCheck() const  { return _sanity; }
@@ -1223,7 +1218,6 @@ public:
 	bool       _useMm;        /// use memory-mapped files to hold the index
 	bool       useShmem_;     /// use shared memory to hold large parts of the index
 	vector<string> _refnames; /// names of the reference sequences
-	const ReferenceMap* rmap_; /// mapping into another reference coordinate space
 	char *mmFile1_;
 	char *mmFile2_;
 	EbwtParams _eh;
@@ -1301,7 +1295,6 @@ public:
 	               bool colExEnds,      // true -> exclude nucleotides at extreme ends after decoding
 	               int snpPhred,        // penalty for a SNP
 	               const BitPairReference* ref, // reference (= NULL if not necessary)
-	               const ReferenceMap* rmap, // map to another reference coordinate system
 	               bool ebwtFw,         // whether index is forward (true) or mirror (false)
 	               const std::vector<TIndexOffU>& mmui32, // mismatch list
 	               const std::vector<uint8_t>& refcs,  // reference characters
@@ -1529,7 +1522,6 @@ public:
 			if(diffs != hit.mms) assert(false);
 		}
 		hit.h = h;
-		if(rmap != NULL) rmap->map(hit.h);
 		hit.patId = ((patid == 0xffffffff) ? _patid : patid);
 		hit.patName = *name;
 		hit.mh = mh;
@@ -2689,7 +2681,6 @@ inline bool Ebwt<TStr>::report(const String<Dna5>& query,
 			colExEnds,                // true -> exclude nucleotides on ends
 			snpPhred,                 // phred probability of SNP
 			ref,                      // reference sequence
-			rmap_,                    // map to another reference coordinate system
 			_fw,                      // true = index is forward; false = mirror
 			mmui32,                   // mismatch positions
 			refcs,                    // reference characters for mms
diff --git a/ebwt_search.cpp b/ebwt_search.cpp
index f85d279..f8f79f5 100644
--- a/ebwt_search.cpp
+++ b/ebwt_search.cpp
@@ -9,6 +9,12 @@
 #include <getopt.h>
 #include <vector>
 #include <time.h>
+
+#ifndef _WIN32
+#include <dirent.h>
+#include <signal.h>
+#endif
+
 #include "alphabet.h"
 #include "assert_helpers.h"
 #include "endian_swap.h"
@@ -21,8 +27,6 @@
 #include "bitset.h"
 #include "threading.h"
 #include "range_cache.h"
-#include "refmap.h"
-#include "annot.h"
 #include "aligner.h"
 #include "aligner_0mm.h"
 #include "aligner_1mm.h"
@@ -35,8 +39,13 @@
 #include <CHUD/CHUD.h>
 #endif
 
+static int FNAME_SIZE;
 #ifdef WITH_TBB
- #include <tbb/compat/thread>
+#include <tbb/compat/thread>
+static tbb::atomic<int> thread_counter;
+#else
+static int thread_counter;
+static MUTEX_T thread_counter_mutex;
 #endif
 
 using namespace std;
@@ -65,7 +74,6 @@ static int reportOpps;    // whether to report # of other mappings
 static int offRate;       // keep default offRate
 static int isaRate;       // keep default isaRate
 static int mismatches;    // allow 0 mismatches by default
-static char *patDumpfile; // filename to dump patterns to
 static bool solexaQuals;  // quality strings are solexa quals, not phred, and subtract 64 (not 33)
 static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33)
 static bool integerQuals; // quality strings are space-separated strings of integers, not ASCII
@@ -76,10 +84,11 @@ static int qualThresh;    // max qual-weighted hamming dist (maq's -e)
 static int maxBtsBetter;  // max # backtracks allowed in half-and-half mode
 static int maxBts;        // max # backtracks allowed in half-and-half mode
 static int nthreads;      // number of pthreads operating concurrently
+static bool reorder;      // reorder SAM output when running multi-threaded
+static int thread_ceiling;// maximum number of threads user wants bowtie to use
+static string thread_stealing_dir; // keep track of pids in this directory
+static bool thread_stealing;// true iff thread stealing is in use
 static output_types outType;  // style of output
-static bool randReadsNoSync;  // true -> generate reads from per-thread random source
-static int numRandomReads;    // # random reads (see Random*PatternSource in pat.h)
-static int lenRandomReads;    // len of random reads (see Random*PatternSource in pat.h)
 static bool noRefNames;       // true -> print reference indexes; not names
 static string dumpAlBase;     // basename of same-format files to dump aligned reads to
 static string dumpUnalBase;   // basename of same-format files to dump unaligned reads to
@@ -88,10 +97,9 @@ static uint32_t khits;  // number of hits per read; >1 is much slower
 static uint32_t mhits;  // don't report any hits if there are > mhits
 static bool better;     // true -> guarantee alignments from best possible stratum
 static bool strata;     // true -> don't stop at stratum boundaries
-static bool refOut;     // if true, alignments go to per-ref files
 static int partitionSz; // output a partitioning key in first field
 static int readsPerBatch; // # reads to read from input file at once
-static int outBatchSz; // # alignments to write to output file at once
+static size_t outBatchSz; // # alignments to write to output file at once
 static bool noMaqRound; // true -> don't round quals to nearest 10 like maq
 static bool fileParallel; // separate threads read separate input files in parallel
 static bool useShmem;     // use shared memory to hold the index
@@ -121,8 +129,6 @@ static int chunkSz;    // size of single chunk disbursed by ChunkPool
 static bool chunkVerbose; // have chunk allocator output status messages?
 static bool useV1;
 static bool reportSe;
-static const char * refMapFile;  // file containing a map from index coordinates to another coordinate system
-static const char * annotMapFile;  // file containing a map from reference coordinates to annotations
 static size_t fastaContLen;
 static size_t fastaContFreq;
 static bool hadoopOut; // print Hadoop status and summary messages
@@ -174,7 +180,6 @@ static void resetOptions() {
 	offRate					= -1; // keep default offRate
 	isaRate					= -1; // keep default isaRate
 	mismatches				= 0; // allow 0 mismatches by default
-	patDumpfile				= NULL; // filename to dump patterns to
 	solexaQuals				= false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
 	phred64Quals			= false; // quality chars are phred, but must subtract 64 (not 33)
 	integerQuals			= false; // quality strings are space-separated strings of integers, not ASCII
@@ -185,10 +190,12 @@ static void resetOptions() {
 	maxBtsBetter			= 125; // max # backtracks allowed in half-and-half mode
 	maxBts					= 800; // max # backtracks allowed in half-and-half mode
 	nthreads				= 1;     // number of pthreads operating concurrently
+    reorder                 = false; // reorder SAM output
+	thread_ceiling			= 0;     // max # threads user asked for
+	thread_stealing_dir		= ""; // keep track of pids in this directory
+	thread_stealing			= false; // true iff thread stealing is in use
+	FNAME_SIZE				= 200;
 	outType					= OUTPUT_FULL;  // style of output
-	randReadsNoSync			= false; // true -> generate reads from per-thread random source
-	numRandomReads			= 50000000; // # random reads (see Random*PatternSource in pat.h)
-	lenRandomReads			= 35;    // len of random reads (see Random*PatternSource in pat.h)
 	noRefNames				= false; // true -> print reference indexes; not names
 	dumpAlBase				= "";    // basename of same-format files to dump aligned reads to
 	dumpUnalBase			= "";    // basename of same-format files to dump unaligned reads to
@@ -197,7 +204,6 @@ static void resetOptions() {
 	mhits					= 0xffffffff; // don't report any hits if there are > mhits
 	better					= false; // true -> guarantee alignments from best possible stratum
 	strata					= false; // true -> don't stop at stratum boundaries
-	refOut					= false; // if true, alignments go to per-ref files
 	partitionSz				= 0;     // output a partitioning key in first field
 	readsPerBatch			= 16;    // # reads to read from input file at once
 	outBatchSz				= 16;    // # alignments to wrote to output file at once
@@ -230,8 +236,6 @@ static void resetOptions() {
 	chunkVerbose			= false; // have chunk allocator output status messages?
 	useV1					= true;
 	reportSe				= false;
-	refMapFile				= NULL;  // file containing a map from index coordinates to another coordinate system
-	annotMapFile			= NULL;  // file containing a map from reference coordinates to annotations
 	fastaContLen			= 0;
 	fastaContFreq			= 0;
 	hadoopOut				= false; // print Hadoop status and summary messages
@@ -266,15 +270,12 @@ static const char *short_options = "fF:qbzhcu:rv:s:at3:5:o:e:n:l:w:p:k:m:M:1:2:I
 enum {
 	ARG_ORIG = 256,
 	ARG_SEED,
-	ARG_DUMP_PATS,
 	ARG_RANGE,
-	ARG_CONCISE,
 	ARG_SOLEXA_QUALS,
 	ARG_MAXBTS,
 	ARG_VERBOSE,
 	ARG_STARTVERBOSE,
 	ARG_QUIET,
-	ARG_NOOUT,
 	ARG_FAST,
 	ARG_AL,
 	ARG_UN,
@@ -284,7 +285,6 @@ enum {
 	ARG_OLDBEST,
 	ARG_BETTER,
 	ARG_BEST,
-	ARG_REFOUT,
 	ARG_ISARATE,
 	ARG_PARTITION,
 	ARG_READS_PER_BATCH,
@@ -316,8 +316,6 @@ enum {
 	ARG_CHUNKVERBOSE,
 	ARG_STRATA,
 	ARG_PEV2,
-	ARG_REFMAP,
-	ARG_ANNOTMAP,
 	ARG_REPORTSE,
 	ARG_HADOOPOUT,
 	ARG_FUZZY,
@@ -343,113 +341,114 @@ enum {
 	ARG_WRAPPER,
 	ARG_INTERLEAVED_FASTQ,
 	ARG_SAM_NO_UNAL,
+	ARG_THREAD_CEILING,
+	ARG_THREAD_PIDDIR,
+	ARG_REORDER_SAM,
 };
 
 static struct option long_options[] = {
-	{(char*)"verbose",      no_argument,       0,            ARG_VERBOSE},
-	{(char*)"startverbose", no_argument,       0,            ARG_STARTVERBOSE},
-	{(char*)"quiet",        no_argument,       0,            ARG_QUIET},
-	{(char*)"sanity",       no_argument,       0,            ARG_SANITY},
-	{(char*)"pause",        no_argument,       &ipause,      1},
-	{(char*)"orig",         required_argument, 0,            ARG_ORIG},
-	{(char*)"all",          no_argument,       0,            'a'},
-	{(char*)"concise",      no_argument,       0,            ARG_CONCISE},
-	{(char*)"noout",        no_argument,       0,            ARG_NOOUT},
-	{(char*)"solexa-quals", no_argument,       0,            ARG_SOLEXA_QUALS},
-	{(char*)"integer-quals",no_argument,       0,            ARG_integerQuals},
-	{(char*)"time",         no_argument,       0,            't'},
-	{(char*)"trim3",        required_argument, 0,            '3'},
-	{(char*)"trim5",        required_argument, 0,            '5'},
-	{(char*)"seed",         required_argument, 0,            ARG_SEED},
-	{(char*)"qupto",        required_argument, 0,            'u'},
-	{(char*)"al",           required_argument, 0,            ARG_AL},
-	{(char*)"un",           required_argument, 0,            ARG_UN},
-	{(char*)"max",          required_argument, 0,            ARG_MAXDUMP},
-	{(char*)"offrate",      required_argument, 0,            'o'},
-	{(char*)"isarate",      required_argument, 0,            ARG_ISARATE},
-	{(char*)"reportopps",   no_argument,       &reportOpps,  1},
-	{(char*)"version",      no_argument,       &showVersion, 1},
-	{(char*)"reads-per-batch", required_argument, 0,         ARG_READS_PER_BATCH},
-	{(char*)"dumppats",     required_argument, 0,            ARG_DUMP_PATS},
-	{(char*)"maqerr",       required_argument, 0,            'e'},
-	{(char*)"seedlen",      required_argument, 0,            'l'},
-	{(char*)"seedmms",      required_argument, 0,            'n'},
-	{(char*)"filepar",      no_argument,       0,            ARG_FILEPAR},
-	{(char*)"help",         no_argument,       0,            'h'},
-	{(char*)"threads",      required_argument, 0,            'p'},
-	{(char*)"khits",        required_argument, 0,            'k'},
-	{(char*)"mhits",        required_argument, 0,            'm'},
-	{(char*)"minins",       required_argument, 0,            'I'},
-	{(char*)"maxins",       required_argument, 0,            'X'},
-	{(char*)"quals",        required_argument, 0,            'Q'},
-	{(char*)"Q1",           required_argument, 0,            ARG_QUALS1},
-	{(char*)"Q2",           required_argument, 0,            ARG_QUALS2},
-	{(char*)"best",         no_argument,       0,            ARG_BEST},
-	{(char*)"better",       no_argument,       0,            ARG_BETTER},
-	{(char*)"oldbest",      no_argument,       0,            ARG_OLDBEST},
-	{(char*)"strata",       no_argument,       0,            ARG_STRATA},
-	{(char*)"nomaqround",   no_argument,       0,            ARG_NOMAQROUND},
-	{(char*)"refidx",       no_argument,       0,            ARG_REFIDX},
-	{(char*)"range",        no_argument,       0,            ARG_RANGE},
-	{(char*)"maxbts",       required_argument, 0,            ARG_MAXBTS},
-	{(char*)"phased",       no_argument,       0,            'z'},
-	{(char*)"partition",    required_argument, 0,            ARG_PARTITION},
-	{(char*)"stateful",     no_argument,       0,            ARG_STATEFUL},
-	{(char*)"prewidth",     required_argument, 0,            ARG_PREFETCH_WIDTH},
-	{(char*)"ff",           no_argument,       0,            ARG_FF},
-	{(char*)"fr",           no_argument,       0,            ARG_FR},
-	{(char*)"rf",           no_argument,       0,            ARG_RF},
-	{(char*)"mixthresh",    required_argument, 0,            'x'},
-	{(char*)"pairtries",    required_argument, 0,            ARG_MIXED_ATTEMPTS},
-	{(char*)"noreconcile",  no_argument,       0,            ARG_NO_RECONCILE},
-	{(char*)"cachelim",     required_argument, 0,            ARG_CACHE_LIM},
-	{(char*)"cachesz",      required_argument, 0,            ARG_CACHE_SZ},
-	{(char*)"nofw",         no_argument,       0,            ARG_NO_FW},
-	{(char*)"norc",         no_argument,       0,            ARG_NO_RC},
-	{(char*)"offbase",      required_argument, 0,            'B'},
-	{(char*)"tryhard",      no_argument,       0,            'y'},
-	{(char*)"skip",         required_argument, 0,            's'},
-	{(char*)"strandfix",    no_argument,       0,            ARG_STRAND_FIX},
-	{(char*)"stats",        no_argument,       0,            ARG_STATS},
-	{(char*)"12",           required_argument, 0,            ARG_ONETWO},
-	{(char*)"phred33-quals", no_argument,      0,            ARG_PHRED33},
-	{(char*)"phred64-quals", no_argument,      0,            ARG_PHRED64},
-	{(char*)"solexa1.3-quals", no_argument,    0,            ARG_PHRED64},
-	{(char*)"chunkmbs",     required_argument, 0,            ARG_CHUNKMBS},
-	{(char*)"chunksz",      required_argument, 0,            ARG_CHUNKSZ},
-	{(char*)"chunkverbose", no_argument,       0,            ARG_CHUNKVERBOSE},
-	{(char*)"mm",           no_argument,       0,            ARG_MM},
-	{(char*)"shmem",        no_argument,       0,            ARG_SHMEM},
-	{(char*)"mmsweep",      no_argument,       0,            ARG_MMSWEEP},
-	{(char*)"pev2",         no_argument,       0,            ARG_PEV2},
-	{(char*)"refmap",       required_argument, 0,            ARG_REFMAP},
-	{(char*)"annotmap",     required_argument, 0,            ARG_ANNOTMAP},
-	{(char*)"reportse",     no_argument,       0,            ARG_REPORTSE},
-	{(char*)"hadoopout",    no_argument,       0,            ARG_HADOOPOUT},
-	{(char*)"fullref",      no_argument,       0,            ARG_FULLREF},
-	{(char*)"usage",        no_argument,       0,            ARG_USAGE},
-	{(char*)"sam",          no_argument,       0,            'S'},
-	{(char*)"sam-no-qname-trunc", no_argument, 0,            ARG_SAM_NO_QNAME_TRUNC},
-	{(char*)"sam-nohead",   no_argument,       0,            ARG_SAM_NOHEAD},
-	{(char*)"sam-nosq",     no_argument,       0,            ARG_SAM_NOSQ},
-	{(char*)"sam-noSQ",     no_argument,       0,            ARG_SAM_NOSQ},
-	{(char*)"color",        no_argument,       0,            'C'},
-	{(char*)"sam-RG",       required_argument, 0,            ARG_SAM_RG},
-	{(char*)"snpphred",     required_argument, 0,            ARG_SNPPHRED},
-	{(char*)"snpfrac",      required_argument, 0,            ARG_SNPFRAC},
-	{(char*)"suppress",     required_argument, 0,            ARG_SUPPRESS_FIELDS},
-	{(char*)"mapq",         required_argument, 0,            ARG_DEFAULT_MAPQ},
-	{(char*)"col-cseq",     no_argument,       0,            ARG_COLOR_SEQ},
-	{(char*)"col-cqual",    no_argument,       0,            ARG_COLOR_QUAL},
-	{(char*)"col-keepends", no_argument,       0,            ARG_COLOR_KEEP_ENDS},
-	{(char*)"cost",         no_argument,       0,            ARG_COST},
-	{(char*)"showseed",     no_argument,       0,            ARG_SHOWSEED},
-	{(char*)"allow-contain",no_argument,       0,            ARG_ALLOW_CONTAIN},
-	{(char*)"col-primer",   no_argument,       0,            ARG_COLOR_PRIMER},
-	{(char*)"wrapper",      required_argument, 0,            ARG_WRAPPER},
-	{(char*)"interleaved",  required_argument, 0,            ARG_INTERLEAVED_FASTQ},
-	{(char*)"no-unal",      no_argument,       0,            ARG_SAM_NO_UNAL},
-	{(char*)0, 0, 0, 0} // terminator
+{(char*)"verbose",                           no_argument,        0,                    ARG_VERBOSE},
+{(char*)"startverbose",                      no_argument,        0,                    ARG_STARTVERBOSE},
+{(char*)"quiet",                             no_argument,        0,                    ARG_QUIET},
+{(char*)"sanity",                            no_argument,        0,                    ARG_SANITY},
+{(char*)"pause",                             no_argument,        &ipause,              1},
+{(char*)"orig",                              required_argument,  0,                    ARG_ORIG},
+{(char*)"all",                               no_argument,        0,                    'a'},
+{(char*)"solexa-quals",                      no_argument,        0,                    ARG_SOLEXA_QUALS},
+{(char*)"integer-quals",                     no_argument,        0,                    ARG_integerQuals},
+{(char*)"time",                              no_argument,        0,                    't'},
+{(char*)"trim3",                             required_argument,  0,                    '3'},
+{(char*)"trim5",                             required_argument,  0,                    '5'},
+{(char*)"seed",                              required_argument,  0,                    ARG_SEED},
+{(char*)"qupto",                             required_argument,  0,                    'u'},
+{(char*)"al",                                required_argument,  0,                    ARG_AL},
+{(char*)"un",                                required_argument,  0,                    ARG_UN},
+{(char*)"max",                               required_argument,  0,                    ARG_MAXDUMP},
+{(char*)"offrate",                           required_argument,  0,                    'o'},
+{(char*)"isarate",                           required_argument,  0,                    ARG_ISARATE},
+{(char*)"reportopps",                        no_argument,        &reportOpps,          1},
+{(char*)"version",                           no_argument,        &showVersion,         1},
+{(char*)"reads-per-batch",                   required_argument,  0,                    ARG_READS_PER_BATCH},
+{(char*)"maqerr",                            required_argument,  0,                    'e'},
+{(char*)"seedlen",                           required_argument,  0,                    'l'},
+{(char*)"seedmms",                           required_argument,  0,                    'n'},
+{(char*)"filepar",                           no_argument,        0,                    ARG_FILEPAR},
+{(char*)"help",                              no_argument,        0,                    'h'},
+{(char*)"threads",                           required_argument,  0,                    'p'},
+{(char*)"khits",                             required_argument,  0,                    'k'},
+{(char*)"mhits",                             required_argument,  0,                    'm'},
+{(char*)"minins",                            required_argument,  0,                    'I'},
+{(char*)"maxins",                            required_argument,  0,                    'X'},
+{(char*)"quals",                             required_argument,  0,                    'Q'},
+{(char*)"Q1",                                required_argument,  0,                    ARG_QUALS1},
+{(char*)"Q2",                                required_argument,  0,                    ARG_QUALS2},
+{(char*)"best",                              no_argument,        0,                    ARG_BEST},
+{(char*)"better",                            no_argument,        0,                    ARG_BETTER},
+{(char*)"oldbest",                           no_argument,        0,                    ARG_OLDBEST},
+{(char*)"strata",                            no_argument,        0,                    ARG_STRATA},
+{(char*)"nomaqround",                        no_argument,        0,                    ARG_NOMAQROUND},
+{(char*)"refidx",                            no_argument,        0,                    ARG_REFIDX},
+{(char*)"range",                             no_argument,        0,                    ARG_RANGE},
+{(char*)"maxbts",                            required_argument,  0,                    ARG_MAXBTS},
+{(char*)"phased",                            no_argument,        0,                    'z'},
+{(char*)"partition",                         required_argument,  0,                    ARG_PARTITION},
+{(char*)"stateful",                          no_argument,        0,                    ARG_STATEFUL},
+{(char*)"prewidth",                          required_argument,  0,                    ARG_PREFETCH_WIDTH},
+{(char*)"ff",                                no_argument,        0,                    ARG_FF},
+{(char*)"fr",                                no_argument,        0,                    ARG_FR},
+{(char*)"rf",                                no_argument,        0,                    ARG_RF},
+{(char*)"mixthresh",                         required_argument,  0,                    'x'},
+{(char*)"pairtries",                         required_argument,  0,                    ARG_MIXED_ATTEMPTS},
+{(char*)"noreconcile",                       no_argument,        0,                    ARG_NO_RECONCILE},
+{(char*)"cachelim",                          required_argument,  0,                    ARG_CACHE_LIM},
+{(char*)"cachesz",                           required_argument,  0,                    ARG_CACHE_SZ},
+{(char*)"nofw",                              no_argument,        0,                    ARG_NO_FW},
+{(char*)"norc",                              no_argument,        0,                    ARG_NO_RC},
+{(char*)"offbase",                           required_argument,  0,                    'B'},
+{(char*)"tryhard",                           no_argument,        0,                    'y'},
+{(char*)"skip",                              required_argument,  0,                    's'},
+{(char*)"strandfix",                         no_argument,        0,                    ARG_STRAND_FIX},
+{(char*)"stats",                             no_argument,        0,                    ARG_STATS},
+{(char*)"12",                                required_argument,  0,                    ARG_ONETWO},
+{(char*)"phred33-quals",                     no_argument,        0,                    ARG_PHRED33},
+{(char*)"phred64-quals",                     no_argument,        0,                    ARG_PHRED64},
+{(char*)"solexa1.3-quals",                   no_argument,        0,                    ARG_PHRED64},
+{(char*)"chunkmbs",                          required_argument,  0,                    ARG_CHUNKMBS},
+{(char*)"chunksz",                           required_argument,  0,                    ARG_CHUNKSZ},
+{(char*)"chunkverbose",                      no_argument,        0,                    ARG_CHUNKVERBOSE},
+{(char*)"mm",                                no_argument,        0,                    ARG_MM},
+{(char*)"shmem",                             no_argument,        0,                    ARG_SHMEM},
+{(char*)"mmsweep",                           no_argument,        0,                    ARG_MMSWEEP},
+{(char*)"pev2",                              no_argument,        0,                    ARG_PEV2},
+{(char*)"reportse",                          no_argument,        0,                    ARG_REPORTSE},
+{(char*)"hadoopout",                         no_argument,        0,                    ARG_HADOOPOUT},
+{(char*)"fullref",                           no_argument,        0,                    ARG_FULLREF},
+{(char*)"usage",                             no_argument,        0,                    ARG_USAGE},
+{(char*)"sam",                               no_argument,        0,                    'S'},
+{(char*)"sam-no-qname-trunc",                no_argument,        0,                    ARG_SAM_NO_QNAME_TRUNC},
+{(char*)"sam-nohead",                        no_argument,        0,                    ARG_SAM_NOHEAD},
+{(char*)"sam-nosq",                          no_argument,        0,                    ARG_SAM_NOSQ},
+{(char*)"sam-noSQ",                          no_argument,        0,                    ARG_SAM_NOSQ},
+{(char*)"color",                             no_argument,        0,                    'C'},
+{(char*)"sam-RG",                            required_argument,  0,                    ARG_SAM_RG},
+{(char*)"snpphred",                          required_argument,  0,                    ARG_SNPPHRED},
+{(char*)"snpfrac",                           required_argument,  0,                    ARG_SNPFRAC},
+{(char*)"suppress",                          required_argument,  0,                    ARG_SUPPRESS_FIELDS},
+{(char*)"mapq",                              required_argument,  0,                    ARG_DEFAULT_MAPQ},
+{(char*)"col-cseq",                          no_argument,        0,                    ARG_COLOR_SEQ},
+{(char*)"col-cqual",                         no_argument,        0,                    ARG_COLOR_QUAL},
+{(char*)"col-keepends",                      no_argument,        0,                    ARG_COLOR_KEEP_ENDS},
+{(char*)"cost",                              no_argument,        0,                    ARG_COST},
+{(char*)"showseed",                          no_argument,        0,                    ARG_SHOWSEED},
+{(char*)"allow-contain",no_argument,         0,                  ARG_ALLOW_CONTAIN},
+{(char*)"col-primer",                        no_argument,        0,                    ARG_COLOR_PRIMER},
+{(char*)"wrapper",                           required_argument,  0,                    ARG_WRAPPER},
+{(char*)"interleaved",                       required_argument,  0,                    ARG_INTERLEAVED_FASTQ},
+{(char*)"no-unal",                           no_argument,        0,                    ARG_SAM_NO_UNAL},
+{(char*)"thread-ceiling",required_argument,  0,                  ARG_THREAD_CEILING},
+{(char*)"thread-piddir",                     required_argument,  0,                    ARG_THREAD_PIDDIR},
+{(char*)"reorder",                           no_argument,        0,                    ARG_REORDER_SAM},
+{(char*)0,                                   0,                  0,                    0} //  terminator
 };
 
 /**
@@ -554,6 +553,7 @@ static void printUsage(ostream& out) {
 #ifdef BOWTIE_SHARED_MEM
 	    << "  --shmem            use shared mem for index; many 'bowtie's can share" << endl
 #endif
+		<< "  --reorder          force SAM output order to match order of input reads" << endl
 	    << "Other:" << endl
 	    << "  --seed <int>       seed for random number generator" << endl
 	    << "  --verbose          verbose output (for debugging)" << endl
@@ -679,12 +679,7 @@ static void parseOptions(int argc, const char **argv) {
 			case ARG_RF: mate1fw = false; mate2fw = true;  mateFwSet = true; break;
 			case ARG_FR: mate1fw = true;  mate2fw = false; mateFwSet = true; break;
 			case ARG_RANGE: rangeMode = true; break;
-			case ARG_CONCISE: outType = OUTPUT_CONCISE; break;
 			case 'S': outType = OUTPUT_SAM; break;
-			case ARG_REFOUT: refOut = true; break;
-			case ARG_NOOUT: outType = OUTPUT_NONE; break;
-			case ARG_REFMAP: refMapFile = optarg; break;
-			case ARG_ANNOTMAP: annotMapFile = optarg; break;
 			case ARG_SHMEM: useShmem = true; break;
 			case ARG_COLOR_SEQ: colorSeq = true; break;
 			case ARG_COLOR_QUAL: colorQual = true; break;
@@ -795,6 +790,15 @@ static void parseOptions(int argc, const char **argv) {
 			case 'p':
 				nthreads = parseInt(1, "-p/--threads arg must be at least 1");
 				break;
+			case ARG_THREAD_CEILING:
+				thread_ceiling = parseInt(0, "--thread-ceiling must be at least 0");
+				break;
+			case ARG_THREAD_PIDDIR:
+				thread_stealing_dir = optarg;
+				break;
+			case ARG_REORDER_SAM:
+				reorder = true;
+				break;
 			case ARG_FILEPAR:
 				fileParallel = true;
 				break;
@@ -846,7 +850,6 @@ static void parseOptions(int argc, const char **argv) {
 				maxBtsBetter = maxBts;
 				break;
 			}
-			case ARG_DUMP_PATS: patDumpfile = optarg; break;
 			case ARG_STRAND_FIX: strandFix = true; break;
 			case ARG_PARTITION: partitionSz = parse<int>(optarg); break;
 			case ARG_READS_PER_BATCH: {
@@ -876,6 +879,16 @@ static void parseOptions(int argc, const char **argv) {
 				throw 1;
 		}
 	} while(next_option != -1);
+	if (reorder == true) {
+		if (nthreads == 1 && !thread_stealing) {
+			reorder = false;
+		}
+		if (outType != OUTPUT_SAM) {
+			cerr << "Bowtie will attempt to reorder its output only when outputting SAM." << endl
+				<< "Please specify the `-S` parameter if you intend on using this option." << endl;
+			reorder = false;
+		}
+	}
 	//bool paired = mates1.size() > 0 || mates2.size() > 0 || mates12.size() > 0;
 	if(rangeMode) {
 		// Tell the Ebwt loader to ignore the suffix-array portion of
@@ -979,10 +992,6 @@ static void parseOptions(int argc, const char **argv) {
 	if(snpPhred <= 10 && color && !quiet) {
 		cerr << "Warning: the colorspace SNP penalty (--snpphred) is very low: " << snpPhred << endl;
 	}
-	if(outType == OUTPUT_SAM && refOut) {
-		cerr << "Error: --refout cannot be combined with -S/--sam" << endl;
-		throw 1;
-	}
 	if(!mateFwSet) {
 		if(color) {
 			// Set colorspace default (--ff)
@@ -999,6 +1008,14 @@ static void parseOptions(int argc, const char **argv) {
 		cerr << "         --suppress is only available for the default output type." << endl;
 		suppressOuts.clear();
 	}
+	thread_stealing = thread_ceiling > nthreads;
+#ifdef _WIN32
+	thread_stealing = false;
+#endif
+	if(thread_stealing && thread_stealing_dir.empty()) {
+		cerr << "When --thread-ceiling is specified, must also specify --thread-piddir" << endl;
+		throw 1;
+	}
 }
 
 static const char *argv0 = NULL;
@@ -1103,6 +1120,102 @@ createSinkFactory(HitSink& _sink, size_t threadId) {
 	return sink;
 }
 
+void increment_thread_counter() { // callers in this file invoke it when a search worker starts
+#ifndef WITH_TBB
+	ThreadSafe guard(&thread_counter_mutex); // presumably a scoped lock on the counter mutex - confirm in threading.h
+	++thread_counter;
+#else
+	thread_counter.fetch_and_increment(); // TBB build: no mutex is taken here
+#endif
+}
+
+void decrement_thread_counter() { // callers in this file invoke it when a search worker finishes
+#ifndef WITH_TBB
+	ThreadSafe guard(&thread_counter_mutex); // presumably a scoped lock on the counter mutex - confirm in threading.h
+	--thread_counter;
+#else
+	thread_counter.fetch_and_decrement(); // TBB build: no mutex is taken here
+#endif
+}
+
+// Delete the marker file "<dirname>/<pid>"; a missing file is not an error.
+void del_pid(const char* dirname,int pid) {
+	char* fname = (char*) calloc(FNAME_SIZE,sizeof(char));
+	if(fname == NULL) return; // best-effort cleanup: nothing to do without a buffer
+	// Bounded formatting: a long directory name must not overrun the buffer.
+	int len = snprintf(fname,FNAME_SIZE,"%s/%d",dirname,pid);
+	if(len > 0 && len < FNAME_SIZE) {
+		unlink(fname); // fails harmlessly (ENOENT) when the file is absent
+	}
+	free(fname);
+}
+
+//from http://stackoverflow.com/questions/18100097/portable-way-to-check-if-directory-exists-windows-linux-c
+// Create the empty marker file "<dirname>/<pid>"; mkdir dirname first when stat() fails on it.
+static void write_pid(const char* dirname,int pid) {
+	struct stat dinfo;
+	if(stat(dirname, &dinfo) != 0) mkdir(dirname,0755);
+	char* fname = (char*) calloc(FNAME_SIZE,sizeof(char));
+	int len = (fname != NULL) ? snprintf(fname,FNAME_SIZE,"%s/%d",dirname,pid) : -1; // bounded write
+	FILE* f = (len > 0 && len < FNAME_SIZE) ? fopen(fname,"w") : NULL;
+	if(f != NULL) fclose(f); // fclose(NULL) is undefined, so close only a stream that opened
+	else cerr << "Warning: could not create pid file for process " << pid << " in " << dirname << endl;
+	free(fname);
+}
+
+#ifndef _WIN32
+//from  http://stackoverflow.com/questions/612097/how-can-i-get-the-list-of-files-in-a-directory-using-c-or-c
+// Scan dirname for pid marker files. Adds to *num_pids the number of entries whose
+// process answers kill(pid, 0), and returns the smallest such pid (-1 if none).
+static int read_dir(const char* dirname,int* num_pids) {
+	int lowest_pid = -1;
+	DIR *dir = opendir (dirname);
+	if(dir == NULL) {
+		perror (""); // could not open directory
+		return lowest_pid;
+	}
+	struct dirent *ent;
+	while ((ent = readdir (dir)) != NULL) {
+		if(ent->d_name[0] == '.') continue;
+		// Accept only names that parse fully as a positive int: atoi() would map
+		// junk to 0, and kill(0, 0) succeeds, so it would count as a live pid 0.
+		char *end = NULL;
+		long parsed = strtol(ent->d_name, &end, 10);
+		int pid = (int)parsed;
+		if(end == ent->d_name || *end != '\0' || parsed <= 0 || (long)pid != parsed)
+			continue;
+		// Stale marker files are skipped rather than deleted: two or more
+		// processes trying to delete the same file can race.
+		if(kill(pid, 0) != 0) continue;
+		(*num_pids)++;
+		if(pid < lowest_pid || lowest_pid == -1)
+			lowest_pid = pid;
+	}
+	closedir (dir);
+	return lowest_pid;
+}
+
+// Decide whether this process may start one more worker thread. Only the live
+// process with the smallest pid in thread_stealing_dir may steal, and only while
+// the estimated thread total across processes stays below thread_ceiling.
+static bool steal_thread(int pid, int orig_nthreads) {
+	if(thread_ceiling <= nthreads) {
+		return false;
+	}
+	int num_pids = 0;
+	int lowest_pid = read_dir(thread_stealing_dir.c_str(), &num_pids);
+	if(lowest_pid != pid) {
+		return false;
+	}
+	// Estimate: every other registered process is assumed to run orig_nthreads
+	// threads; this one runs nthreads (its baseline plus any already stolen).
+	int in_use = ((num_pids-1) * orig_nthreads) + nthreads;
+	// The spare count is a whole number, so the former float floor/ceil rounding
+	// and its rand() draw could never change the answer; compare directly.
+	return thread_ceiling - in_use > 0;
+}
+#endif
+
 /**
  * Search through a single (forward) Ebwt index for exact end-to-end
  * hits.  Assumes that index is already loaded into memory.
@@ -1121,6 +1234,9 @@ static void exactSearchWorker(void *vp) {
 static void exactSearchWorker(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer& _patsrc = *exactSearch_patsrc;
 	HitSink& _sink               = *exactSearch_sink;
 	Ebwt<String<Dna> >& ebwt     = *exactSearch_ebwt;
@@ -1184,6 +1300,9 @@ static void exactSearchWorker(void *vp) {
 			#include "search_exact.c"
 		}
 		FINISH_READ(patsrc);
+		if(thread_stealing) {
+			decrement_thread_counter();
+		}
 #ifdef PER_THREAD_TIMING
 		ss.str("");
 		ss.clear();
@@ -1210,6 +1329,9 @@ static void exactSearchWorkerStateful(void *vp) {
 static void exactSearchWorkerStateful(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer& _patsrc = *exactSearch_patsrc;
 	HitSink& _sink               = *exactSearch_sink;
 	Ebwt<String<Dna> >& ebwt     = *exactSearch_ebwt;
@@ -1283,6 +1405,10 @@ static void exactSearchWorkerStateful(void *vp) {
 		// MultiAligner must be destroyed before patsrcFact
 	}
 
+	if(thread_stealing) {
+		decrement_thread_counter();
+	}
+
 	delete patsrcFact;
 	delete sinkFact;
 	delete pool;
@@ -1336,10 +1462,10 @@ static void exactSearch(PatternComposer& _patsrc,
 	}
 	exactSearch_refs   = refs;
 #ifdef WITH_TBB
-	AutoArray<std::thread*> threads(nthreads);
+	vector<std::thread*> threads;
 #else
-	AutoArray<tthread::thread*> threads(nthreads);
-	AutoArray<int> tids(nthreads);
+	vector<tthread::thread*> threads;
+	int *tids = new int[max(nthreads, thread_ceiling)];
 #endif
 
 #ifdef WITH_TBB
@@ -1350,40 +1476,115 @@ static void exactSearch(PatternComposer& _patsrc,
 	{
 		Timer _t(cerr, "Time for 0-mismatch search: ", timing);
 
-		int mil = 10;
-		struct timespec ts = {0};
-		ts.tv_sec=0;
-		ts.tv_nsec = mil * 1000000L;
-
+		int pid = 0;
+		if(thread_stealing) {
+			pid = getpid();
+			write_pid(thread_stealing_dir.c_str(), pid);
+			thread_counter = 0;
+		}
+		
 		for(int i = 0; i < nthreads; i++) {
 #ifdef WITH_TBB
 			thread_tracking_pair tp;
 			tp.tid = i;
 			tp.done = &all_threads_done;
-			if(stateful) {
-				threads[i] = new std::thread(exactSearchWorkerStateful, (void*) &tp);
-			} else {
-				threads[i] = new std::thread(exactSearchWorker, (void*) &tp);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					exactSearchWorkerStateful((void*)&tp);
+				} else {
+					exactSearchWorker((void*)&tp);
+				}
+			}
+			else {
+				if(stateful) {
+					threads.push_back(new std::thread(exactSearchWorkerStateful, (void*)&tp));
+				} else {
+					threads.push_back(new std::thread(exactSearchWorker, (void*)&tp));
+				}
+				threads[i]->detach();
+				SLEEP(10);
 			}
-			threads[i]->detach();
-			nanosleep(&ts, (struct timespec *) NULL);
-		}
-		while(all_threads_done < nthreads);
 #else
 			tids[i] = i;
-			if(stateful) {
-				threads[i] = new tthread::thread(exactSearchWorkerStateful, (void*)&tids[i]);
-			} else {
-				threads[i] = new tthread::thread(exactSearchWorker, (void*)&tids[i]);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					exactSearchWorkerStateful((void*)(tids + i));
+				} else {
+					exactSearchWorker((void*)(tids + i));
+				}
 			}
+			else {
+				if(stateful) {
+					threads.push_back(new tthread::thread(exactSearchWorkerStateful, (void *)(tids + i)));
+				} else {
+					threads.push_back(new tthread::thread(exactSearchWorker, (void *)(tids + i)));
+				}
+			}
+#endif
 		}
 
-		for(int i = 0; i < nthreads; i++) {
+#ifndef _WIN32
+		if(thread_stealing) {
+			int orig_threads = nthreads, steal_ctr = 1;
+			for(int j = 0; j < 10; j++) {
+				sleep(1);
+			}
+			while(thread_counter > 0) {
+				if(steal_thread(pid, orig_threads)) { 
+					nthreads++;
+#ifdef WITH_TBB
+					thread_tracking_pair tp;
+					tp.tid = nthreads - 1;
+					tp.done = &all_threads_done;
+					if(stateful) {
+						threads.push_back(new std::thread(exactSearchWorkerStateful, (void*) &tp));
+					} else {
+						threads.push_back(new std::thread(exactSearchWorker, (void*) &tp));
+					}
+					threads[nthreads-1]->detach();
+					SLEEP(10);
+#else
+					tids[nthreads-1] = nthreads;
+					if(stateful) {
+						threads.push_back(new tthread::thread(exactSearchWorkerStateful, (void *)(tids + nthreads - 1)));
+					} else {
+						threads.push_back(new tthread::thread(exactSearchWorker, (void *)(tids + nthreads - 1)));
+					}
+#endif
+					cerr << "pid " << pid << " started new worker # " << nthreads << endl;
+				}
+				steal_ctr++;
+				for(int j = 0; j < 10; j++) {
+					sleep(1);
+				}
+			}
+		}
+#endif
+		
+#ifdef WITH_TBB
+		while(all_threads_done < nthreads) {
+			SLEEP(10);
+		}
+#else
+		for (int i = 0; i < nthreads; i++) {
 			threads[i]->join();
 		}
+		delete[] tids;
+#endif
+
+#ifndef _WIN32
+		if(thread_stealing) {
+			del_pid(thread_stealing_dir.c_str(), pid);
+		}
 #endif
 	}
 	if(refs != NULL) delete refs;
+
+	for (int i = 0; i < nthreads - 1; i++) {
+		if (threads[i] != NULL) {
+			delete threads[i];
+		}
+	}
 }
 
 /**
@@ -1416,6 +1617,9 @@ static void mismatchSearchWorkerFullStateful(void *vp) {
 static void mismatchSearchWorkerFullStateful(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer&   _patsrc = *mismatchSearch_patsrc;
 	HitSink&               _sink   = *mismatchSearch_sink;
 	Ebwt<String<Dna> >&    ebwtFw  = *mismatchSearch_ebwtFw;
@@ -1489,6 +1693,10 @@ static void mismatchSearchWorkerFullStateful(void *vp) {
 		multi.run(false, tid);
 		// MultiAligner must be destroyed before patsrcFact
 	}
+
+	if(thread_stealing) {
+		decrement_thread_counter();
+	}
 #ifdef WITH_TBB
 	p->done->fetch_and_add(1);
 #endif
@@ -1507,6 +1715,9 @@ static void mismatchSearchWorkerFull(void *vp){
 static void mismatchSearchWorkerFull(void *vp){
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer&   _patsrc   = *mismatchSearch_patsrc;
 	HitSink&               _sink     = *mismatchSearch_sink;
 	Ebwt<String<Dna> >&    ebwtFw    = *mismatchSearch_ebwtFw;
@@ -1633,10 +1844,10 @@ static void mismatchSearchFull(PatternComposer& _patsrc,
 	mismatchSearch_refs = refs;
 
 #ifdef WITH_TBB
-	AutoArray<std::thread*> threads(nthreads);
+	vector<std::thread*> threads;
 #else
-	AutoArray<tthread::thread*> threads(nthreads);
-	AutoArray<int> tids(nthreads);
+	vector<tthread::thread*> threads;
+	int *tids = new int[max(nthreads, thread_ceiling)];
 #endif
 
 #ifdef WITH_TBB
@@ -1647,40 +1858,118 @@ static void mismatchSearchFull(PatternComposer& _patsrc,
 	CHUD_START();
 	{
 		Timer _t(cerr, "Time for 1-mismatch full-index search: ", timing);
-		int mil = 10;
-		struct timespec ts = {0};
-		ts.tv_sec=0;
-		ts.tv_nsec = mil * 1000000L;
+
+#ifndef _WIN32
+		int pid = 0;
+		if(thread_stealing) {
+			pid = getpid();
+			write_pid(thread_stealing_dir.c_str(), pid);
+			thread_counter = 0;
+		}
+#endif
 
 		for(int i = 0; i < nthreads; i++) {
 #ifdef WITH_TBB
 			thread_tracking_pair tp;
 			tp.tid = i;
 			tp.done = &all_threads_done;
-			if(stateful) {
-				threads[i] = new std::thread(mismatchSearchWorkerFullStateful, (void*)&tp);
-			} else {
-				threads[i] = new std::thread(mismatchSearchWorkerFull, (void*)&tp);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					mismatchSearchWorkerFullStateful((void*)&tp);
+				} else {
+					mismatchSearchWorkerFull((void*)&tp);
+				}
+			}
+			else {
+				if(stateful) {
+					threads.push_back(new std::thread(mismatchSearchWorkerFullStateful, (void*)&tp));
+				} else {
+					threads.push_back(new std::thread(mismatchSearchWorkerFull, (void*)&tp));
+				}
+				threads[i]->detach();
+				SLEEP(10);
 			}
-			threads[i]->detach();
-			nanosleep(&ts, (struct timespec *) NULL);
-		}
-		while(all_threads_done < nthreads);
 #else
 			tids[i] = i;
-			if(stateful) {
-				threads[i] = new tthread::thread(mismatchSearchWorkerFullStateful, (void*)&tids[i]);
-			} else {
-				threads[i] = new tthread::thread(mismatchSearchWorkerFull, (void*)&tids[i]);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					mismatchSearchWorkerFullStateful((void*)(tids + i));
+				} else {
+					mismatchSearchWorkerFull((void*)(tids + i));
+				}
+			}
+			else {
+				if(stateful) {
+					threads.push_back(new tthread::thread(mismatchSearchWorkerFullStateful, (void *)(tids + i)));
+				} else {
+					threads.push_back(new tthread::thread(mismatchSearchWorkerFull, (void *)(tids + i)));
+				}
 			}
+#endif
 		}
 
-		for(int i = 0; i < nthreads; i++) {
+#ifndef _WIN32
+		if(thread_stealing) {
+			int orig_threads = nthreads, steal_ctr = 1;
+			for(int j = 0; j < 10; j++) {
+				sleep(1);
+			}
+			while(thread_counter > 0) {
+				if(steal_thread(pid, orig_threads)) {
+					nthreads++;
+#ifdef WITH_TBB
+					thread_tracking_pair tp;
+					tp.tid = nthreads - 1;
+					tp.done = &all_threads_done;
+					if(stateful) {
+						threads.push_back(new std::thread(mismatchSearchWorkerFullStateful, (void*)&tp));
+					} else {
+						threads.push_back(new std::thread(mismatchSearchWorkerFull, (void*)&tp));
+					}
+					threads.back()->detach(); // vector holds nthreads-1 workers (last initial worker ran inline), so index nthreads-1 is out of range
+					SLEEP(10);
+#else
+					tids[nthreads-1] = nthreads - 1; // 0-based tid, matching tids[i] = i above and the TBB branch
+					if(stateful) {
+						threads.push_back(new tthread::thread(mismatchSearchWorkerFullStateful, (void *)(tids + nthreads - 1)));
+					} else {
+						threads.push_back(new tthread::thread(mismatchSearchWorkerFull, (void *)(tids + nthreads - 1)));
+					}
+#endif
+					cerr << "pid " << pid << " started new worker # " << nthreads << endl;
+				}
+				steal_ctr++;
+				for(int j = 0; j < 10; j++) {
+					sleep(1);
+				}
+			}
+		}
+#endif
+		
+#ifdef WITH_TBB
+		while(all_threads_done < nthreads) {
+			SLEEP(10);
+		}
+#else
+		for (int i = 0; i < (int)threads.size(); i++) { // join only spawned workers; the last initial worker ran inline and is not in the vector
 			threads[i]->join();
 		}
+		delete[] tids;
+#endif
+
+#ifndef _WIN32
+		if(thread_stealing) {
+			del_pid(thread_stealing_dir.c_str(), pid);
+		}
 #endif
 	}
 	if(refs != NULL) delete refs;
+
+	for (int i = 0; i < nthreads - 1; i++) {
+		if (threads[i] != NULL) {
+			delete threads[i];
+		}
+	}
 }
 
 #define SWITCH_TO_FW_INDEX() { \
@@ -1778,6 +2067,9 @@ static void twoOrThreeMismatchSearchWorkerStateful(void *vp) {
 static void twoOrThreeMismatchSearchWorkerStateful(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer&   _patsrc = *twoOrThreeMismatchSearch_patsrc;
 	HitSink&               _sink   = *twoOrThreeMismatchSearch_sink;
 	Ebwt<String<Dna> >&    ebwtFw  = *twoOrThreeMismatchSearch_ebwtFw;
@@ -1854,15 +2146,20 @@ static void twoOrThreeMismatchSearchWorkerStateful(void *vp) {
 		multi.run(false, tid);
 		// MultiAligner must be destroyed before patsrcFact
 	}
+
 #ifdef WITH_TBB
 	p->done->fetch_and_add(1);
 #endif
+	if(thread_stealing) {
+		decrement_thread_counter();
+	}
 
 	delete patsrcFact;
 	delete sinkFact;
 	delete pool;
 	return;
 }
+
 #ifdef WITH_TBB
 //void twoOrThreeMismatchSearchWorkerFull::operator()() const {
 static void twoOrThreeMismatchSearchWorkerFull(void *vp) {
@@ -1872,6 +2169,9 @@ static void twoOrThreeMismatchSearchWorkerFull(void *vp) {
 static void twoOrThreeMismatchSearchWorkerFull(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer&           _patsrc  = *twoOrThreeMismatchSearch_patsrc;
 	HitSink&                       _sink    = *twoOrThreeMismatchSearch_sink;
 	vector<String<Dna5> >&         os       = *twoOrThreeMismatchSearch_os;
@@ -1989,6 +2289,9 @@ static void twoOrThreeMismatchSearchWorkerFull(void *vp) {
 			#undef DONEMASK_SET
 		}
 		FINISH_READ(patsrc);
+		if(thread_stealing) {
+			decrement_thread_counter();
+		}
 #ifdef PER_THREAD_TIMING
 		ss.str("");
 		ss.clear();
@@ -2045,10 +2348,10 @@ static void twoOrThreeMismatchSearchFull(
 	twoOrThreeMismatchSearch_two      = two;
 
 #ifdef WITH_TBB
-	AutoArray<std::thread*> threads(nthreads);
+	vector<std::thread*> threads;
 #else
-	AutoArray<tthread::thread*> threads(nthreads);
-	AutoArray<int> tids(nthreads);
+	vector<tthread::thread*> threads;
+	int *tids = new int[max(nthreads, thread_ceiling)];
 #endif
 
 #ifdef WITH_TBB
@@ -2059,41 +2362,118 @@ static void twoOrThreeMismatchSearchFull(
 	CHUD_START();
 	{
 		Timer _t(cerr, "End-to-end 2/3-mismatch full-index search: ", timing);
-		
-		int mil = 10;
-		struct timespec ts = {0};
-		ts.tv_sec=0;
-		ts.tv_nsec = mil * 1000000L;
+
+#ifndef _WIN32
+		int pid = 0;
+		if(thread_stealing) {
+			pid = getpid();
+			write_pid(thread_stealing_dir.c_str(), pid);
+			thread_counter = 0;
+		}
+#endif
 
 		for(int i = 0; i < nthreads; i++) {
 #ifdef WITH_TBB
 			thread_tracking_pair tp;
 			tp.tid = i;
 			tp.done = &all_threads_done;
-			if(stateful) {
-				threads[i] = new std::thread(twoOrThreeMismatchSearchWorkerStateful, (void*) &tp);
-			} else {
-				threads[i] = new std::thread(twoOrThreeMismatchSearchWorkerFull, (void*) &tp);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					twoOrThreeMismatchSearchWorkerStateful((void*)&tp);
+				} else {
+					twoOrThreeMismatchSearchWorkerFull((void*)&tp);
+				}
+			}
+			else {
+				if(stateful) {
+					threads.push_back(new std::thread(twoOrThreeMismatchSearchWorkerStateful, (void*)&tp));
+				} else {
+					threads.push_back(new std::thread(twoOrThreeMismatchSearchWorkerFull, (void*)&tp));
+				}
+				threads[i]->detach();
+				SLEEP(10);
 			}
-			threads[i]->detach();
-			nanosleep(&ts, (struct timespec *) NULL);
-		}
-		while(all_threads_done < nthreads);
 #else
 			tids[i] = i;
-			if(stateful) {
-				threads[i] = new tthread::thread(twoOrThreeMismatchSearchWorkerStateful, (void*)&tids[i]);
-			} else {
-				threads[i] = new tthread::thread(twoOrThreeMismatchSearchWorkerFull, (void*)&tids[i]);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					twoOrThreeMismatchSearchWorkerStateful((void*)(tids + i));
+				} else {
+					twoOrThreeMismatchSearchWorkerFull((void*)(tids + i));
+				}
+			}
+			else {
+				if(stateful) {
+					threads.push_back(new tthread::thread(twoOrThreeMismatchSearchWorkerStateful, (void *)(tids + i)));
+				} else {
+					threads.push_back(new tthread::thread(twoOrThreeMismatchSearchWorkerFull, (void *)(tids + i)));
+				}
 			}
+#endif
 		}
 
-		for(int i = 0; i < nthreads; i++) {
+#ifndef _WIN32
+		if(thread_stealing) {
+			int orig_threads = nthreads, steal_ctr = 1;
+			for(int j = 0; j < 10; j++) {
+				sleep(1);
+			}
+			while(thread_counter > 0) {
+				if(steal_thread(pid, orig_threads)) {
+					nthreads++;
+#ifdef WITH_TBB
+					thread_tracking_pair tp;
+					tp.tid = nthreads - 1;
+					tp.done = &all_threads_done;
+					if(stateful) {
+						threads.push_back(new std::thread(twoOrThreeMismatchSearchWorkerStateful, (void*) &tp));
+					} else {
+						threads.push_back(new std::thread(twoOrThreeMismatchSearchWorkerFull, (void*) &tp));
+					}
+					threads.back()->detach(); // vector holds nthreads-1 workers (last initial worker ran inline), so index nthreads-1 is out of range
+					SLEEP(10);
+#else
+					tids[nthreads-1] = nthreads - 1; // 0-based tid, matching tids[i] = i above and the TBB branch
+					if(stateful) {
+						threads.push_back(new tthread::thread(twoOrThreeMismatchSearchWorkerStateful, (void *)(tids + nthreads - 1)));
+					} else {
+						threads.push_back(new tthread::thread(twoOrThreeMismatchSearchWorkerFull, (void *)(tids + nthreads - 1)));
+					}
+#endif
+					cerr << "pid " << pid << " started new worker # " << nthreads << endl;
+				}
+				steal_ctr++;
+				for(int j = 0; j < 10; j++) {
+					sleep(1);
+				}
+			}
+		}
+#endif
+		
+#ifdef WITH_TBB
+		while(all_threads_done < nthreads) {
+			SLEEP(10);
+		}
+#else
+		for (int i = 0; i < (int)threads.size(); i++) { // join only spawned workers; the last initial worker ran inline and is not in the vector
 			threads[i]->join();
 		}
+		delete[] tids;
+#endif
+
+#ifndef _WIN32
+		if(thread_stealing) {
+			del_pid(thread_stealing_dir.c_str(), pid);
+		}
 #endif
 	}
 	if(refs != NULL) delete refs;
+
+	for (int i = 0; i < nthreads - 1; i++) {
+		if (threads[i] != NULL) {
+			delete threads[i];
+		}
+	}
 	return;
 }
 
@@ -2118,6 +2498,9 @@ static void seededQualSearchWorkerFull(void *vp) {
 static void seededQualSearchWorkerFull(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer&     _patsrc    = *seededQualSearch_patsrc;
 	HitSink&                 _sink      = *seededQualSearch_sink;
 	vector<String<Dna5> >&   os         = *seededQualSearch_os;
@@ -2327,6 +2710,9 @@ static void seededQualSearchWorkerFull(void *vp) {
 			#undef DONEMASK_SET
 		}
 		FINISH_READ(patsrc);
+		if(thread_stealing) {
+			decrement_thread_counter();
+		}
 		if(seedMms > 0) {
 			delete pamRc;
 			delete pamFw;
@@ -2353,6 +2739,9 @@ static void seededQualSearchWorkerFullStateful(void *vp) {
 static void seededQualSearchWorkerFullStateful(void *vp) {
 	int tid = *((int*)vp);
 #endif
+	if(thread_stealing) {
+		increment_thread_counter();
+	}
 	PatternComposer&     _patsrc    = *seededQualSearch_patsrc;
 	HitSink&                 _sink      = *seededQualSearch_sink;
 	Ebwt<String<Dna> >&      ebwtFw     = *seededQualSearch_ebwtFw;
@@ -2445,9 +2834,13 @@ static void seededQualSearchWorkerFullStateful(void *vp) {
 		metrics->printSummary();
 		delete metrics;
 	}
+
 #ifdef WITH_TBB
 	p->done->fetch_and_add(1);
 #endif
+	if(thread_stealing) {
+		decrement_thread_counter();
+	}
 
 	delete patsrcFact;
 	delete sinkFact;
@@ -2506,11 +2899,10 @@ static void seededQualCutoffSearchFull(
 	seededQualSearch_refs = refs;
 
 #ifdef WITH_TBB
-	//tbb::task_group tbb_grp;
-	AutoArray<std::thread*> threads(nthreads);
+	vector<std::thread*> threads;
 #else
-	AutoArray<tthread::thread*> threads(nthreads);
-	AutoArray<int> tids(nthreads);
+	vector<tthread::thread*> threads;
+	int *tids = new int[max(nthreads, thread_ceiling)];
 #endif
 
 #ifdef WITH_TBB
@@ -2529,42 +2921,122 @@ static void seededQualCutoffSearchFull(
 	{
 		// Phase 1: Consider cases 1R and 2R
 		Timer _t(cerr, "Seeded quality full-index search: ", timing);
-		int mil = 10;
-		struct timespec ts = {0};
-		ts.tv_sec=0;
-		ts.tv_nsec = mil * 1000000L;
+
+#ifndef _WIN32
+		int pid = 0;
+		if(thread_stealing) {
+			pid = getpid();
+			write_pid(thread_stealing_dir.c_str(), pid);
+			thread_counter = 0;
+		}
+#endif
 
 		for(int i = 0; i < nthreads; i++) {
 #ifdef WITH_TBB
 			thread_tracking_pair tp;
 			tp.tid = i;
 			tp.done = &all_threads_done;
-			if(stateful) {
-				threads[i] = new std::thread(seededQualSearchWorkerFullStateful, (void*) &tp);
-			} else {
-				threads[i] = new std::thread(seededQualSearchWorkerFull, (void*) &tp);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					seededQualSearchWorkerFullStateful((void*)&tp);
+				} else {
+					seededQualSearchWorkerFull((void*)&tp);
+				}
 			}
-			threads[i]->detach();
-			nanosleep(&ts, (struct timespec *) NULL);
-		}
-		while(all_threads_done < nthreads);
+			else {
+				if(stateful) {
+					threads.push_back(new std::thread(seededQualSearchWorkerFullStateful, (void*)&tp));
+				} else {
+					threads.push_back(new std::thread(seededQualSearchWorkerFull, (void*)&tp));
+				}
+				threads[i]->detach();
+				SLEEP(10);
+		    }
 #else
 			tids[i] = i;
-			if(stateful) {
-				threads[i] = new tthread::thread(seededQualSearchWorkerFullStateful, (void*)&tids[i]);
-			} else {
-				threads[i] = new tthread::thread(seededQualSearchWorkerFull, (void*)&tids[i]);
+			if (i == nthreads - 1) {
+				if(stateful) {
+					seededQualSearchWorkerFullStateful((void*)(tids + i));
+				} else {
+					seededQualSearchWorkerFull((void*)(tids + i));
+				}
 			}
+			else {
+				if(stateful) {
+					threads.push_back(new tthread::thread(seededQualSearchWorkerFullStateful, (void *)(tids + i)));
+				} else {
+					threads.push_back(new tthread::thread(seededQualSearchWorkerFull, (void *)(tids + i)));
+				}
+			}
+#endif
 		}
 
-		for(int i = 0; i < nthreads; i++) {
+#ifndef _WIN32
+		if(thread_stealing) {
+			int orig_threads = nthreads, steal_ctr = 1;
+			for(int j = 0; j < 10; j++) {
+				sleep(1);
+			}
+			while(thread_counter > 0) {
+				if(steal_thread(pid, orig_threads)) {
+					nthreads++;
+#ifdef WITH_TBB
+					thread_tracking_pair tp;
+					tp.tid = nthreads - 1;
+					tp.done = &all_threads_done;
+					if(stateful) {
+						threads.push_back(new std::thread(seededQualSearchWorkerFullStateful, (void*) &tp));
+					} else {
+						threads.push_back(new std::thread(seededQualSearchWorkerFull, (void*) &tp));
+					}
+					threads.back()->detach(); // vector holds nthreads-1 workers (last initial worker ran inline), so index nthreads-1 is out of range
+					SLEEP(10);
+#else
+					tids[nthreads-1] = nthreads - 1;
+					if(stateful) {
+						threads.push_back(new tthread::thread(seededQualSearchWorkerFullStateful, (void *)(tids + nthreads - 1)));
+					} else {
+						threads.push_back(new tthread::thread(seededQualSearchWorkerFull, (void *)(tids + nthreads - 1)));
+					}
+#endif
+					cerr << "pid " << pid << " started new worker # " << nthreads << endl;
+				}
+				steal_ctr++;
+				for(int j = 0; j < 10; j++) {
+					sleep(1);
+				}
+			}
+		}
+#endif
+		
+#ifdef WITH_TBB
+		while(all_threads_done < nthreads) {
+			SLEEP(10);
+		}
+#else
+		for (int i = 0; i < (int)threads.size(); i++) { // join only spawned workers; the last initial worker ran inline and is not in the vector
 			threads[i]->join();
 		}
+		delete[] tids;
+#endif
+
+#ifndef _WIN32
+		if(thread_stealing) {
+			del_pid(thread_stealing_dir.c_str(), pid);
+		}
 #endif
 	}
+
 	if(refs != NULL) {
 		delete refs;
 	}
+
+	for (int i = 0; i < nthreads - 1; i++) {
+		if (threads[i] != NULL) {
+			delete threads[i];
+		}
+	}
+
 	ebwtBw.evictFromMemory();
 }
 
@@ -2581,38 +3053,31 @@ patsrcFromStrings(int format,
 	switch(format) {
 		case FASTA:
 			return new FastaPatternSource (reads, quals, color,
-			                               patDumpfile,
 			                               trim3, trim5,
 			                               solexaQuals, phred64Quals,
 			                               integerQuals);
 		case FASTA_CONT:
 			return new FastaContinuousPatternSource (
 			                               reads, fastaContLen,
-			                               fastaContFreq,
-			                               patDumpfile);
+			                               fastaContFreq);
 		case RAW:
 			return new RawPatternSource   (reads, color,
-			                               patDumpfile,
 			                               trim3, trim5);
 		case FASTQ:
 			return new FastqPatternSource (reads, color,
-			                               patDumpfile,
 			                               trim3, trim5,
 			                               solexaQuals, phred64Quals,
 			                               integerQuals);
 		case INTERLEAVED:
 			return new FastqPatternSource (reads, color,
-			                               patDumpfile,
 			                               trim3, trim5,
 			                               solexaQuals, phred64Quals,
 			                               integerQuals, true /* is interleaved */);
 		case TAB_MATE:
 			return new TabbedPatternSource(reads, false, color,
-			                               patDumpfile,
 			                               trim3, trim5);
 		case CMDLINE:
 			return new VectorPatternSource(reads, color,
-			                               patDumpfile,
 			                               trim3, trim5);
 		default: {
 			cerr << "Internal error; bad patsrc format: " << format << endl;
@@ -2776,33 +3241,10 @@ static void driver(const char * type,
 	}
 	OutFileBuf *fout;
 	if(!outfile.empty()) {
-		if(refOut) {
-			fout = NULL;
-			if(!quiet) {
-				cerr << "Warning: ignoring alignment output file " << outfile << " because --refout was specified" << endl;
-			}
-		} else {
-			fout = new OutFileBuf(outfile.c_str(), false);
-		}
+		fout = new OutFileBuf(outfile.c_str(), false);
 	} else {
 		fout = new OutFileBuf();
 	}
-	ReferenceMap* rmap = NULL;
-	if(refMapFile != NULL) {
-		if(verbose || startVerbose) {
-			cerr << "About to load in a reference map file with name "
-			     << refMapFile << ": "; logTime(cerr, true);
-		}
-		rmap = new ReferenceMap(refMapFile, !noRefNames);
-	}
-	AnnotationMap* amap = NULL;
-	if(annotMapFile != NULL) {
-		if(verbose || startVerbose) {
-			cerr << "About to load in an annotation map file with name "
-			     << annotMapFile << ": "; logTime(cerr, true);
-		}
-		amap = new AnnotationMap(annotMapFile);
-	}
 	// Initialize Ebwt object and read in header
 	if(verbose || startVerbose) {
 		cerr << "About to initialize fw Ebwt: "; logTime(cerr, true);
@@ -2817,7 +3259,6 @@ static void driver(const char * type,
 	                useShmem, // whether to use shared memory
 	                mmSweep,  // sweep memory-mapped files
 	                !noRefNames, // load names?
-	                rmap,     // reference map, or NULL if none is needed
 	                verbose, // whether to be talkative
 	                startVerbose, // talkative during initialization
 	                false /*passMemExc*/,
@@ -2839,7 +3280,6 @@ static void driver(const char * type,
 			useShmem, // whether to use shared memory
 			mmSweep,  // sweep memory-mapped files
 			!noRefNames, // load names?
-			rmap,     // reference map, or NULL if none is needed
 			verbose,  // whether to be talkative
 			startVerbose, // talkative during initialization
 			false /*passMemExc*/,
@@ -2886,101 +3326,49 @@ static void driver(const char * type,
 		HitSink *sink;
 		vector<string>* refnames = &ebwt.refnames();
 		if(noRefNames) refnames = NULL;
-		switch(outType) {
-			case OUTPUT_FULL:
-				if(refOut) {
-					sink = new VerboseHitSink(
-							ebwt.nPat(), offBase,
-							colorSeq, colorQual, printCost,
-							suppressOuts, rmap, amap,
-							fullRef,
-							dumpAlBase,
-							dumpUnalBase,
-							dumpMaxBase,
-							format == TAB_MATE, sampleMax,
-							refnames, nthreads,
-							outBatchSz, partitionSz);
-				} else {
-					sink = new VerboseHitSink(
-							fout, offBase,
-							colorSeq, colorQual, printCost,
-							suppressOuts, rmap, amap,
-							fullRef,
-							dumpAlBase,
-							dumpUnalBase,
-							dumpMaxBase,
-							format == TAB_MATE, sampleMax,
-							refnames, nthreads,
-							outBatchSz, partitionSz);
+		if(outType == OUTPUT_FULL) {
+			sink = new VerboseHitSink(
+					*fout, offBase,
+					colorSeq, colorQual, printCost,
+					suppressOuts,
+					fullRef,
+					dumpAlBase,
+					dumpUnalBase,
+					dumpMaxBase,
+					format == TAB_MATE, sampleMax,
+					refnames, nthreads,
+					outBatchSz, partitionSz);
+		} else if(outType == OUTPUT_SAM) {
+			SAMHitSink *sam = new SAMHitSink(
+				*fout, 1,
+				fullRef, samNoQnameTrunc,
+				dumpAlBase,
+				dumpUnalBase,
+				dumpMaxBase,
+				format == TAB_MATE,
+				sampleMax,
+				refnames,
+				nthreads,
+				outBatchSz,
+				reorder);
+			if(!samNoHead) {
+				vector<string> refnames;
+				if(!samNoSQ) {
+					readEbwtRefnames(adjustedEbwtFileBase, refnames);
 				}
-				break;
-			case OUTPUT_SAM:
-				if(refOut) {
-					throw 1;
-				} else {
-					SAMHitSink *sam = new SAMHitSink(
-						fout, 1, rmap, amap,
-						fullRef, samNoQnameTrunc,
-						dumpAlBase,
-						dumpUnalBase,
-						dumpMaxBase,
-						format == TAB_MATE,
-						sampleMax,
-						refnames,
-						nthreads,
-						outBatchSz);
-					if(!samNoHead) {
-						vector<string> refnames;
-						if(!samNoSQ) {
-							readEbwtRefnames(adjustedEbwtFileBase, refnames);
-						}
-						sam->appendHeaders(
-							sam->out(0),
-							ebwt.nPat(),
-							refnames, color, samNoSQ, rmap,
-							ebwt.plen(), fullRef,
-							samNoQnameTrunc,
-							argstr.c_str(),
-							rgs.empty() ? NULL : rgs.c_str());
-					}
-					sink = sam;
-				}
-				break;
-			case OUTPUT_CONCISE:
-				if(refOut) {
-					sink = new ConciseHitSink(
-						ebwt.nPat(),
-						offBase,
-						dumpAlBase,
-						dumpUnalBase,
-						dumpMaxBase,
-						format == TAB_MATE,
-						sampleMax,
-						refnames,
-						nthreads,
-						outBatchSz,
-						reportOpps);
-				} else {
-					sink = new ConciseHitSink(
-						fout,
-						offBase,
-						dumpAlBase,
-						dumpUnalBase,
-						dumpMaxBase,
-						format == TAB_MATE,
-						sampleMax,
-						refnames,
-						nthreads,
-						outBatchSz,
-						reportOpps);
-				}
-				break;
-			case OUTPUT_NONE:
-				sink = new StubHitSink();
-				break;
-			default:
-				cerr << "Invalid output type: " << outType << endl;
-				throw 1;
+				sam->appendHeaders(
+					sam->out(),
+					ebwt.nPat(),
+					refnames, color, samNoSQ,
+					ebwt.plen(), fullRef,
+					samNoQnameTrunc,
+					argstr.c_str(),
+					rgs.empty() ? NULL : rgs.c_str());
+			}
+			sink = sam;
+		} else {
+			cerr << "Invalid output type: " << outType << endl;
+			throw 1;
 		}
 		if(verbose || startVerbose) {
 			cerr << "Dispatching to search driver: "; logTime(cerr, true);
@@ -3035,8 +3423,6 @@ static void driver(const char * type,
 		}
 		delete patsrc;
 		delete sink;
-		delete amap;
-		delete rmap;
 		if(fout != NULL) delete fout;
 	}
 }
diff --git a/filebuf.h b/filebuf.h
index 39037a4..4f83fdc 100644
--- a/filebuf.h
+++ b/filebuf.h
@@ -38,6 +38,7 @@ static inline bool isspace_notnl(int c) {
  */
 class FileBuf {
 public:
+
 	FileBuf() {
 		init();
 	}
@@ -431,6 +432,8 @@ class OutFileBuf {
 
 public:
 
+    static const size_t BUF_SZ = 16 * 1024;
+
 	/**
 	 * Open a new output stream to a file with given name.
 	 */
@@ -529,11 +532,15 @@ public:
 	 * Write a c++ string to the write buffer and, if necessary, flush.
 	 */
 	template<typename T>
-	void writeString(const T& s) {
+	size_t writeString(const T& s) {
 		assert(!closed_);
 		size_t slen = s.length();
+		size_t bytes_written = 0;
 		if(cur_ + slen > BUF_SZ) {
-			if(cur_ > 0) flush();
+			if(cur_ > 0) {
+				bytes_written += cur_; // tally pending bytes first so the count is not lost if flush() resets cur_
+				flush();
+			}
 			if(slen >= BUF_SZ) {
 				fwrite(s.toZBuf(), slen, 1, out_);
 			} else {
@@ -545,7 +552,9 @@ public:
 			memcpy(&buf_[cur_], s.toZBuf(), slen);
 			cur_ += slen;
 		}
+		bytes_written += slen;
 		assert_leq(cur_, BUF_SZ);
+		return bytes_written;
 	}
 
 	/**
@@ -626,8 +635,6 @@ public:
 
 private:
 
-	static const size_t BUF_SZ = 16 * 1024;
-
 	const char *name_;
 	FILE       *out_;
 	size_t    cur_;
diff --git a/genomes/.cvsignore b/genomes/.cvsignore
deleted file mode 100644
index 616d443..0000000
--- a/genomes/.cvsignore
+++ /dev/null
@@ -1 +0,0 @@
-NC_008253.bfa
diff --git a/hit.cpp b/hit.cpp
index f0064a8..dd1fa1b 100644
--- a/hit.cpp
+++ b/hit.cpp
@@ -45,7 +45,7 @@ void VerboseHitSink::reportMaxed(
 				if(strat == bestStratum) {
 					if(num == r) {
 						hs[i].oms = hs[i+1].oms = (uint32_t)(hs.size()/2);
-						reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true);
+						reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid());
 						break;
 					}
 					num++;
@@ -62,7 +62,7 @@ void VerboseHitSink::reportMaxed(
 			uint32_t r = rand.nextU32() % num;
 			Hit& h = hs[r];
 			h.oms = (uint32_t)hs.size();
-			reportHits(&h, NULL, 0, 1, threadId, 0, 0, true);
+			reportHits(&h, NULL, 0, 1, threadId, 0, 0, true, p.rdid());
 		}
 	}
 }
@@ -74,8 +74,6 @@ void VerboseHitSink::append(
 	BTString& o,
 	const Hit& h,
 	const vector<string>* refnames,
-	ReferenceMap *rmap,
-	AnnotationMap *amap,
 	bool fullRef,
 	int partition,
 	int offBase,
@@ -107,9 +105,7 @@ void VerboseHitSink::append(
 				else o << '\t';
 				// Output a partitioning key
 				// First component of the key is the reference index
-				if(refnames != NULL && rmap != NULL) {
-					printUptoWs(o, rmap->getName(h.h.first), !fullRef);
-				} else if(refnames != NULL && h.h.first < refnames->size()) {
+				if(refnames != NULL && h.h.first < refnames->size()) {
 					printUptoWs(o, (*refnames)[h.h.first], !fullRef);
 				} else {
 					o << h.h.first;
@@ -195,9 +191,7 @@ void VerboseHitSink::append(
 				if(firstfield) firstfield = false;
 				else o << '\t';
 				// .first is text id, .second is offset
-				if(refnames != NULL && rmap != NULL) {
-					printUptoWs(o, rmap->getName(h.h.first), !fullRef);
-				} else if(refnames != NULL && h.h.first < refnames->size()) {
+				if(refnames != NULL && h.h.first < refnames->size()) {
 					printUptoWs(o, (*refnames)[h.h.first], !fullRef);
 				} else {
 					o << h.h.first;
@@ -236,30 +230,7 @@ void VerboseHitSink::append(
 		if(!suppress.test((uint32_t)field++)) {
 			if(firstfield) firstfield = false;
 			else o << '\t';
-			// Look for SNP annotations falling within the alignment
-			map<int, char> snpAnnots;
 			const size_t len = length(h.patSeq);
-			if(amap != NULL) {
-				AnnotationMap::Iter ai = amap->lower_bound(h.h);
-				for(; ai != amap->end(); ai++) {
-					assert_geq(ai->first.first, h.h.first);
-					if(ai->first.first != h.h.first) {
-						// Different chromosome
-						break;
-					}
-					if(ai->first.second >= h.h.second + len) {
-						// Doesn't fall into alignment
-						break;
-					}
-					if(ai->second.first != 'S') {
-						// Not a SNP annotation
-						continue;
-					}
-					size_t off = ai->first.second - h.h.second;
-					if(!h.fw) off = len - off - 1;
-					snpAnnots[(int)off] = ai->second.second;
-				}
-			}
 			// Output mismatch column
 			bool firstmm = true;
 			for (unsigned int i = 0; i < len; ++ i) {
@@ -275,12 +246,6 @@ void VerboseHitSink::append(
 					assert_neq(refChar, qryChar);
 					o << ":" << refChar << ">" << qryChar;
 					firstmm = false;
-				} else if(snpAnnots.find(i) != snpAnnots.end()) {
-					if (!firstmm) o << ",";
-					o << i; // position
-					char qryChar = (h.fw ? h.patSeq[i] : h.patSeq[length(h.patSeq)-i-1]);
-					o << "S:" << snpAnnots[i] << ">" << qryChar;
-					firstmm = false;
 				}
 			}
 			if(partition != 0 && firstmm) o << '-';
diff --git a/hit.h b/hit.h
index ccc6ce5..f414211 100644
--- a/hit.h
+++ b/hit.h
@@ -17,9 +17,8 @@
 #include "formats.h"
 #include "filebuf.h"
 #include "edit.h"
-#include "refmap.h"
-#include "annot.h"
 #include "sstring.h"
+#include <algorithm>
 
 /**
  * Classes for dealing with reporting alignments.
@@ -31,7 +30,6 @@ using namespace seqan;
 /// Constants for the various output modes
 enum output_types {
 	OUTPUT_FULL = 1,
-	OUTPUT_CONCISE,
 	OUTPUT_BINARY,
 	OUTPUT_CHAIN,
 	OUTPUT_SAM,
@@ -42,7 +40,6 @@ enum output_types {
 static const std::string output_type_names[] = {
 	"Invalid!",
 	"Full",
-	"Concise",
 	"Binary",
 	"None"
 };
@@ -150,7 +147,7 @@ bool operator< (const Hit& a, const Hit& b);
 class HitSink {
 public:
 	explicit HitSink(
-		OutFileBuf* out,
+		OutFileBuf& out,
 		const std::string& dumpAl,
 		const std::string& dumpUnal,
 		const std::string& dumpMax,
@@ -158,13 +155,11 @@ public:
 		bool sampleMax,
 		vector<string>* refnames,
 		size_t nthreads,
-		int perThreadBufSize) :
-		_outs(),
-		_deleteOuts(false),
+		size_t perThreadBufSize,
+		bool reorder) :
+		out_(out),
 		_refnames(refnames),
-		_numWrappers(0),
-		_locks(),
-		ts_wrap(NULL),
+		mutex_(),
 		dumpAlBase_(dumpAl),
 		dumpUnalBase_(dumpUnal),
 		dumpMaxBase_(dumpMax),
@@ -174,8 +169,10 @@ public:
 		nthreads_((nthreads > 0) ? nthreads : 1),
 		ptBufs_(),
 		ptCounts_(nthreads_),
+		batchIds_(nthreads_),
 		perThreadBufSize_(perThreadBufSize),
-		ptNumAligned_(NULL)
+		ptNumAligned_(NULL),
+		reorder_(reorder)
 	{
 		size_t nelt = 5 * nthreads_;
 		ptNumAligned_ = new uint64_t[nelt];
@@ -184,49 +181,10 @@ public:
 		ptNumReportedPaired_ = ptNumReported_ + nthreads_;
 		ptNumUnaligned_ = ptNumReportedPaired_ + nthreads_;
 		ptNumMaxed_ = ptNumUnaligned_ + nthreads_;
-		_outs.push_back(out);
 		ptBufs_.resize(nthreads_);
 		ptCounts_.resize(nthreads_, 0);
-		//had to move this below the array inits, otherwise it get's mangled leading to segfaults on access
-		_locks.push_back(new MUTEX_T);
-		initDumps();
-	}
-
-	/**
-	 * Open a number of output streams; usually one per reference
-	 * sequence.  For now, we give then names refXXXXX.map where XXXXX
-	 * is the 0-padded reference index.  Someday we may want to include
-	 * the name of the reference sequence in the filename somehow.
-	 */
-	explicit HitSink(
-		size_t numOuts,
-		const std::string& dumpAl,
-		const std::string& dumpUnal,
-		const std::string& dumpMax,
-		bool onePairFile,
-		bool sampleMax,
-		vector<string>* refnames,
-		size_t nthreads,
-		int perThreadBufSize) :
-		_outs(),
-		_deleteOuts(true),
-		_refnames(refnames),
-		_locks(),
-		dumpAlBase_(dumpAl),
-		dumpUnalBase_(dumpUnal),
-		dumpMaxBase_(dumpMax),
-		onePairFile_(onePairFile),
-		sampleMax_(sampleMax),
-		quiet_(false),
-		nthreads_(0),
-		perThreadBufSize_(0),
-		ptNumAligned_(NULL)
-	{
-		// Open all files for writing and initialize all locks
-		for(size_t i = 0; i < numOuts; i++) {
-			_outs.push_back(NULL); // we open output streams lazily
-			_locks.push_back(new MUTEX_T);
-		}
+		batchIds_.assign(nthreads_, 0);
+		lastBatchIdSeen = 0;
 		initDumps();
 	}
 
@@ -235,46 +193,14 @@ public:
 	 */
 	virtual ~HitSink() {
 		if(ptNumAligned_ != NULL) {
-			delete ptNumAligned_;
+			delete[] ptNumAligned_;
 			ptNumAligned_ = NULL;
 		}
 		closeOuts();
-		if(_deleteOuts) {
-			// Delete all non-NULL output streams
-			for(size_t i = 0; i < _outs.size(); i++) {
-				if(_outs[i] != NULL) {
-					delete _outs[i];
-					_outs[i] = NULL;
-				}
-				if(_locks[i] != NULL) {
-					delete _locks[i];
-					_locks[i] = NULL;
-				}
-			}
-		}
 		destroyDumps();
 	}
 
 	/**
-	 * Call this whenever this HitSink is wrapped by a new
-	 * HitSinkPerThread.  This helps us keep track of whether the main
-	 * lock or any of the per-stream locks will be contended.
-	 */
-	void addWrapper() {
-		ThreadSafe ts(&numWrapper_mutex_m);
-		_numWrappers++;
-	}
-
-	/**
-	 * Called by concrete subclasses to figure out which elements of
-	 * the _outs/_locks array to use when outputting the alignment.
-	 */
-	size_t refIdxToStreamIdx(size_t refIdx) {
-		if(refIdx >= _outs.size()) return 0;
-		return refIdx;
-	}
-
-	/**
 	 * Append a single hit to the given output stream.
 	 */
 	virtual void append(BTString& o, const Hit& h, int mapq, int xms) = 0;
@@ -285,7 +211,8 @@ public:
 	 * alignments or because of -m.
 	 */
 	void tallyAlignments(size_t threadId, size_t numAl, bool paired) {
-		ptNumAligned_[threadId] += numAl;
+        assert(!paired || (numAl % 2) == 0);
+        ptNumAligned_[threadId] ++;
 		if(paired) {
 			ptNumReportedPaired_[threadId] += numAl;
 		} else {
@@ -304,7 +231,7 @@ public:
 		size_t threadId,
 		int mapq,
 		int xms,
-		bool tally)
+		bool tally, size_t rdid)
 	{
 		assert_geq(end, start);
 		assert(nthreads_ > 1 || threadId == 0);
@@ -313,51 +240,22 @@ public:
 		}
 		const Hit& firstHit = (hptr == NULL) ? (*hsptr)[start] : *hptr;
 		bool paired = firstHit.mate > 0;
-		// Sort reads so that those against the same reference sequence
-		// are consecutive.
-		if(hsptr != NULL && _outs.size() > 1 && end - start > 2) {
-			sort(hsptr->begin() + start, hsptr->begin() + end);
-		}
+		maybeFlush(threadId);
 		BTString& o = ptBufs_[threadId];
-		if(_outs.size() == 1) {
-			// Per-thread buffering is active
-			for(size_t i = start; i < end; i++) {
-				const Hit& h = (hptr == NULL) ? (*hsptr)[i] : *hptr;
-				assert(h.repOk());
-				if(nthreads_ > 1) {
-					maybeFlush(threadId, 0);
-					append(o, h, mapq, xms);
-					ptCounts_[threadId]++;
-				} else {
-					append(o, h, mapq, xms);
-					out(0).writeString(o);
-					o.clear();
-				}
-			}
-		} else {
-			// multiple output streams or alignments
-			// Note: in this case we basically don't get the benefit
-			// from per-thread buffering, becuase it is too
-			// complicated to provide per-thread, per-output-lock
-			// buffers.
-			size_t i = start;
-			while(i < end) {
-				const Hit& h = (hptr == NULL) ? (*hsptr)[i] : *hptr;
-				size_t strIdx = refIdxToStreamIdx(h.h.first);
-				{
-					assert(h.repOk());
-					do {
-						append(o, h, mapq, xms);
-						{
-							ThreadSafe _ts(_locks[strIdx]);
-							out(h.h.first).writeString(o);
-						}
-						o.clear();
-						i++;
-					} while(refIdxToStreamIdx(h.h.first) == strIdx && i < end);
-				}
+		// Per-thread buffering is active
+		for(size_t i = start; i < end; i++) {
+			const Hit& h = (hptr == NULL) ? (*hsptr)[i] : *hptr;
+			assert(h.repOk());
+			append(o, h, mapq, xms);
+			if(nthreads_ == 1) {
+				out_.writeString(o);
+				o.clear();
 			}
 		}
+		ptCounts_[threadId]++;
+		if (reorder_) {
+			batchIds_[threadId] = rdid / perThreadBufSize_ + 1;
+		}
 		if(tally) {
 			tallyAlignments(threadId, end - start, paired);
 		}
@@ -369,7 +267,7 @@ public:
 	 */
 	void finish(bool hadoopOut) {
 		// Flush all per-thread buffers
-		flushAll(0);
+		flushAll();
 		
 		// Close all output streams
 		closeOuts();
@@ -380,7 +278,7 @@ public:
 			uint64_t numReported = 0, numReportedPaired = 0;
 			uint64_t numAligned = 0, numUnaligned = 0;
 			uint64_t numMaxed = 0;
-			for(int i = 0; i < nthreads_; i++) {
+			for(size_t i = 0; i < nthreads_; i++) {
 				numReported += ptNumReported_[i];
 				numReportedPaired += ptNumReportedPaired_[i];
 				numAligned += ptNumAligned_[i];
@@ -418,20 +316,17 @@ public:
 			}
 			else if(numReportedPaired > 0 && numReported == 0) {
 				cerr << "Reported " << (numReportedPaired >> 1)
-					 << " paired-end alignments to " << _outs.size()
-					 << " output stream(s)" << endl;
+					 << " paired-end alignments" << endl;
 			}
 			else if(numReported > 0 && numReportedPaired == 0) {
 				cerr << "Reported " << numReported
-					 << " alignments to " << _outs.size()
-					 << " output stream(s)" << endl;
+					 << " alignments" << endl;
 			}
 			else {
 				assert_gt(numReported + numReportedPaired, 0);
 				cerr << "Reported " << (numReportedPaired >> 1)
 					 << " paired-end alignments and " << numReported
-					 << " singleton alignments to " << _outs.size()
-					 << " output stream(s)" << endl;
+					 << " singleton alignments" << endl;
 			}
 			if(hadoopOut) {
 				cerr << "reporter:counter:Bowtie,Reads with reported alignments," << numAligned << endl;
@@ -444,28 +339,9 @@ public:
 	}
 
 	/**
-	 * Returns alignment output stream, lazily created if needed.
+	 * Returns alignment output stream.
 	 */
-	OutFileBuf& out(size_t refIdx) {
-		const size_t strIdx = refIdxToStreamIdx(refIdx);
-		if(_outs[strIdx] == NULL) {
-			assert(_deleteOuts);
-			{
-				ThreadSafe _ts(&main_mutex_m);
-				if(_outs[strIdx] == NULL) { // avoid race
-					BTString o;
-					o << "ref";
-					if     (strIdx < 10)    o << "0000";
-					else if(strIdx < 100)   o << "000";
-					else if(strIdx < 1000)  o << "00";
-					else if(strIdx < 10000) o << "0";
-					o << strIdx << ".map";
-					_outs[strIdx] = new OutFileBuf(o.toZBuf(), false);
-				}
-			}
-		}
-		return *(_outs[strIdx]);
-	}
+	OutFileBuf& out() { return out_; }
 
 	/**
 	 * Return true iff this HitSink dumps aligned reads to an output
@@ -684,10 +560,33 @@ protected:
 	/**
 	 * Flush thread's output buffer and reset both buffer and count.
 	 */
-	void flush(size_t threadId, size_t outId) {
+	void flush(size_t threadId, bool finalBatch) {
 		{
-			ThreadSafe _ts(_locks[0]); // flush
-			out(outId).writeString(ptBufs_[threadId]);
+			ThreadSafe _ts(&mutex_); // flush
+			if (reorder_) {
+				nchars += ptBufs_[threadId].length();
+				batch b(ptBufs_[threadId], batchIds_[threadId], false /* has batch been written */);
+				unwrittenBatches_.push_back(b);
+				// consider writing if we have enough data to fill the buffer
+				// or we're ready to output the final batch
+				if (finalBatch || nchars >= OutFileBuf::BUF_SZ) {
+					// sort by batch ID
+					std::sort(unwrittenBatches_.begin(), unwrittenBatches_.end());
+					for (std::vector<batch>::size_type i = 0; i < unwrittenBatches_.size(); i++) {
+						if (unwrittenBatches_[i].batchId - lastBatchIdSeen == 1) {
+							nchars -= out_.writeString(unwrittenBatches_[i].btString);
+							lastBatchIdSeen = unwrittenBatches_[i].batchId;
+							unwrittenBatches_[i].isWritten = true;
+						}
+					}
+					unwrittenBatches_.erase(std::remove_if(unwrittenBatches_.begin(),
+					                        unwrittenBatches_.end(), batch::remove_written_batches),
+					                        unwrittenBatches_.end());
+				}
+			}
+			else {
+				out_.writeString(ptBufs_[threadId]);
+			}
 		}
 		ptCounts_[threadId] = 0;
 		ptBufs_[threadId].clear();
@@ -696,9 +595,9 @@ protected:
 	/**
 	 * Flush all output buffers.
 	 */
-	void flushAll(size_t outId) {
-		for(int i = 0; i < nthreads_; i++) {
-			flush(i, outId);
+	void flushAll() {
+		for(size_t i = 0; i < nthreads_; i++) {
+			flush(i, i == nthreads_ - 1);
 		}
 	}
 
@@ -706,9 +605,9 @@ protected:
 	 * If the thread's output buffer is currently full, flush it and
 	 * reset both buffer and count.
 	 */
-	void maybeFlush(size_t threadId, size_t outId) {
+	void maybeFlush(size_t threadId) {
 		if(ptCounts_[threadId] >= perThreadBufSize_) {
-			flush(threadId, outId);
+			flush(threadId, false /* final batch? */);
 		}
 	}
 	
@@ -716,29 +615,54 @@ protected:
 	 * Close (and flush) all OutFileBufs.
 	 */
 	void closeOuts() {
-		// Flush and close all non-NULL output streams
-		for(size_t i = 0; i < _outs.size(); i++) {
-			if(_outs[i] != NULL && !_outs[i]->closed()) {
-				_outs[i]->close();
-			}
-		}
+		out_.close();
 	}
 
-	vector<OutFileBuf*> _outs;        /// the alignment output stream(s)
-	bool                _deleteOuts;  /// Whether to delete elements of _outs upon exit
+	OutFileBuf&         out_;        /// the alignment output stream(s)
 	vector<string>*     _refnames;    /// map from reference indexes to names
-	int                 _numWrappers; /// # threads owning a wrapper for this HitSink
-	vector<MUTEX_T*>    _locks;       /// pthreads mutexes for per-file critical sections
-	MUTEX_T             main_mutex_m;    /// pthreads mutexes for fields of this object
-	MUTEX_T             numWrapper_mutex_m;
-	MUTEX_T             firstLock;
-	ThreadSafe*         ts_wrap;      /// for mutual exclusion
+	MUTEX_T             mutex_;       /// pthreads mutexes for per-file critical sections
 	
 	// used for output read buffer	
 	size_t nthreads_;
 	std::vector<BTString> ptBufs_;
 	std::vector<size_t> ptCounts_;
 	int perThreadBufSize_;
+	bool reorder_;
+
+	struct batch {
+		BTString btString;
+		size_t batchId;
+		bool isWritten;
+
+		batch(BTString& s, size_t id, bool b)
+			: batchId(id), isWritten(b)
+		{
+			s.moveTo(btString);
+		}
+
+		bool operator<(const batch& other) const {
+			return batchId < other.batchId;
+		}
+
+		batch& operator=(batch& other) {
+			if (&other != this) {
+				batchId = other.batchId;
+				isWritten = other.isWritten;
+				other.btString.moveTo(btString);
+			}
+			return *this;
+		}
+
+		static bool remove_written_batches(const batch& b) {
+			return b.isWritten;
+		}
+	};
+
+
+	std::vector<batch> unwrittenBatches_;
+	std::vector<size_t> batchIds_;
+	size_t lastBatchIdSeen;
+	size_t nchars;
 
 	// Output filenames for dumping
 	std::string dumpAlBase_;
@@ -886,10 +810,9 @@ public:
 		hitsForThisRead_(),
 		_max(max),
 		_n(n),
-        defaultMapq_(defaultMapq),
+		defaultMapq_(defaultMapq),
 		threadId_(threadId)
 	{
-		sink.addWrapper();
 		assert_gt(_n, 0);
 	}
 
@@ -938,7 +861,7 @@ public:
 			}
 			xms++;
 			_sink.reportHits(NULL, &_bufferedHits, 0, _bufferedHits.size(),
-			                 threadId_, mapq, xms, true);
+			                 threadId_, mapq, xms, true, p.rdid());
 			_sink.dumpAlign(p);
 			ret = (uint32_t)_bufferedHits.size();
 			_bufferedHits.clear();
@@ -1282,7 +1205,7 @@ public:
 	 * If there have been any hits reported so far, classify any
 	 * subsequent alignments with higher strata as irrelevant.
 	 */
-	virtual bool irrelevantCost(uint16_t cost) {
+	virtual bool irrelevantCost(uint16_t cost) const {
 		if(hitsForThisRead_) {
 			// irrelevant iff at worse stratum
 			return ((int)cost >> 14) > bestStratum_;
@@ -1420,110 +1343,6 @@ private:
 };
 
 /**
- * Sink that prints lines like this:
- * (pat-id)[-|+]:<hit1-text-id,hit2-text-offset>,<hit2-text-id...
- *
- * Activated with --concise
- */
-class ConciseHitSink : public HitSink {
-public:
-	/**
-	 * Construct a single-stream ConciseHitSink (default)
-	 */
-	ConciseHitSink(
-		OutFileBuf* out,
-		int offBase,
-		const std::string& dumpAl,
-		const std::string& dumpUnal,
-		const std::string& dumpMax,
-		bool onePairFile,
-		bool sampleMax,
-		std::vector<std::string>* refnames,
-		size_t nthreads,
-		int perThreadBufSize,
-		bool reportOpps = false) :
-		HitSink(
-			out,
-			dumpAl,
-			dumpUnal,
-			dumpMax,
-			onePairFile,
-			sampleMax,
-			refnames,
-			nthreads,
-			perThreadBufSize),
-		_reportOpps(reportOpps),
-		offBase_(offBase) { }
-
-	/**
-	 * Construct a multi-stream ConciseHitSink with one stream per
-	 * reference string (see --refout)
-	 */
-	ConciseHitSink(
-		size_t numOuts,
-		int offBase,
-		const std::string& dumpAl,
-		const std::string& dumpUnal,
-		const std::string& dumpMax,
-		bool onePairFile,
-		bool sampleMax,
-		std::vector<std::string>* refnames,
-		size_t nthreads,
-		int perThreadBufSize,
-		bool reportOpps = false) :
-		HitSink(
-			numOuts,
-			dumpAl,
-			dumpUnal,
-			dumpMax,
-			onePairFile,
-			sampleMax,
-			refnames,
-			nthreads,
-			perThreadBufSize),
-		_reportOpps(reportOpps),
-		offBase_(offBase) { }
-
-	/**
-	 * Append a verbose, readable hit to the given output stream.
-	 */
-	static void append(
-		BTString& o,
-		const Hit& h,
-		int offBase,
-		bool reportOpps)
-	{
-		o << h.patId;
-		if(h.mate > 0) {
-			assert(h.mate == 1 || h.mate == 2);
-			o << '/' << (int)h.mate;
-		}
-		o << (h.fw? '+' : '-') << ':';
-		// .first is text id, .second is offset
-		o << '<' << h.h.first << ',' << (h.h.second + offBase) << ',' << h.mms.count();
-		if(reportOpps) {
-			o << ',' << h.oms;
-		}
-		o << '>' << '\n';
-	}
-
-	/**
-	 * Append a verbose, readable hit to the given output stream.
-	 */
-	virtual void append(BTString& o, const Hit& h, int mapq, int xms) {
-		ConciseHitSink::append(o, h, this->offBase_, this->_reportOpps);
-	}
-
-protected:
-
-private:
-	bool _reportOpps;
-	int  offBase_;     /// Add this to reference offsets before outputting.
-	                   /// (An easy way to make things 1-based instead of
-	                   /// 0-based)
-};
-
-/**
  * Print the given string.  If ws = true, print only up to and not
  * including the first space or tab.  Useful for printing reference
  * names.
@@ -1553,14 +1372,12 @@ public:
 	 * Construct a single-stream VerboseHitSink (default)
 	 */
 	VerboseHitSink(
-		OutFileBuf* out,
+		OutFileBuf& out,
 		int offBase,
 		bool colorSeq,
 		bool colorQual,
 		bool printCost,
 		const Bitset& suppressOuts,
-		ReferenceMap *rmap,
-		AnnotationMap *amap,
 		bool fullRef,
 		const std::string& dumpAl,
 		const std::string& dumpUnal,
@@ -1569,7 +1386,7 @@ public:
 		bool sampleMax,
 		std::vector<std::string>* refnames,
 		size_t nthreads,
-		int perThreadBufSize,
+		size_t perThreadBufSize,
 		int partition = 0) :
 		HitSink(
 			out,
@@ -1580,67 +1397,21 @@ public:
 			sampleMax,
 			refnames,
 			nthreads,
-			perThreadBufSize),
+			perThreadBufSize,
+			false),
 		partition_(partition),
 		offBase_(offBase),
 		colorSeq_(colorSeq),
 		colorQual_(colorQual),
 		cost_(printCost),
 		suppress_(suppressOuts),
-		fullRef_(fullRef),
-		rmap_(rmap), amap_(amap)
-		{ }
-
-	/**
-	 * Construct a multi-stream VerboseHitSink with one stream per
-	 * reference string (see --refout)
-	 */
-	VerboseHitSink(
-		size_t numOuts,
-		int offBase,
-		bool colorSeq,
-		bool colorQual,
-		bool printCost,
-		const Bitset& suppressOuts,
-		ReferenceMap *rmap,
-		AnnotationMap *amap,
-		bool fullRef,
-		const std::string& dumpAl,
-		const std::string& dumpUnal,
-		const std::string& dumpMax,
-		bool onePairFile,
-		bool sampleMax,
-		std::vector<std::string>* refnames,
-		size_t nthreads,
-		int perThreadBufSize,
-		int partition = 0) :
-		HitSink(
-			numOuts,
-			dumpAl,
-			dumpUnal,
-			dumpMax,
-			onePairFile,
-			sampleMax,
-			refnames,
-			nthreads,
-			perThreadBufSize),
-		partition_(partition),
-		offBase_(offBase),
-		colorSeq_(colorSeq),
-		colorQual_(colorQual),
-		cost_(printCost),
-		suppress_(64),
-		fullRef_(fullRef),
-		rmap_(rmap),
-		amap_(amap)
+		fullRef_(fullRef)
 		{ }
 
 	static void append(
 		BTString& o,
 		const Hit& h,
 		const vector<string>* refnames,
-		ReferenceMap *rmap,
-		AnnotationMap *amap,
 		bool fullRef,
 		int partition,
 		int offBase,
@@ -1654,7 +1425,7 @@ public:
 	 * corresponding to the hit.
 	 */
 	virtual void append(BTString& o, const Hit& h, int mapq, int xms) {
-		VerboseHitSink::append(o, h, _refnames, rmap_, amap_,
+		VerboseHitSink::append(o, h, _refnames,
 		                       fullRef_, partition_, offBase_,
 		                       colorSeq_, colorQual_, cost_,
 		                       suppress_);
@@ -1678,18 +1449,6 @@ private:
 	bool     cost_;        /// true -> print statum and cost
 	Bitset   suppress_;    /// output fields to suppress
 	bool fullRef_;         /// print full reference name
-	ReferenceMap *rmap_;   /// mapping to reference coordinate system.
-	AnnotationMap *amap_;  ///
-};
-
-/**
- * Sink that does nothing.
- */
-class StubHitSink : public HitSink {
-public:
-	StubHitSink() : HitSink(new OutFileBuf(".tmp"), "", "", "", false, false, NULL, 1, 1) { }
-	
-	virtual void append(BTString& o, const Hit& h, int mapq, int xms) { }
 };
 
 #endif /*HIT_H_*/
diff --git a/hit_set.h b/hit_set.h
index 40df01b..7d23d2c 100644
--- a/hit_set.h
+++ b/hit_set.h
@@ -15,8 +15,6 @@
 #include "filebuf.h"
 #include "edit.h"
 #include "alphabet.h"
-#include "annot.h"
-#include "refmap.h"
 #include "btypes.h"
 
 /**
@@ -418,14 +416,6 @@ struct HitSet {
 	}
 
 	/**
-	 * Apply a reference mappings to all the contained hits.
-	 */
-	void applyReferenceMap(const ReferenceMap& map) {
-		std::vector<HitSetEnt>::iterator it;
-		for(it = ents.begin(); it != ents.end(); it++) map.map(it->h);
-	}
-
-	/**
 	 * Clear out all the strings and all the entries.
 	 */
 	void clearAll() {
diff --git a/pat.cpp b/pat.cpp
index 155d47c..860b835 100644
--- a/pat.cpp
+++ b/pat.cpp
@@ -4,6 +4,7 @@
 #include <stdexcept>
 #include <seqan/sequence.h>
 #include <seqan/file.h>
+#include <string.h>
 
 #include "pat.h"
 #include "filebuf.h"
@@ -231,11 +232,11 @@ pair<bool, int> CFilePatternSource::nextBatchImpl(
 	bool batch_a)
 {
 	bool done = false;
-	int nread = 0;
+	size_t nread = 0;
 	pt.setReadId(readCnt_);
 	while(true) { // loop that moves on to next file when needed
 		do {
-			pair<bool, int> ret = nextBatchFromFile(pt, batch_a);
+			pair<bool, int> ret = nextBatchFromFile(pt, batch_a, nread);
 			done = ret.first;
 			nread = ret.second;
 		} while(!done && nread == 0); // not sure why this would happen
@@ -243,9 +244,10 @@ pair<bool, int> CFilePatternSource::nextBatchImpl(
 			open();
 			resetForNextFile(); // reset state to handle a fresh file
 			filecur_++;
-			if(nread == 0) {
+			if(nread == 0 || nread < pt.max_buf_) {
 				continue;
 			}
+			done = false;
 		}
 		break;
 	}
@@ -356,10 +358,9 @@ void CFilePatternSource::open() {
 VectorPatternSource::VectorPatternSource(
 	const vector<string>& seqs,
 	bool color,
-	const char *dumpfile,
 	int trim3,
 	int trim5) :
-	TrimmingPatternSource(dumpfile, trim3, trim5),
+	TrimmingPatternSource(trim3, trim5),
 	color_(color),
 	cur_(0),
 	paired_(false),
@@ -572,7 +573,8 @@ bool VectorPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> FastaPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a,
+	size_t readi)
 {
 	int c;
 	vector<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
@@ -591,7 +593,6 @@ pair<bool, int> FastaPatternSource::nextBatchFromFile(
 		first_ = false;
 	}
 	bool done = false;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && !done; readi++) {
 		readbuf[readi].readOrigBuf[0] = '>';
@@ -739,11 +740,11 @@ bool FastaPatternSource::parse(Read& r, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> FastaContinuousPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a,
+	size_t readi)
 {
 	int c = -1;
 	vector<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	int nameoff = 0;
 	while(readi < pt.max_buf_) {
 		c = getc_wrapper();
@@ -903,7 +904,8 @@ bool FastaContinuousPatternSource::parse(
  */
 pair<bool, int> FastqPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a,
+	size_t readi)
 {
 	int c = 0;
 	vector<Read>* readBuf = batch_a ? &pt.bufa_ : &pt.bufb_;
@@ -917,11 +919,10 @@ pair<bool, int> FastqPatternSource::nextBatchFromFile(
 			throw 1;
 		}
 		first_ = false;
-		(*readBuf)[0].readOrigBuf[0] = c;
-		(*readBuf)[0].readOrigBufLen = 1;
+		(*readBuf)[readi].readOrigBuf[0] = c;
+		(*readBuf)[readi].readOrigBufLen = 1;
 	}
 	bool done = false, aborted = false;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	while (readi < pt.max_buf_ && !done) {
 		char* buf = (*readBuf)[readi].readOrigBuf;
@@ -1102,8 +1103,18 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const {
 			tooFewQualities(r.name);
 			return false;
 		} else if(qualoff > seqoff) {
-			tooManyQualities(r.name);
-			return false;
+			// if qualoff is at most 2 characters longer than the sequence
+			// then the extra characters will most likely be the quality values
+			// of the primer and the first base (which get discarded by bowtie).
+			// In this case move the remainder of the sequence the (qualoff - seqoff)
+			// positions left.
+			if (r.color && qualoff - seqoff <= 2) {
+				memmove(r.qualBuf, r.qualBuf + (qualoff - seqoff), seqoff);
+			}
+			else {
+				tooManyQualities(r.name);
+				return false;
+			}
 		}
 	}
 	r.qualBuf[seqan::length(r.patFw)] = '\0';
@@ -1128,14 +1139,14 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> TabbedPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a,
+	size_t readi)
 {
 	int c = getc_wrapper();
 	while(c >= 0 && (c == '\n' || c == '\r')) {
 		c = getc_wrapper();
 	}
 	vector<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && c >= 0; readi++) {
 		readbuf[readi].readOrigBufLen = 0;
@@ -1307,14 +1318,14 @@ bool TabbedPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const {
  */
 pair<bool, int> RawPatternSource::nextBatchFromFile(
 	PerThreadReadBuf& pt,
-	bool batch_a)
+	bool batch_a,
+	size_t readi)
 {
 	int c = getc_wrapper();
 	while(c >= 0 && (c == '\n' || c == '\r')) {
 		c = getc_wrapper();
 	}
 	vector<Read>& readbuf = batch_a ? pt.bufa_ : pt.bufb_;
-	size_t readi = 0;
 	// Read until we run out of input or until we've filled the buffer
 	for(; readi < pt.max_buf_ && c >= 0; readi++) {
 		readbuf[readi].readOrigBufLen = 0;
diff --git a/pat.h b/pat.h
index 9001468..7c3ba9f 100644
--- a/pat.h
+++ b/pat.h
@@ -394,21 +394,10 @@ struct PerThreadReadBuf {
  */
 class PatternSource {
 public:
-	PatternSource(
-		const char *dumpfile = NULL) :
+	PatternSource() :
 		readCnt_(0),
-		dumpfile_(dumpfile),
 		mutex()
-	{
-		// Open dumpfile, if specified
-		if(dumpfile_ != NULL) {
-			out_.open(dumpfile_, ios_base::out);
-			if(!out_.good()) {
-				cerr << "Could not open pattern dump file \"" << dumpfile_ << "\" for writing" << endl;
-				throw 1;
-			}
-		}
-	}
+	{ }
 
 	virtual ~PatternSource() { }
 
@@ -440,19 +429,6 @@ public:
 protected:
 
 	/**
-	 * Dump the contents of the ReadBuf to the dump file.
-	 */
-	void dumpBuf(const Read& r) {
-		assert(dumpfile_ != NULL);
-		dump(out_, r.patFw,
-		     empty(r.qual) ? String<char>("(empty)") : r.qual,
-		     empty(r.name) ? String<char>("(empty)") : r.name);
-		dump(out_, r.patRc,
-		     empty(r.qualRev) ? String<char>("(empty)") : r.qualRev,
-		     empty(r.name) ? String<char>("(empty)") : r.name);
-	}
-
-	/**
 	 * Default format for dumping a read to an output stream.  Concrete
 	 * subclasses might want to do something fancier.
 	 */
@@ -467,9 +443,6 @@ protected:
 	/// The number of reads read by this PatternSource
 	volatile uint64_t readCnt_;
 
-	const char *dumpfile_; /// dump patterns to this file before returning them
-	ofstream out_;         /// output stream for dumpfile
-
 	/// Lock enforcing mutual exclusion for (a) file I/O, (b) writing fields
 	/// of this or another other shared object.
 	MUTEX_T mutex;
@@ -483,10 +456,9 @@ protected:
  */
 class TrimmingPatternSource : public PatternSource {
 public:
-	TrimmingPatternSource(const char *dumpfile = NULL,
-	                      int trim3 = 0,
+	TrimmingPatternSource(int trim3 = 0,
 	                      int trim5 = 0) :
-		PatternSource(dumpfile),
+		PatternSource(),
 		trim3_(trim3), trim5_(trim5) { }
 protected:
 	int trim3_;
@@ -506,7 +478,6 @@ public:
 	VectorPatternSource(
 		const vector<string>& v,
 		bool color,
-		const char *dumpfile = NULL,
 		int trim3 = 0,
 		int trim5 = 0);
 	
@@ -560,12 +531,11 @@ private:
 class CFilePatternSource : public TrimmingPatternSource {
 public:
 	CFilePatternSource(
-	    const vector<string>& infiles,
-	    const vector<string>* qinfiles,
-	    const char *dumpfile = NULL,
-	    int trim3 = 0,
+		const vector<string>& infiles,
+		const vector<string>* qinfiles,
+		int trim3 = 0,
 	    int trim5 = 0) :
-		TrimmingPatternSource(dumpfile, trim3, trim5),
+		TrimmingPatternSource( trim3, trim5),
 		infiles_(infiles),
 		filecur_(0),
 		fp_(NULL),
@@ -640,7 +610,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a) = 0;
+		bool batch_a,
+		size_t read_idx) = 0;
 
 	/**
 	 * Reset state to handle a fresh file
@@ -708,20 +679,18 @@ public:
 
 	FastaPatternSource(
 		const vector<string>& infiles,
-	    const vector<string>* qinfiles,
-	    bool color,
-	    const char *dumpfile = NULL,
-	    int trim3 = 0,
-	    int trim5 = 0,
-	    bool solexa64 = false,
-	    bool phred64 = false,
-	    bool intQuals = false) :
+		const vector<string>* qinfiles,
+		bool color,
+		int trim3 = 0,
+		int trim5 = 0,
+		bool solexa64 = false,
+		bool phred64 = false,
+		bool intQuals = false) :
 		CFilePatternSource(
 			infiles,
 			qinfiles,
-		    dumpfile,
 			trim3,
-		    trim5),
+			trim5),
 		first_(true),
 		color_(color),
 		solexa64_(solexa64),
@@ -749,7 +718,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		size_t read_idx);
 
 	/**
 	 * Reset state to handle a fresh file
@@ -797,18 +767,16 @@ public:
 	TabbedPatternSource(
 		const vector<string>& infiles,
 		bool secondName,  // whether it's --12/--tab5 or --tab6
-	    bool color,
-	    const char *dumpfile = NULL,
-	    int trim3 = 0,
-	    int trim5 = 0,
-	    bool solQuals = false,
-	    bool phred64Quals = false,
-	    bool intQuals = false) :
+		bool color,
+		int trim3 = 0,
+		int trim5 = 0,
+		bool solQuals = false,
+		bool phred64Quals = false,
+		bool intQuals = false) :
 		CFilePatternSource(
 			infiles,
 			NULL,
-		    dumpfile,
-		    trim3,
+			trim3,
 			trim5),
 		color_(color),
 		solQuals_(solQuals),
@@ -827,7 +795,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		size_t read_idx);
 	
 	/**
 	 * Dump a FASTQ-style record for the read.
@@ -859,12 +828,10 @@ public:
 	FastaContinuousPatternSource(
 			const vector<string>& infiles,
 			size_t length,
-			size_t freq,
-			const char *dumpfile = NULL) :
+			size_t freq) :
 		CFilePatternSource(
 			infiles,
 			NULL,
-		    dumpfile,
 			0,
 			0),
 		length_(length),
@@ -896,7 +863,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		size_t read_idx);
 
 	/**
 	 * Reset state to be read for the next file.
@@ -935,20 +903,18 @@ class FastqPatternSource : public CFilePatternSource {
 public:
 	FastqPatternSource(
 		const vector<string>& infiles,
-	    bool color,
-	    const char *dumpfile = NULL,
-	    int trim3 = 0,
-	    int trim5 = 0,
-	    bool solexa_quals = false,
-	    bool phred64Quals = false,
-	    bool integer_quals = false,
-	    bool interleaved = false,
-	    uint32_t skip = 0) :
+		bool color,
+		int trim3 = 0,
+		int trim5 = 0,
+		bool solexa_quals = false,
+		bool phred64Quals = false,
+		bool integer_quals = false,
+		bool interleaved = false,
+		uint32_t skip = 0) :
 		CFilePatternSource(
 			infiles,
 			NULL,
-		    dumpfile,
-		    trim3,
+			trim3,
 			trim5),
 		first_(true),
 		solQuals_(solexa_quals),
@@ -978,7 +944,8 @@ protected:
 	 */
 	virtual pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		size_t read_idx);
 
 	virtual void resetForNextFile() {
 		first_ = true;
@@ -1013,14 +980,12 @@ public:
 
 	RawPatternSource(
 		const vector<string>& infiles,
-	    bool color,
-	    const char *dumpfile = NULL,
+		bool color,
 		int trim3 = 0,
-	    int trim5 = 0) :
+		int trim5 = 0) :
 		CFilePatternSource(
 			infiles,
 			NULL,
-		    dumpfile,
 			trim3,
 			trim5),
 		first_(true),
@@ -1043,7 +1008,8 @@ protected:
 	 */
 	virtual std::pair<bool, int> nextBatchFromFile(
 		PerThreadReadBuf& pt,
-		bool batch_a);
+		bool batch_a,
+		size_t read_idx);
 
 	virtual void resetForNextFile() {
 		first_ = true;
@@ -1360,6 +1326,8 @@ public:
 		// Free the vector
 		delete composers;
 	}
+
+	virtual ~PatternSourcePerThreadFactory() {}
 	
 private:
 	/// Container for obtaining paired reads from PatternSources
diff --git a/refmap.cpp b/refmap.cpp
deleted file mode 100644
index 29e3dc6..0000000
--- a/refmap.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * refmap.cpp
- *
- *  Created on: Aug 3, 2009
- *      Author: Ben Langmead
- */
-
-#include <stdexcept>
-#include "refmap.h"
-#include "assert_helpers.h"
-
-using namespace std;
-
-/**
- * Given a refid,offset pair in the index space, transform it into the
- * reference coordinate space according to the reference mappings
- * provided by the user.
- */
-void ReferenceMap::map(UPair& h) const {
-	if(h.first >= map_.size()) {
-		cerr << "Could not find a reference-map entry for reference "
-				  << h.first << " in map file \"" << fname_ << "\""
-				  << endl;
-		throw 1;
-	}
-	h.second += map_[h.first].second;
-	h.first = map_[h.first].first;
-}
-
-/**
- * Parse a reference-map file.
- */
-void ReferenceMap::parse() {
-	ifstream in(fname_);
-	if(!in.good() || !in.is_open()) {
-		cerr << "Could not open reference map file " << fname_ << endl;
-		throw 1;
-	}
-	int c;
-	while((c = in.peek()) != EOF) {
-		if(c == '>') {
-			// This appears to be a name line
-			in.get(); // chop off the initial '>'
-			TIndexOffU off;
-			in >> off;
-			in.get(); // chop off tab
-			char buf[1024];
-			in.getline(buf, 1023);
-			if(parseNames_) {
-				if(names_.size() <= off) names_.resize(off+1);
-				names_[off] = string(buf);
-			}
-			continue;
-		}
-		TIndexOffU id, off;
-		in >> id >> off;
-		map_.resize(map_.size()+1);
-		map_.back().first = id;
-		map_.back().second = off;
-		while(isspace(in.peek())) in.get();
-	}
-	assert_eq(EOF, c);
-	in.close();
-}
diff --git a/refmap.h b/refmap.h
deleted file mode 100644
index cf5ba7d..0000000
--- a/refmap.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * refmap.h
- *
- *  Created on: Aug 3, 2009
- *      Author: Ben Langmead
- */
-
-#ifndef REFMAP_H_
-#define REFMAP_H_
-
-#include <stdint.h>
-#include <cassert>
-#include <vector>
-#include <iostream>
-#include <fstream>
-#include "btypes.h"
-
-class ReferenceMap {
-	typedef std::pair<TIndexOffU, TIndexOffU> UPair;
-
-public:
-	ReferenceMap(const char *fname, bool parseNames) {
-		fname_ = fname;
-		parseNames_ = parseNames;
-		parse();
-	}
-
-	/**
-	 * Give a reference coordinate in the index, translate it into a
-	 * new reference coordinate via the reference map supplied by the
-	 * user.
-	 */
-	void map(UPair& h) const;
-
-	/**
-	 * Return true iff we have a name for reference with id 'i'.
-	 */
-	bool hasName(size_t i) const {
-		if(!parseNames_) return false;
-		return !names_[i].empty();
-	}
-
-	/**
-	 * Get the name for reference with id 'i'.
-	 */
-	const std::string& getName(size_t i) const {
-		assert(parseNames_);
-		assert(hasName(i));
-		return names_[i];
-	}
-
-protected:
-
-	/**
-	 * Parse a reference-map file.
-	 */
-	void parse();
-
-	const char *fname_;
-	std::vector<UPair> map_;
-	bool parseNames_;
-	std::vector<std::string> names_;
-};
-
-#endif /* REFMAP_H_ */
diff --git a/row_chaser.h b/row_chaser.h
index 1a7cacc..d4c72ce 100644
--- a/row_chaser.h
+++ b/row_chaser.h
@@ -106,7 +106,7 @@ public:
 			assert(prepped_);
 			prepped_ = false;
 			if(metrics_ != NULL) metrics_->curBwtOps_++;
-			uint32_t newrow = ebwt_->mapLF(sideloc_);
+			uint64_t newrow = ebwt_->mapLF(sideloc_);
 			ASSERT_ONLY(sideloc_.invalidate());
 			jumps_++;
 			assert_neq(newrow, row_);
diff --git a/sam.cpp b/sam.cpp
index 95fe940..e14c2c4 100644
--- a/sam.cpp
+++ b/sam.cpp
@@ -24,7 +24,6 @@ void SAMHitSink::appendHeaders(
 	const vector<string>& refnames,
 	bool color,
 	bool nosq,
-	ReferenceMap *rmap,
 	const TIndexOffU* plen,
 	bool fullRef,
 	bool noQnameTrunc,
@@ -37,9 +36,7 @@ void SAMHitSink::appendHeaders(
 		for(size_t i = 0; i < numRefs; i++) {
 			// RNAME
 			o << "@SQ\tSN:";
-			if(!refnames.empty() && rmap != NULL) {
-				printUptoWs(o, rmap->getName(i), !fullRef);
-			} else if(i < refnames.size()) {
+			if(i < refnames.size()) {
 				printUptoWs(o, refnames[i], !fullRef);
 			} else {
 				o << i;
@@ -80,7 +77,7 @@ void SAMHitSink::reportUnOrMax(
 	assert(!un || hs == NULL || hs->size() == 0);
 	size_t hssz = 0;
 	if(hs != NULL) hssz = hs->size();
-	maybeFlush(threadId, 0);
+	maybeFlush(threadId);
 	BTString& o = ptBufs_[threadId];
 	for(int i = 0; i < (int)seqan::length(p.bufa().name) - (paired ? 2 : 0); i++) {
 		if(!noQnameTrunc_ && isspace((int)p.bufa().name[i])) break;
@@ -113,6 +110,7 @@ void SAMHitSink::reportUnOrMax(
 	if(paired) {
 		// truncate final 2 chars
 		for(int i = 0; i < (int)seqan::length(p.bufb().name)-2; i++) {
+			if(!noQnameTrunc_ && isspace((int)p.bufb().name[i])) break;
 			o << p.bufb().name[i];
 		}
 		o << '\t'
@@ -141,6 +139,9 @@ void SAMHitSink::reportUnOrMax(
 		o << '\n';
 	}
 	ptCounts_[threadId]++;
+	if (reorder_) {
+	   batchIds_[threadId] = p.rdid() / perThreadBufSize_ + 1;
+	}
 }
 
 /**
@@ -172,9 +173,7 @@ void SAMHitSink::append(BTString& o, const Hit& h, int mapq, int xms) {
 	if(h.mate > 0 && !h.mfw) flags |= SAM_FLAG_MATE_STRAND;
 	o << flags << "\t";
 	// RNAME
-	if(_refnames != NULL && rmap_ != NULL) {
-		printUptoWs(o, rmap_->getName(h.h.first), !fullRef_);
-	} else if(_refnames != NULL && h.h.first < _refnames->size()) {
+	if(_refnames != NULL && h.h.first < _refnames->size()) {
 		printUptoWs(o, (*_refnames)[h.h.first], !fullRef_);
 	} else {
 		o << h.h.first;
@@ -338,7 +337,7 @@ void SAMHitSink::reportMaxed(
 				int strat = min(hs[i].stratum, hs[i+1].stratum);
 				if(strat == bestStratum) {
 					if(num == r) {
-						reportHits(NULL, &hs, i, i+2, threadId, 0, (int)(hs.size()/2)+1, false);
+						reportHits(NULL, &hs, i, i+2, threadId, 0, (int)(hs.size()/2)+1, false, p.rdid());
 						break;
 					}
 					num++;
@@ -353,7 +352,7 @@ void SAMHitSink::reportMaxed(
 			}
 			assert_leq(num, hs.size());
 			uint32_t r = rand.nextU32() % num;
-			reportHits(&hs[r], NULL, 0, 1, threadId, 0, (int)hs.size()+1, false);
+			reportHits(&hs[r], NULL, 0, 1, threadId, 0, (int)hs.size()+1, false, p.rdid());
 		}
 	} else {
 		reportUnOrMax(p, &hs, threadId, false);
diff --git a/sam.h b/sam.h
index b103418..23d1364 100644
--- a/sam.h
+++ b/sam.h
@@ -8,14 +8,10 @@
 #ifndef SAM_H_
 #define SAM_H_
 
-#include "refmap.h"
-#include "annot.h"
 #include "pat.h"
 #include "random_source.h"
 #include "btypes.h"
 
-class ReferenceMap;
-class AnnotationMap;
 class PatternSourcePerThread;
 
 enum {
@@ -41,10 +37,8 @@ public:
 	 * Construct a single-stream VerboseHitSink (default)
 	 */
 	SAMHitSink(
-		OutFileBuf* out,
+		OutFileBuf& out,
 		int offBase,
-		ReferenceMap *rmap,
-		AnnotationMap *amap,
 		bool fullRef,
 		bool noQnameTrunc,
 		const std::string& dumpAl,
@@ -54,7 +48,8 @@ public:
 		bool sampleMax,
 		std::vector<std::string>* refnames,
 		size_t nthreads,
-		int perThreadBufSize) :
+		int perThreadBufSize,
+		bool reorder) :
 		HitSink(
 			out,
 			dumpAl,
@@ -64,47 +59,13 @@ public:
 			sampleMax,
 			refnames,
 			nthreads,
-			perThreadBufSize),
+			perThreadBufSize,
+			reorder),
 		offBase_(offBase),
-		rmap_(rmap),
-		amap_(amap),
 		fullRef_(fullRef),
 		noQnameTrunc_(noQnameTrunc) { }
 
 	/**
-	 * Construct a multi-stream VerboseHitSink with one stream per
-	 * reference string (see --refout)
-	 */
-	SAMHitSink(
-		size_t numOuts,
-		int offBase,
-		ReferenceMap *rmap,
-		AnnotationMap *amap,
-		bool fullRef,
-		const std::string& dumpAl,
-		const std::string& dumpUnal,
-		const std::string& dumpMax,
-		bool onePairFile,
-		bool sampleMax,
-		std::vector<std::string>* refnames,
-		size_t nthreads,
-		int perThreadBufSize) :
-		HitSink(
-			numOuts,
-			dumpAl,
-			dumpUnal,
-			dumpMax,
-			onePairFile,
-			sampleMax,
-			refnames,
-			nthreads,
-			perThreadBufSize),
-		offBase_(offBase),
-		rmap_(rmap),
-		amap_(amap),
-		fullRef_(fullRef) { }
-
-	/**
 	 * Append a verbose, readable hit to the output stream
 	 * corresponding to the hit.
 	 */
@@ -119,7 +80,6 @@ public:
 		const vector<string>& refnames,
 		bool color,
 		bool nosq,
-		ReferenceMap *rmap,
 		const TIndexOffU* plen,
 		bool fullRef,
 		bool noQnameTrunc,
@@ -159,8 +119,6 @@ private:
 	int  offBase_;        /// Add this to reference offsets before outputting.
 	                      /// (An easy way to make things 1-based instead of
 	                      /// 0-based)
-	ReferenceMap *rmap_;  /// mapping to reference coordinate system.
-	AnnotationMap *amap_; ///
 	bool fullRef_;        /// print full reference name, not just up to whitespace
 	bool noQnameTrunc_;   /// true -> don't truncate QNAME at first whitespace
 };
diff --git a/scripts/test/simple_tests.pl b/scripts/test/simple_tests.pl
index ce1390c..d8b452f 100755
--- a/scripts/test/simple_tests.pl
+++ b/scripts/test/simple_tests.pl
@@ -950,6 +950,22 @@ my @cases = (
 	  hits  => [ { 4 => 1 } ],
 	  color => 1 },
 
+	{ name  => "Colorspace FASTQ - seq + 1",
+	  ref   => [ "AAAACGAAAGCTTTTATAGATGGGG" ],
+	  fastq =>   "\@r0\n132002320113332231\n+\nIIIIIIIIIIIIIIIIIII\n",
+	  args  => [ "-C -v 2",
+				 "-C -n 2" ],
+	  hits  => [ { 4 => 1 } ],
+	  color => 1 },
+
+	{ name  => "Colorspace FASTQ - seq + 2",
+	  ref   => [ "AAAACGAAAGCTTTTATAGATGGGG" ],
+	  fastq =>   "\@r0\n132002320113332231\n+\nIIIIIIIIIIIIIIIIIIII\n",
+	  args  => [ "-C -v 2",
+				 "-C -n 2" ],
+	  hits  => [ { 4 => 1 } ],
+	  color => 1 },
+
 	{ name  => "Colorspace FASTQ with primer",
 	  ref   => [ "AAAACGAAAGCTTTTATAGATGGGG" ],
 	  fastq =>   "\@r0\nA0132002320113332231\n+\nIIIIIIIIIIIIIIIIII\n",
diff --git a/sstring.h b/sstring.h
index 956a638..f68ddfd 100644
--- a/sstring.h
+++ b/sstring.h
@@ -2075,6 +2075,27 @@ public:
 	 */
 	T* wbuf() { return cs_; }
 
+	/**
+	 * Transfer this string's sz_, cs_, len_ and printcs_ to o and
+	 * reset this string's copies to 0 / NULL.  Moving a string onto
+	 * itself is a no-op.
+	 * NOTE(review): o's previous cs_/printcs_ values are overwritten,
+	 * not released here; confirm callers only pass an empty o.
+	 */
+	void moveTo(SStringExpandable<T,S>& o) {
+		if (this == &o) return;
+		// Copy every field across to the destination ...
+		o.sz_      = sz_;
+		o.cs_      = cs_;
+		o.len_     = len_;
+		o.printcs_ = printcs_;
+		// ... then clear the fields on this side.
+		sz_      = 0;
+		cs_      = NULL;
+		len_     = 0;
+		printcs_ = NULL;
+	}
+
 protected:
 	/**
 	 * Allocate new, bigger buffer and copy old contents into it.  If
diff --git a/threading.h b/threading.h
index a5cf944..431d490 100644
--- a/threading.h
+++ b/threading.h
@@ -84,6 +84,22 @@ private:
 #endif
 };
 
+/**
+ * Sleep for approximately x milliseconds.  x is evaluated exactly once.
+ * On non-Windows builds the interval is split into whole seconds and
+ * nanoseconds because nanosleep() rejects tv_nsec values >= 1e9.
+ */
+#if defined(_TTHREAD_WIN32_)
+#define SLEEP(x) Sleep(x)
+#else
+#define SLEEP(x) { \
+	const long ms_tmp_ = (x); \
+	const timespec ts_tmp_ = {ms_tmp_ / 1000, (ms_tmp_ % 1000) * 1000000L}; \
+	nanosleep(&ts_tmp_, NULL); \
+}
+#endif
+
+
 #ifdef WITH_TBB
 #ifdef WITH_AFFINITY
 //ripped entirely from;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bowtie.git



More information about the debian-med-commit mailing list