[med-svn] [bwa] 03/04: Imported Upstream version 0.7.9a

Fri Jun 6 20:07:48 UTC 2014

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository bwa.

commit 30f8d351fd380844dcc010e5ec53c045b10e0032
Author: Andreas Tille <tille at debian.org>
Date:   Fri Jun 6 21:55:37 2014 +0200

    Imported Upstream version 0.7.9a
---
 Makefile        |  10 +-
 NEWS => NEWS.md | 186 ++++++++++-----
 README.md       | 136 +++++++++--
 bntseq.c        |  36 +++
 bntseq.h        |   3 +
 bwa-helper.js   | 706 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 bwa.1           |  14 +-
 bwa.c           |  65 +-----
 bwa.h           |   2 -
 bwamem.c        | 618 ++++++++++++++++++++++++++++---------------------
 bwamem.h        |  18 +-
 bwamem_extra.c  | 147 ++++++++++++
 bwamem_pair.c   |  66 ++++--
 bwape.c         |   1 +
 bwase.c         |   2 +-
 bwt_lite.c      |   4 +-
 bwtaln.c        |   4 +-
 fastmap.c       | 114 ++++++---
 kbtree.h        |   2 +-
 ksw.c           | 116 ++++++----
 main.c          |   2 +-
 21 files changed, 1744 insertions(+), 508 deletions(-)

diff --git a/Makefile b/Makefile
index 6490932..60c2104 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,10 @@
 CC=			gcc
 #CC=			clang --analyze
-CFLAGS=		-g -Wall -O2 -Wno-unused-function
+CFLAGS=		-g -Wall -Wno-unused-function -O2
 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
 AR=			ar
 DFLAGS=		-DHAVE_PTHREAD $(WRAP_MALLOC)
-LOBJS=		utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o malloc_wrap.o
+LOBJS=		utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
 AOBJS=		QSufSort.o bwt_gen.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
 			is.o bwtindex.o bwape.o kopen.o pemerge.o \
 			bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
@@ -41,9 +41,10 @@ depend:
 QSufSort.o: QSufSort.h
 bamlite.o: bamlite.h malloc_wrap.h
 bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h
-bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h malloc_wrap.h kseq.h
+bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kseq.h
 bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
 bwamem.o: ksort.h utils.h kbtree.h
+bwamem_extra.o: bwa.h bntseq.h bwt.h bwamem.h kstring.h malloc_wrap.h
 bwamem_pair.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h kvec.h
 bwamem_pair.o: utils.h ksw.h
 bwape.o: bwtaln.h bwt.h kvec.h malloc_wrap.h bntseq.h utils.h bwase.h bwa.h
@@ -71,7 +72,8 @@ is.o: malloc_wrap.h
 kopen.o: malloc_wrap.h
 kstring.o: kstring.h malloc_wrap.h
 ksw.o: ksw.h malloc_wrap.h
-main.o: utils.h
+main.o: kstring.h malloc_wrap.h utils.h
 malloc_wrap.o: malloc_wrap.h
 pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
+test-extend.o: ksw.h
 utils.o: utils.h ksort.h malloc_wrap.h kseq.h
diff --git a/NEWS b/NEWS.md
similarity index 85%
rename from NEWS
rename to NEWS.md
index 40f4433..82a0f6c 100644
--- a/NEWS
+++ b/NEWS.md
@@ -1,5 +1,66 @@
+Release 0.7.9 (19 May, 2014)
+----------------------------
+
+This release brings several major changes to BWA-MEM. Notably, BWA-MEM now
+formally supports PacBio read-to-reference alignment and experimentally supports
+PacBio read-to-read alignment. BWA-MEM also runs faster at a minor cost of
+accuracy. The speedup is more significant when GRCh38 is in use. More
+specifically:
+
+ * Support PacBio subread-to-reference alignment. Although older BWA-MEM works
+   with PacBio data in principle, the resultant alignments are frequently
+   fragmented. In this release, we fine tuned existing methods and introduced
+   new heuristics to improve PacBio alignment. These changes are not used by
+   default. Users need to add option "-x pacbio" to enable the feature.
+
+ * Support PacBio subread-to-subread alignment (EXPERIMENTAL). This feature is
+   enabled with option "-x pbread". In this mode, the output only gives the
+   overlapping region between a pair of reads without detailed alignment.
+
+ * Output alternative hits in the XA tag if there are not so many of them. This
+   is a BWA-backtrack feature.
+
+ * Support mapping to ALT contigs in GRCh38 (EXPERIMENTAL). We provide a script
+   to postprocess hits in the XA tag to adjust the mapping quality and generate
+   new primary alignments to all overlapping ALT contigs. We would *NOT*
+   recommend this feature for production uses.
+
+ * Improved alignments to many short reference sequences. Older BWA-MEM may
+   generate an alignment bridging two or more adjacent reference sequences.
+   Such alignments are split at a later step as postprocessing. This approach
+   is complex and does not always work. This release forbids these alignments
+   from the very beginning. BWA-MEM should not produce an alignment bridging
+   two or more reference sequences any more.
+
+ * Reduced the maximum seed occurrence from 10000 to 500. Reduced the maximum
+   rounds of Smith-Waterman mate rescue from 100 to 50. Added a heuristic to
+   lower the mapping quality if a read contains seeds with excessive
+   occurrences. These changes make BWA-MEM faster at a minor cost of accuracy
+   in highly repetitive regions.
+
+ * Added an option "-Y" to use soft clipping for supplementary alignments.
+
+ * Bugfix: incomplete alignment extension in corner cases.
+
+ * Bugfix: integer overflow when aligning long query sequences.
+
+ * Bugfix: chain score is not computed correctly (almost no practical effect)
+
+ * General code cleanup
+
+ * Added FAQs to README
+
+Changes in BWA-backtrack:
+
+ * Bugfix: a segmentation fault when an alignment stands out of the end of the
+   last chromosome.
+
+(0.7.9: 19 May 2014, r783)
+
+
+
 Release 0.7.8 (31 March, 2014)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------
 
 Changes in BWA-MEM:
 
@@ -7,7 +68,7 @@ Changes in BWA-MEM:
    Short-read alignment is not affected.
 
  * Bugfix: unnecessarily large bandwidth used during global alignment,
-   which reduces the mapping speed by ~5% for short reads. Results are not
+   which reduces the mapping speed by -5% for short reads. Results are not
    affected.
 
  * Bugfix: when the matching score is not one, paired-end mapping quality is
@@ -31,14 +92,14 @@ With the default setting, 0.7.8 and 0.7.7 gave identical output on one million
 
 
 Release 0.7.7 (25 Feburary, 2014)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 This release fixes incorrect MD tags in the BWA-MEM output.
 
 A note about short-read mapping to GRCh38. The new human reference genome
 GRCh38 contains 60Mbp program generated alpha repeat arrays, some of which are
 hard masked as they cannot be localized. These highly repetitive arrays make
-BWA-MEM ~50% slower. If you are concerned with the performance of BWA-MEM, you
+BWA-MEM -50% slower. If you are concerned with the performance of BWA-MEM, you
 may consider to use option "-c2000 -m50". On simulated data, this setting helps
 the performance at a very minor cost on accuracy. I may consider to change the
 default in future releases.
@@ -48,7 +109,7 @@ default in future releases.
 
 
 Release 0.7.6 (31 Januaray, 2014)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Changes in BWA-MEM:
 
@@ -104,7 +165,7 @@ where BWA-SW may excel.
 
 
 Release 0.7.5a (30 May, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------
 
 Fixed a bug in BWA-backtrack which leads to off-by-one mapping errors in rare
 cases.
@@ -114,7 +175,7 @@ cases.
 
 
 Release 0.7.5 (29 May, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------
 
 Changes in all components:
 
@@ -166,7 +227,7 @@ Thank you.
 
 
 Release 0.7.4 (23 April, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------
 
 This is a bugfix release. Most of bugs are considered to be minor which only
 occur very rarely.
@@ -198,7 +259,7 @@ BWA-backtrack for short-read mapping.
 
 
 Release 0.7.3a (15 March, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------
 
 In 0.7.3, the wrong CIGAR bug was only fixed in one scenario, but not fixed
 in another corner case.
@@ -208,7 +269,7 @@ in another corner case.
 
 
 Release 0.7.3 (15 March, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------
 
 Changes to BWA-MEM:
 
@@ -224,7 +285,7 @@ Changes to BWA-MEM:
    we can see the entire picture of alignment from one SAM line. XP gives the
    position, CIGAR, NM and mapQ of each aligned subsequence of the query.
 
-BWA-MEM has been used to align ~300Gbp 100-700bp SE/PE reads. SNP/indel calling
+BWA-MEM has been used to align -300Gbp 100-700bp SE/PE reads. SNP/indel calling
 has also been evaluated on part of these data. BWA-MEM generally gives better
 pre-filtered SNP calls than BWA. No significant issues have been observed since
 0.7.2, though minor improvements or bugs (e.g. the bug fixed in this release)
@@ -239,17 +300,17 @@ In addition, more detailed description of the BWA-MEM algorithm can be found at
 
 
 Release 0.7.2 (9 March, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------
 
 Emergent bug fix: 0.7.0 and 0.7.1 give a wrong sign to TLEN. In addition,
-flagging `properly paired' also gets improved a little.
+flagging 'properly paired' also gets improved a little.
 
 (0.7.2: 9 March 2013, r351)
 
 
 
 Release 0.7.1 (8 March, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------
 
 Changes to BWA-MEM:
 
@@ -276,7 +337,7 @@ Changes to other components:
 
 An important note is that like BWA-SW, BWA-MEM may output multiple primary
 alignments for a read, which may cause problems to some tools. For aligning
-sequence reads, it is advised to use `-M' to flag extra hits as secondary. This
+sequence reads, it is advised to use '-M' to flag extra hits as secondary. This
 option is not the default because multiple primary alignments are theoretically
 possible in sequence alignment.
 
@@ -285,7 +346,7 @@ possible in sequence alignment.
 
 
 Beta Release 0.7.0 (28 Feburary, 2013)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 This release comes with a new alignment algorithm, BWA-MEM, for 70bp-1Mbp query
 sequences. BWA-MEM essentially seeds alignments with a variant of the fastmap
@@ -322,7 +383,7 @@ handy features in practical aspects:
     (bwa mem ref.fa '<bzcat r1.fq.bz2' '<bzcat r2.fq.bz2') to map bzip'd read
     files without replying on bash features.
 
- 6. BWA-MEM provides a few basic APIs for single-end mapping. The `example.c'
+ 6. BWA-MEM provides a few basic APIs for single-end mapping. The 'example.c'
     program in the source code directory implements a full single-end mapper in
     50 lines of code.
 
@@ -338,7 +399,7 @@ reads. Change of mappers will be necessary sooner or later.
 
 
 Release 0.6.2 (19 June, 2012)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------
 
 This is largely a bug-fix release. Notable changes in BWA-short and BWA-SW:
 
@@ -357,7 +418,7 @@ This is largely a bug-fix release. Notable changes in BWA-short and BWA-SW:
 
 
 Release 0.6.1 (28 November, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Notable changes to BWA-short:
 
@@ -392,7 +453,7 @@ Changes to fastmap:
 
 
 Release 0.5.10 and 0.6.0 (12 November, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------------
 
 The 0.6.0 release comes with two major changes. Firstly, the index data
 structure has been changed to support genomes longer than 4GB. The forward and
@@ -411,7 +472,7 @@ branch unless I find critical bugs in future.
 
 Other notable changes:
 
- * Added the `fastmap' command that finds super-maximal exact matches. It does
+ * Added the 'fastmap' command that finds super-maximal exact matches. It does
    not give the final alignment, but runs much faster. It can be a building
    block for other alignment algorithms. [0.6.0 only]
 
@@ -441,13 +502,13 @@ you.
 
 
 Beta Release 0.5.9 (24 January, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
 Notable changes:
 
- * Feature: barcode support via the `-B' option.
+ * Feature: barcode support via the '-B' option.
 
- * Feature: Illumina 1.3+ read format support via the `-I' option.
+ * Feature: Illumina 1.3+ read format support via the '-I' option.
 
  * Bugfix: RG tags are not attached to unmapped reads.
 
@@ -468,7 +529,7 @@ committed to this repository.
 
 
 Beta Release Candidate 0.5.9rc1 (10 December, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------------------
 
 Notable changes in bwasw:
 
@@ -500,7 +561,7 @@ happens, please report the bugs to the bio-bwa-help mailing list.
 
 
 Beta Release 0.5.8 (8 June, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Notable changes in bwasw:
 
@@ -520,20 +581,20 @@ Notable changes in bwa-short:
  * Fixed a typo/bug in sampe which leads to unnecessarily large memory
    usage in some cases.
 
- * Further reduced the chance of reporting `weird pairing'.
+ * Further reduced the chance of reporting 'weird pairing'.
 
 (0.5.8: 8 June 2010, r1442)
 
 
 
 Beta Release 0.5.7 (1 March, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
 This release only has an effect on paired-end data with fat insert-size
 distribution. Users are still recommended to update as the new release
 improves the robustness to poor data.
 
- * The fix for `weird pairing' was not working in version 0.5.6, pointed
+ * The fix for 'weird pairing' was not working in version 0.5.6, pointed
    out by Carol Scott. It should work now.
 
  * Optionally output to a normal file rather than to stdout (by Tim
@@ -544,7 +605,7 @@ improves the robustness to poor data.
 
 
 Beta Release 0.5.6 (10 Feburary, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 Notable changes in bwa-short:
 
@@ -585,7 +646,7 @@ Notable changes in bwa-short:
 
 
 Beta Release 0.5.5 (10 November, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 This is a bug fix release:
 
@@ -605,7 +666,7 @@ This is a bug fix release:
 
 
 Beta Release 0.5.4 (9 October, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Since this version, the default seed length used in the "aln" command is
 changed to 32.
@@ -634,7 +695,7 @@ Notable changes in dBWT-SW/BWA-SW:
 
 
 Beta Release 0.5.3 (15 September, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------
 
 Fixed a critical bug in bwa-short: reads mapped to the reverse strand
 are not complemented.
@@ -644,11 +705,11 @@ are not complemented.
 
 
 Beta Release 0.5.2 (13 September, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------------
 
 Notable changes in bwa-short:
 
- * Optionally trim reads before alignment. See the manual page on `aln
+ * Optionally trim reads before alignment. See the manual page on 'aln
    -q' for detailed description.
 
  * Fixed a bug in calculating the NM tag for a gapped alignment.
@@ -668,7 +729,7 @@ Notable changes in dBWT-SW:
 
 
 Beta Release 0.5.1 (2 September, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 Notable changes in the short read alignment component:
 
@@ -692,7 +753,7 @@ Notable changes in dBWT-SW:
 
 
 Beta Release 0.5.0 (20 August, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 This release implements a novel algorithm, dBWT-SW, specifically
 designed for long reads. It is 10-50 times faster than SSAHA2, depending
@@ -724,7 +785,7 @@ Other notable changes in BWA are:
 
 
 Beta Release 0.4.9 (19 May, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Interestingly, the integer overflow bug claimed to be fixed in 0.4.7 has
 not in fact. Now I have fixed the bug. Sorry for this and thank Quan
@@ -735,9 +796,9 @@ Long for pointing out the bug (again).
 
 
 Beta Release 0.4.8 (18 May, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
-One change to "aln -R". Now by default, if there are no more than `-R'
+One change to "aln -R". Now by default, if there are no more than '-R'
 equally best hits, bwa will search for suboptimal hits. This change
 affects the ability in finding SNPs in segmental duplications.
 
@@ -749,7 +810,7 @@ likely to cause new bugs. Hope I am right.
 
 
 Beta Release 0.4.7 (12 May, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+---------------------------------
 
 Notable changes:
 
@@ -771,12 +832,12 @@ Notable changes:
 
 
 Beta Release 0.4.6 (9 March, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
 This release improves the SOLiD support. First, a script for converting
 SOLiD raw data is provided. This script is adapted from solid2fastq.pl
 in the MAQ package. Second, a nucleotide reference file can be directly
-used with `bwa index'. Third, SOLiD paired-end support is
+used with 'bwa index'. Third, SOLiD paired-end support is
 completed. Fourth, color-space reads will be converted to nucleotides
 when SAM output is generated. Color errors are corrected in this
 process. Please note that like MAQ, BWA cannot make use of the primer
@@ -790,7 +851,7 @@ little bit, although end-users may barely observe the difference.
 
 
 Beta Release 0.4.5 (18 Feburary, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 Not much happened, but I think it would be good to let the users use the
 latest version.
@@ -801,10 +862,10 @@ Notable changes (Thank Bob Handsaker for catching the two bugs):
    alignment coordinates in rare cases.
 
  * Fixed a bug in SW alignment when no residue matches. This only
-   affects the `sampe' command.
+   affects the 'sampe' command.
 
  * Robustly estimate insert size without setting the maximum on the
-   command line. Since this release `sampe -a' only has an effect if
+   command line. Since this release 'sampe -a' only has an effect if
    there are not enough good pairs to infer the insert size
    distribution.
 
@@ -817,7 +878,7 @@ Notable changes (Thank Bob Handsaker for catching the two bugs):
 
 
 Beta Release 0.4.4 (15 Feburary, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--------------------------------------
 
 This is mainly a bug fix release. Notable changes are:
 
@@ -834,7 +895,7 @@ This is mainly a bug fix release. Notable changes are:
 
 
 Beta Release 0.4.3 (22 January, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Notable changes:
 
@@ -862,7 +923,7 @@ Notable changes:
 
 
 Beta Release 0.4.2 (9 January, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 Aaron Quinlan found a bug in the indexer: the bwa indexer segfaults if
 there are no comment texts in the FASTA header. This is a critical
@@ -873,7 +934,7 @@ bug. Nothing else was changed.
 
 
 Beta Release 0.4.1 (7 January, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 I am sorry for the quick updates these days. I like to set a milestone
 for BWA and this release seems to be. For paired end reads, BWA also
@@ -886,24 +947,24 @@ maq. Benchmark is also updated accordingly.
 
 
 Beta Release 0.4.0 (6 January, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
 In comparison to the release two days ago, this release is mainly tuned
 for performance with some tricks I learnt from Bowtie. However, as the
 indexing format has also been changed, I have to increase the version
 number to 0.4.0 to emphasize that *DATABASE MUST BE RE-INDEXED* with
-`bwa index'.
+'bwa index'.
 
  * Improved the speed by about 20%.
 
- * Added multi-threading to `bwa aln'.
+ * Added multi-threading to 'bwa aln'.
 
 (0.4.0: 6 January 2009, r756)
 
 
 
 Beta Release 0.3.0 (4 January, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------------
 
  * Added paired-end support by separating SA calculation and alignment
    output.
@@ -917,7 +978,7 @@ Beta Release 0.3.0 (4 January, 2009)
 
 
 Beta Release 0.2.0 (15 Augusst, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
  * Take the subsequence at the 5'-end as seed. Seeding strategy greatly
    improves the speed for long reads, at the cost of missing a few true
@@ -932,7 +993,7 @@ Beta Release 0.2.0 (15 Augusst, 2008)
 
 
 Beta Release 0.1.6 (08 Augusst, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-------------------------------------
 
  * Give accurate CIGAR string.
 
@@ -943,7 +1004,7 @@ Beta Release 0.1.6 (08 Augusst, 2008)
 
 
 Beta Release 0.1.5 (27 July, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
  * Improve the speed. This version is expected to give the same results.
 
@@ -952,7 +1013,7 @@ Beta Release 0.1.5 (27 July, 2008)
 
 
 Beta Release 0.1.4 (22 July, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
  * Fixed a bug which may cause missing gapped alignments.
 
@@ -967,7 +1028,7 @@ Beta Release 0.1.4 (22 July, 2008)
 
 
 Beta Release 0.1.3 (21 July, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
 Improve the speed with some tricks on retrieving occurences. The results
 should be exactly the same as that of 0.1.2.
@@ -977,7 +1038,7 @@ should be exactly the same as that of 0.1.2.
 
 
 Beta Release 0.1.2 (17 July, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+----------------------------------
 
 Support gapped alignment. Codes for ungapped alignment has been removed.
 
@@ -986,12 +1047,9 @@ Support gapped alignment. Codes for ungapped alignment has been removed.
 
 
 Beta Release 0.1.1 (03 June, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------------
 
 This is the first release of BWA, Burrows-Wheeler Alignment tool. Please
 read man page for more information about this software.
 
 (0.1.1: 03 June 2008, r349)
-
-
-
diff --git a/README.md b/README.md
index ac1e57e..dff245e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-###Getting started
+##Getting started
 
 	git clone https://github.com/lh3/bwa.git
 	cd bwa; make
@@ -6,32 +6,31 @@
 	./bwa mem ref.fa read-se.fq.gz | gzip -3 > aln-se.sam.gz
 	./bwa mem ref.fa read1.fq read2.fq | gzip -3 > aln-pe.sam.gz
 
-###Introduction
+##Introduction
 
-BWA is a software package for mapping low-divergent sequences against a large
-reference genome, such as the human genome. It consists of three algorithms:
+BWA is a software package for mapping DNA sequences against a large reference
+genome, such as the human genome. It consists of three algorithms:
 BWA-backtrack, BWA-SW and BWA-MEM. The first algorithm is designed for Illumina
 sequence reads up to 100bp, while the rest two for longer sequences ranged from
-70bp to 1Mbp. BWA-MEM and BWA-SW share similar features such as the support of
-long reads and chimeric alignment, but BWA-MEM, which is the latest, is
-generally recommended for high-quality queries as it is faster and more
-accurate. BWA-MEM also has better performance than BWA-backtrack for 70-100bp
-Illumina reads.
+70bp to a few megabases. BWA-MEM and BWA-SW share similar features such as the
+support of long reads and chimeric alignment, but BWA-MEM, which is the latest,
+is generally recommended as it is faster and more accurate. BWA-MEM also has
+better performance than BWA-backtrack for 70-100bp Illumina reads.
 
 For all the algorithms, BWA first needs to construct the FM-index for the
 reference genome (the **index** command). Alignment algorithms are invoked with
 different sub-commands: **aln/samse/sampe** for BWA-backtrack,
 **bwasw** for BWA-SW and **mem** for the BWA-MEM algorithm.
 
-###Availability
+##Availability
 
-BWA is released under [GPLv3][1]. The latest souce code is [freely
-available][2] at github. Released packages can [be downloaded][3] at
+BWA is released under [GPLv3][1]. The latest source code is [freely
+available at github][2]. Released packages can [be downloaded][3] at
 SourceForge. After you acquire the source code, simply use `make` to compile
 and copy the single executable `bwa` to the destination you want. The only
-dependency of BWA is [zlib][14].
+dependency required to build BWA is [zlib][14].
 
-###Seeking helps
+##Seeking helps
 
 The detailed usage is described in the man page available together with the
 source code. You can use `man ./bwa.1` to view the man page in a terminal. The
@@ -40,7 +39,7 @@ have questions about BWA, you may [sign up the mailing list][6] and then send
 the questions to [bio-bwa-help at sourceforge.net][7]. You may also ask questions
 in forums such as [BioStar][8] and [SEQanswers][9].
 
-###Citing BWA
+##Citing BWA
 
 * Li H. and Durbin R. (2009) Fast and accurate short read alignment with
  Burrows-Wheeler transform. *Bioinformatics*, **25**, 1754-1760. [PMID:
@@ -57,6 +56,112 @@ in forums such as [BioStar][8] and [SEQanswers][9].
 Please note that the last reference is a preprint hosted at [arXiv.org][13]. I
 do not have plan to submit it to a peer-reviewed journal in the near future.
 
+##Frequently asked questions (FAQs)
+
+1. [What types of data does BWA work with?](#type)
+2. [Why does a read appear multiple times in the output SAM?](#multihit)
+3. [Does BWA work on reference sequences longer than 4GB in total?](#4gb)
+4. [Why can one read in a pair has high mapping quality but the other has zero?](#pe0)
+5. [How can a BWA-backtrack alignment stands out of the end of a chromosome?](#endref)
+6. [How to map sequences to GRCh38 with ALT contigs?](#h38)
+
+####<a name="type"></a>1. What types of data does BWA work with?
+
+BWA works with a variety types of DNA sequence data, though the optimal
+algorithm and setting may vary. The following list gives the recommended
+settings:
+
+* Illumina/454/IonTorrent single-end reads longer than ~70bp or assembly
+  contigs up to a few megabases mapped to a close related reference genome:
+
+		bwa mem ref.fa reads.fq > aln.sam
+
+* Illumina single-end reads no longer than ~70bp:
+
+		bwa aln ref.fa reads.fq > reads.sai; bwa samse ref.fa reads.sai reads.fq > aln-se.sam
+
+* Illumina/454/IonTorrent paired-end reads longer than ~70bp:
+
+		bwa mem ref.fa read1.fq read2.fq > aln-pe.sam
+
+* Illumina paired-end reads no longer than ~70bp:
+
+		bwa aln ref.fa read1.fq > read1.sai; bwa aln ref.fa read2.fq > read2.sai
+		bwa samse ref.fa reads.sai reads.fq > aln-pe.sam
+
+* PacBio subreads to a reference genome:
+
+		bwa mem -x pacbio ref.fa reads.fq > aln.sam
+
+* PacBio subreads to themselves (the output is not SAM):
+
+		bwa mem -x pbread reads.fq reads.fq > overlap.pas
+
+BWA-MEM is recommended for query sequences longer than ~70bp for a variety of
+error rates (or sequence divergence). Generally, BWA-MEM is more tolerant with
+errors given longer query sequences as the chance of missing all seeds is small.
+As is shown above, with non-default settings, BWA-MEM works with PacBio subreads
+with a sequencing error rate as high as ~15%.
+
+####<a name="multihit"></a>2. Why does a read appear multiple times in the output SAM?
+
+BWA-SW and BWA-MEM perform local alignments. If there is a translocation, a gene
+fusion or a long deletion, a read bridging the break point may have two hits,
+occupying two lines in the SAM output. With the default setting of BWA-MEM, one
+and only one line is primary and is soft clipped; other lines are tagged with
+0x800 SAM flag (supplementary alignment) and are hard clipped.
+
+####<a name="4gb"></a>3. Does BWA work on reference sequences longer than 4GB in total?
+
+Yes. Since 0.6.x, all BWA algorithms work with a genome with total length over
+4GB. However, individual chromosome should not be longer than 2GB.
+
+####<a name="pe0"></a>4. Why can one read in a pair has high mapping quality but the other has zero?
+
+This is correct. Mapping quality is assigned for individual read, not for a read
+pair. It is possible that one read can be mapped unambiguously, but its mate
+falls in a tandem repeat and thus its accurate position cannot be determined.
+
+####<a name="endref"></a>5. How can a BWA-backtrack alignment stands out of the end of a chromosome?
+
+Internally BWA concatenates all reference sequences into one long sequence. A
+read may be mapped to the junction of two adjacent reference sequences. In this
+case, BWA-backtrack will flag the read as unmapped (0x4), but you will see
+position, CIGAR and all the tags. A similar issue may occur to BWA-SW alignment
+as well. BWA-MEM does not have this problem.
+
+####<a name="h38"></a>6. How to map sequences to GRCh38 with ALT contigs?
+
+BWA-backtrack and BWA-MEM partially support mapping to a reference containing
+ALT contigs that represent alternative alleles highly divergent from the
+reference genome.
+
+	# download the K8 executable required by bwa-helper.js
+	wget http://sourceforge.net/projects/lh3/files/k8/k8-0.2.1.tar.bz2/download
+	tar -jxf k8-0.2.1.tar.bz2
+
+	# download the ALT-to-GRCh38 alignment in the SAM format
+	wget http://sourceforge.net/projects/bio-bwa/files/hs38.alt.sam.gz/download
+
+	# download the GRCh38 sequences with ALT contigs
+	wget ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz
+
+	# index and mapping
+	bwa index -p hs38a GCA_000001405.15_GRCh38_full_analysis_set.fna.gz
+	bwa mem -h50 hs38a reads.fq | ./k8-linux bwa-helper.js genalt hs38.alt.sam.gz > out.sam
+
+Here, option `-h50` asks bwa-mem to output multiple hits in the XA tag if the
+read has 50 or fewer hits. For each SAM line containing the XA tag,
+`bwa-helper.js genalt` decodes the alignments in the XA tag, groups hits lifted
+to the same chromosomal region, adjusts mapping quality and outputs all the
+hits overlapping the reported hit. A read may be mapped to both the primary
+assembly and one or more ALT contigs all with high mapping quality.
+
+Note that this procedure assumes reads are single-end and may miss hits to
+highly repetitive regions as these hits will not be reported with option
+`-h50`. `bwa-helper.js` is a prototype implementation not recommended for
+production uses.
+
 
 
 [1]: http://en.wikipedia.org/wiki/GNU_General_Public_License
@@ -74,3 +179,4 @@ do not have plan to submit it to a peer-reviewed journal in the near future.
 [13]: http://arxiv.org/
 [14]: http://zlib.net/
 [15]: https://github.com/lh3/bwa/tree/mem
+[16]: ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/
diff --git a/bntseq.c b/bntseq.c
index e1cd323..eddae84 100644
--- a/bntseq.c
+++ b/bntseq.c
@@ -329,6 +329,15 @@ int bns_pos2rid(const bntseq_t *bns, int64_t pos_f)
 	return mid;
 }
 
+int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re)
+{
+	int is_rev, rid_b, rid_e;
+	if (rb < bns->l_pac && re > bns->l_pac) return -2;
+	rid_b = bns_pos2rid(bns, bns_depos(bns, rb, &is_rev));
+	rid_e = bns_pos2rid(bns, bns_depos(bns, re, &is_rev) - 1);
+	return rid_b == rid_e? rid_b : -1;
+}
+
 int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id)
 {
 	int left, mid, right, nn;
@@ -374,3 +383,30 @@ uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end
 	} else *len = 0; // if bridging the forward-reverse boundary, return nothing
 	return seq;
 }
+
+uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid)
+{
+	int64_t far_beg, far_end, len;
+	int is_rev;
+	uint8_t *seq;
+
+	if (*end < *beg) *end ^= *beg, *beg ^= *end, *end ^= *beg; // if end is smaller, swap
+	assert(*beg <= mid && mid < *end);
+	*rid = bns_pos2rid(bns, bns_depos(bns, mid, &is_rev));
+	far_beg = bns->anns[*rid].offset;
+	far_end = far_beg + bns->anns[*rid].len;
+	if (is_rev) { // flip to the reverse strand
+		int64_t tmp = far_beg;
+		far_beg = (bns->l_pac<<1) - far_end;
+		far_end = (bns->l_pac<<1) - tmp;
+	}
+	*beg = *beg > far_beg? *beg : far_beg;
+	*end = *end < far_end? *end : far_end;
+	seq = bns_get_seq(bns->l_pac, pac, *beg, *end, &len);
+	if (seq == 0 || *end - *beg != len) {
+		fprintf(stderr, "[E::%s] begin=%ld, mid=%ld, end=%ld, len=%ld, seq=%p, rid=%d, far_beg=%ld, far_end=%ld\n",
+				__func__, (long)*beg, (long)mid, (long)*end, (long)len, seq, *rid, (long)far_beg, (long)far_end);
+	}
+	assert(seq && *end - *beg == len); // assertion failure should never happen
+	return seq;
+}
diff --git a/bntseq.h b/bntseq.h
index 4061438..6437cf6 100644
--- a/bntseq.h
+++ b/bntseq.h
@@ -28,6 +28,7 @@
 #ifndef BWT_BNTSEQ_H
 #define BWT_BNTSEQ_H
 
+#include <assert.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <zlib.h>
@@ -75,6 +76,8 @@ extern "C" {
 	int bns_pos2rid(const bntseq_t *bns, int64_t pos_f);
 	int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id);
 	uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len);
+	uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid);
+	int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re);
 
 #ifdef __cplusplus
 }
diff --git a/bwa-helper.js b/bwa-helper.js
new file mode 100644
index 0000000..8a234b8
--- /dev/null
+++ b/bwa-helper.js
@@ -0,0 +1,706 @@
+/*****************************************************************
+ * The K8 Javascript interpreter is required to run this script. *
+ *                                                               *
+ *   Source code: https://github.com/attractivechaos/k8          *
+ *   Binary: http://sourceforge.net/projects/lh3/files/k8/       *
+ *                                                               *
+ * Data file used for generating GRCh38 ALT alignments:          *
+ *                                                               *
+ *   http://sourceforge.net/projects/bio-bwa/files/              *
+ *****************************************************************/
+
+/******************
+ *** From k8.js ***
+ ******************/
+
+var getopt = function(args, ostr) {
+	var oli; // option letter list index
+	if (typeof(getopt.place) == 'undefined')
+		getopt.ind = 0, getopt.arg = null, getopt.place = -1;
+	if (getopt.place == -1) { // update scanning pointer
+		if (getopt.ind >= args.length || args[getopt.ind].charAt(getopt.place = 0) != '-') {
+			getopt.place = -1;
+			return null;
+		}
+		if (getopt.place + 1 < args[getopt.ind].length && args[getopt.ind].charAt(++getopt.place) == '-') { // found "--"
+			++getopt.ind;
+			getopt.place = -1;
+			return null;
+		}
+	}
+	var optopt = args[getopt.ind].charAt(getopt.place++); // character checked for validity
+	if (optopt == ':' || (oli = ostr.indexOf(optopt)) < 0) {
+		if (optopt == '-') return null; //  if the user didn't specify '-' as an option, assume it means null.
+		if (getopt.place < 0) ++getopt.ind;
+		return '?';
+	}
+	if (oli+1 >= ostr.length || ostr.charAt(++oli) != ':') { // don't need argument
+		getopt.arg = null;
+		if (getopt.place < 0 || getopt.place >= args[getopt.ind].length) ++getopt.ind, getopt.place = -1;
+	} else { // need an argument
+		if (getopt.place >= 0 && getopt.place < args[getopt.ind].length)
+			getopt.arg = args[getopt.ind].substr(getopt.place);
+		else if (args.length <= ++getopt.ind) { // no arg
+			getopt.place = -1;
+			if (ostr.length > 0 && ostr.charAt(0) == ':') return ':';
+			return '?';
+		} else getopt.arg = args[getopt.ind]; // white space
+		getopt.place = -1;
+		++getopt.ind;
+	}
+	return optopt;
+}
+
+function obj2str(o)
+{
+	if (typeof(o) != 'object') {
+		return o.toString();
+	} else if (o == null) {
+		return "null";
+	} else if (Array.isArray(o)) {
+		var s = "[";
+		for (var i = 0; i < o.length; ++i) {
+			if (i) s += ',';
+			s += obj2str(o[i]);
+		}
+		return s + "]";
+	} else {
+		var i = 0, s = "{";
+		for (var key in o) {
+			if (i++) s += ',';
+			s += key + ":";
+			s += obj2str(o[key]);
+		}
+		return s + "}";
+	}
+}
+
+Bytes.prototype.reverse = function()
+{
+	for (var i = 0; i < this.length>>1; ++i) {
+		var tmp = this[i];
+		this[i] = this[this.length - i - 1];
+		this[this.length - i - 1] = tmp;
+	}
+}
+
+Bytes.prototype.revcomp = function()
+{
+	if (Bytes.rctab == null) {
+		var s1 = 'WSATUGCYRKMBDHVNwsatugcyrkmbdhvn';
+		var s2 = 'WSTAACGRYMKVHDBNwstaacgrymkvhdbn';
+		Bytes.rctab = [];
+		for (var i = 0; i < 256; ++i) Bytes.rctab[i] = 0;
+		for (var i = 0; i < s1.length; ++i)
+			Bytes.rctab[s1.charCodeAt(i)] = s2.charCodeAt(i);
+	}
+	for (var i = 0; i < this.length>>1; ++i) {
+		var tmp = this[this.length - i - 1];
+		this[this.length - i - 1] = Bytes.rctab[this[i]];
+		this[i] = Bytes.rctab[tmp];
+	}
+	if (this.length>>1)
+		this[this.length>>1] = Bytes.rctab[this[this.length>>1]];
+}
+
+/************************
+ *** command markovlp ***
+ ************************/
+
+function bwa_markOvlp(args)
+{
+	var c, min_aln_ratio = .9, min_ext = 50;
+	while ((c = getopt(args, "r:e:")) != null) {
+		if (c == 'r') min_aln_ratio = parseFloat(getopt.arg);
+		else if (c == 'e') min_ext = parseInt(getopt.arg);
+	}
+
+	var file = args.length == getopt.ind? new File() : new File(args[getopt.ind]);
+	var buf = new Bytes();
+	var dir4 = ['>>', '><', '<>', '<<'];
+
+	while (file.readline(buf) >= 0) {
+		var t = buf.toString().split("\t")
+			for (var i = 0; i < t.length; ++i)
+				if (i != 0 && i != 4)
+					t[i] = parseInt(t[i]);
+		var el, a1, a2, e1, e2, r1, r2; // a: aligned length; e: extended length; r: remaining length
+		e2 = a2 = t[7] - t[6];
+		if (t[2] < t[3]) { // forward-forward match
+			e1 = a1 = t[3] - t[2];
+			r1 = t[2] - t[6];
+			r2 = (t[5] - t[7]) - (t[1] - t[3]);
+			el = r1 > 0? t[6] : t[2];
+			el += r2 > 0? t[1] - t[3] : t[5] - t[7];
+		} else { // reverse-forward match
+			e1 = a1 = t[2] - t[3];
+			r1 = (t[1] - t[2]) - t[6];
+			r2 = (t[5] - t[7]) - t[3];
+			el = r1 > 0? t[6] : t[1] - t[2];
+			el += r2 > 0? t[3] : t[5] - t[7];
+		}
+		e1 += el; e2 += el;
+		var type;
+		if (a1 / e1 >= min_aln_ratio && a2 / e2 >= min_aln_ratio) {
+			if ((r1 >= min_ext && r2 >= min_ext) || (r1 <= -min_ext && r2 <= -min_ext)) { // suffix-prefix match
+				var d = t[2] < t[3]? 0 : 2;
+				if (r1 < 0) d ^= 3; // reverse the direction
+				type = 'O' + dir4[d];
+			} else type = 'C' + (e1 / t[1] > e2 / t[5]? 1 : 2);
+		} else type = 'I'; // internal local match; not a suffix-prefix match
+		//print(t[1], e1, a1, t[5], e2, a2);
+		print(type, buf);
+	}
+
+	buf.destroy();
+	file.close();
+}
+
+/***********************
+ *** command pas2bed ***
+ ***********************/
+
+function bwa_pas2reg(args)
+{
+	var file = args.length? new File(args[0]) : new File();
+	var buf = new Bytes();
+
+	while (file.readline(buf) >= 0) {
+		var t = buf.toString().split("\t");
+		if (t[0] == t[4]) continue;
+		if (parseInt(t[2]) < parseInt(t[3])) print(t[0], t[1], t[2], t[3], t[8]);
+		else print(t[0], t[1], t[3], t[2], t[8]);
+		print(t[4], t[5], t[6], t[7], t[8]);
+	}
+
+	buf.destroy();
+	file.close();
+}
+
+/*******************
+ * command sam2pas *
+ *******************/
+
+function bwa_sam2pas(args)
+{
+	var file = args.length == 0? new File() : new File(args[0]);
+	var buf = new Bytes();
+	var seq_dict = {};
+
+	while (file.readline(buf) >= 0) {
+		var line = buf.toString();
+		var m;
+		if (/^@SQ/.test(line)) {
+			var name = null, len = null;
+			if ((m = /\tSN:(\S+)/.exec(line)) != null) name = m[1];
+			if ((m = /\tLN:(\S+)/.exec(line)) != null) len = parseInt(m[1]);
+			if (name != null && len != null) seq_dict[name] = len;
+		}
+		if (/^@/.test(line)) continue;
+		var t = line.split("\t");
+		var pos = parseInt(t[3]) - 1;
+		var x = 0, y = 0, i = 0, clip = [0, 0], n_ins = 0, n_del = 0, o_ins = 0, o_del = 0, n_M = 0;
+		var re = /(\d+)([MIDSH])/g;
+		while ((m = re.exec(t[5])) != null) {
+			var l = parseInt(m[1]);
+			if (m[2] == 'M') x += l, y += l, n_M += l;
+			else if (m[2] == 'I') y += l, n_ins += l, ++o_ins;
+			else if (m[2] == 'D') x += l, n_del += l, ++o_del;
+			else if (m[2] == 'S' || m[2] == 'H')
+				clip[i == 0? 0 : 1] = l;
+			++i;
+		}
+		var is_rev = (parseInt(t[1]) & 16)? true : false;
+		var misc = 'mapQ=' + t[4] + ';';
+		var usc = 1;
+		if ((m = /\tNM:i:(\d+)/.exec(line)) != null) {
+			var NM = parseInt(m[1]);
+			var diff = (NM / (n_M + n_ins + n_del)).toFixed(3);
+			misc += 'diff=' + diff + ';n_mis=' + (NM - n_del - n_ins) + ';';
+		}
+		misc += 'n_del='+n_del+';n_ins='+n_ins+';o_del='+o_del+';o_ins='+o_ins + ';';
+		if ((m = /\tAS:i:(\d+)/.exec(line)) != null) {
+			misc += 'AS='+m[1] + ';';
+			usc = (parseInt(m[1]) / (x > y? x : y)).toFixed(3);
+		}
+		if ((m = /\tXS:i:(\d+)/.exec(line)) != null) misc += 'XS='+m[1] + ';';
+		var len = y + clip[0] + clip[1];
+		var z = [t[0], len, clip[0], clip[0] + y, t[2], seq_dict[t[2]], pos, pos + x, usc, misc];
+		if (parseInt(t[1]) & 16) z[2] = clip[1] + y, z[3] = clip[1];
+		print(z.join("\t"));
+	}
+
+	buf.destroy();
+	file.close();
+}
+
+/***********************
+ *** command reg2cut ***
+ ***********************/
+
+function bwa_reg2cut(args)
+{
+	var c, min_usc = 0.5, min_ext = 100, min_len = 5000, cut = 250;
+	while ((c = getopt(args, "s:e:l:c:")) != null) {
+		if (c == 's') min_usc = parseFloat(getopt.arg);
+		else if (c == 'e') min_ext = parseInt(getopt.arg);
+		else if (c == 'l') min_len = parseInt(getopt.arg);
+		else if (c == 'c') cut = parseInt(getopt.arg);
+	}
+
+	var file = args.length == getopt.ind? new File () : new File(args[getopt.ind]);
+	var buf = new Bytes();
+
+	function print_bed() {
+		for (var i = 0; i < a.length; ++i) {
+			var start = a[i][0] - cut > 0? a[i][0] : 0;
+			var end   = a[i][1] + cut < last_len? a[i][1] : last_len;
+			if (end - start >= min_len) print(last_chr, start, end);
+		}
+	}
+
+	var last_chr = null, last_len = null, max_c_usc = 0, start = 0, end = 0;
+	var a = [];
+	while (file.readline(buf) >= 0) {
+		var t = buf.toString().split("\t");
+		t[1] = parseInt(t[1]);
+		t[2] = parseInt(t[2]);
+		t[3] = parseInt(t[3]);
+		t[4] = parseFloat(t[4]);
+		var is_contained = t[2] < min_ext && t[1] - t[3] < min_ext? true : false;
+		if (t[3] - t[2] < cut<<1) continue;
+		t[2] += cut; t[3] -= cut;
+		if (t[0] != last_chr) {
+			a.push([start, end]);
+			if (last_chr != null && max_c_usc < min_usc) print_bed();
+			last_chr = t[0]; last_len = t[1]; start = t[2]; end = t[3];
+			max_c_usc = is_contained? t[4] : 0;
+			a.length = 0;
+		} else {
+			if (is_contained)
+				max_c_usc = max_c_usc > t[4]? max_c_usc : t[4];
+			if (t[4] < min_usc) continue;
+			if (t[2] > end) {
+				a.push([start, end]);
+				start = t[2];
+				end = end > t[3]? end : t[3];
+			} else end = end > t[3]? end : t[3];
+		}
+	}
+	a.push([start, end]);
+	if (max_c_usc < min_usc) print_bed(); // the last sequence
+
+	buf.destroy();
+	file.close();
+}
+
+function bwa_shortname(args)
+{
+	var file = args.length? new File(args[0]) : new File();
+	var buf = new Bytes();
+
+	var re = /(\S+)\/(\d+)_(\d+)((:\d+-\d+)+)/g;
+	var re2 = /:(\d+)-(\d+)/g;
+	while (file.readline(buf) >= 0) {
+		var match, match2;
+		var line = buf.toString();
+		var x = [];
+		while ((match = re.exec(line)) != null) {
+			var start = parseInt(match[2]), len = parseInt(match[3]) - start;
+			while ((match2 = re2.exec(match[4])) != null) {
+				var a = parseInt(match2[1]) - 1;
+				var b = parseInt(match2[2]);
+				start += a; len = b - a;
+			}
+			x.push([match[0], match[1] + '/' + start.toString() + '_' + (start+len).toString()]);
+		}
+		for (var i = 0; i < x.length; ++i)
+			line = line.replace(x[i][0], x[i][1]);
+		print(line);
+	}
+
+	buf.destroy();
+	file.close();
+}
+
+/*******************
+ * Command gff2sam *
+ *******************/
+
+function bwa_gff2sam(args)
+{
+	if (args.length < 2) {
+		print("Usage: k8 bwa-helper.js <aln.gff> <query-length.txt>");
+		exit(1);
+	}
+
+	var file = new File(args[1]);
+	var buf = new Bytes();
+	var len = {};
+
+	while (file.readline(buf) >= 0) {
+		var t = buf.toString().split(/\s+/);
+		len[t[0]] = parseInt(t[1]);
+	}
+	file.close();
+
+	file = new File(args[0]);
+	var re_cigar = /([MID])(\d+)/g;
+	var lineno = 0;
+	while (file.readline(buf) >= 0) {
+		++lineno;
+		var t = buf.toString().split("\t");
+		var m = /Target=(\S+)\s+(\d+)\s+(\d+)\s+([+-])/.exec(t[8]);
+		if (m == null) {
+			warn("WARNING: skipped line "+lineno+" due to the lack of Target.");
+			continue;
+		}
+		var qname = m[1];
+		var flag = t[6] == m[4]? 0 : 16;
+		var qb = parseInt(m[2]) - 1, qe = parseInt(m[3]), qlen = len[qname];
+		if (qlen == null)
+			throw Error("Sequence "+qname+" is not present in the query-length.txt");
+		var clip5 = qb, clip3 = qlen - qe;
+		if (flag&16) clip5 ^= clip3, clip3 ^= clip5, clip5 ^= clip3; // swap
+
+		m = /Gap\s*=\s*(([MID]\d+\s*)+)/.exec(t[8]);
+		var cigar = clip5? clip5 + 'S' : '';
+		var n_ins = 0, n_del = 0, n_match = 0, NM = null;
+		if (m) {
+			var mc;
+			while ((mc = re_cigar.exec(m[1])) != null) {
+				var l = parseInt(mc[2]);
+				cigar += mc[2] + mc[1];
+				if (mc[1] == 'I') n_ins += l;
+				else if (mc[1] == 'D') n_del += l;
+				else if (mc[1] == 'M') n_match += l;
+			}
+			if (n_ins + n_match != qe - qb || n_del + n_match != parseInt(t[4]) - parseInt(t[3]) + 1)
+				throw Error("Inconsistent CIGAR at line "+lineno);
+		} else { // ungapped alignment
+			var tb = parseInt(t[3]) - 1, te = parseInt(t[4]);
+			if (te - tb != qe - qb) {
+				warn("WARNING: line "+lineno+" should contain gaps, but lacks Gap. Skipped.\n");
+			} else cigar = (qe - qb) + 'M';
+		}
+		if (clip3) cigar += clip3 + 'S';
+		if ((m = /num_mismatch=(\d+)/.exec(t[8])) != null)
+			NM = parseInt(m[1]) + n_ins + n_del;
+		var out = [qname, flag, t[0], t[3], 255, cigar, '*', 0, 0, '*', '*'];
+		if (NM != null) out.push('NM:i:' + NM);
+		print(out.join("\t"));
+	}
+	buf.destroy();
+	file.close();
+}
+
+
+/******************************
+ *** Generate ALT alignment ***
+ ******************************/
+
+function intv_ovlp(intv, bits) // interval index
+{
+	if (typeof bits == "undefined") bits = 13;
+	intv.sort(function(a,b) {return a[0]-b[0];});
+	// create the index
+	var idx = [], max = 0;
+	for (var i = 0; i < intv.length; ++i) {
+		var b = intv[i][0]>>bits;
+		var e = (intv[i][1]-1)>>bits;
+		if (b != e) {
+			for (var j = b; j <= e; ++j)
+				if (idx[j] == null) idx[j] = i;
+		} else if (idx[b] == null) idx[b] = i;
+		max = max > e? max : e;
+	}
+	// closure
+	return function(_b, _e) {
+		var x = _b >> bits;
+		if (x > max) return [];
+		var off = idx[x];
+		if (off == null) {
+			var i;
+			for (i = ((_e - 1) >> bits) - 1; i >= 0; --i)
+				if (idx[i] != null) break;
+			off = i < 0? 0 : idx[i];
+		}
+		var ovlp = [];
+		for (var i = off; i < intv.length && intv[i][0] < _e; ++i)
+			if (intv[i][1] > _b) ovlp.push(intv[i]);
+		return ovlp;
+	}
+}
+
+function bwa_genalt(args)
+{
+	var re_cigar = /(\d+)([MIDSHN])/g;
+
+	function cigar2pos(cigar, pos) // given a pos on ALT and the ALT-to-REF CIGAR, find the pos on REF
+	{
+		var x = 0, y = 0;
+		for (var i = 0; i < cigar.length; ++i) {
+			var op = cigar[i][0], len = cigar[i][1];
+			if (op == 'M') {
+				if (y <= pos && pos < y + len)
+					return x + (pos - y);
+				x += len, y += len;
+			} else if (op == 'D') {
+				x += len;
+			} else if (op == 'I') {
+				if (y <= pos && pos < y + len)
+					return x;
+				y += len;
+			} else if (op == 'S' || op == 'H') {
+				if (y <= pos && pos < y + len)
+					return -1;
+				y += len;
+			}
+		}
+		return -1;
+	}
+
+	function parse_hit(s, opt) // parse a hit. s looks something like ["chr1", "+12345", "100M", 5]
+	{
+		var h = {};
+		h.ctg = s[0];
+		h.start = parseInt(s[1].substr(1)) - 1;
+		h.rev = (s[1].charAt(0) == '-');
+		h.cigar = s[2];
+		h.NM = parseInt(s[3]);
+		var m, l_ins, n_ins, l_del, n_del, l_match, l_skip, l_clip;
+		l_ins = l_del = n_ins = n_del = l_match = l_skip = l_clip = 0;
+		while ((m = re_cigar.exec(h.cigar)) != null) {
+			var l = parseInt(m[1]);
+			if (m[2] == 'M') l_match += l;
+			else if (m[2] == 'D') ++n_del, l_del += l;
+			else if (m[2] == 'I') ++n_ins, l_ins += l;
+			else if (m[2] == 'N') l_skip += l;
+			else if (m[2] == 'H' || m[2] == 'S') l_clip += l;
+		}
+		h.end = h.start + l_match + l_del + l_skip;
+		h.NM = h.NM > l_del + l_ins? h.NM : l_del + l_ins;
+		h.score = Math.floor((opt.a * l_match - (opt.a + opt.b) * (h.NM - l_del - l_ins) - opt.o * (n_del + n_ins) - opt.e * (l_del + l_ins)) / opt.a + .499);
+		h.l_query = l_match + l_ins + l_clip;
+		return h;
+	}
+
+	var c, opt = { a:1, b:4, o:6, e:1, verbose:3 };
+
+	while ((c = getopt(args, 'v:')) != null) {
+		if (c == 'v') opt.verbose = parseInt(getopt.arg);
+	}
+
+	if (args.length == getopt.ind) {
+		print("Usage: k8 bwa-helper.js genalt <alt.sam> [aln.sam]");
+		exit(1);
+	}
+
+	var file, buf = new Bytes();
+	var aux = new Bytes(); // used for reverse and reverse complement
+
+	// read the ALT-to-REF alignment and generate the index
+	var intv = {};
+	file = new File(args[getopt.ind]);
+	while (file.readline(buf) >= 0) {
+		var line = buf.toString();
+		var t = line.split("\t");
+		if (line.charAt(0) == '@') continue;
+		var flag = parseInt(t[1]);
+		var m, cigar = [], l_qaln = 0, l_qclip = 0;
+		while ((m = re_cigar.exec(t[5])) != null) {
+			var l = parseInt(m[1]);
+			cigar.push([m[2] != 'H'? m[2] : 'S', l]); // convert hard clip to soft clip
+			if (m[2] == 'M' || m[2] == 'I') l_qaln += l;
+			else if (m[2] == 'S' || m[2] == 'H') l_qclip += l;
+		}
+		var j = flag&16? cigar.length-1 : 0;
+		var start = cigar[j][0] == 'S'? cigar[j][1] : 0;
+		if (intv[t[0]] == null) intv[t[0]] = [];
+		intv[t[0]].push([start, start + l_qaln, l_qaln + l_qclip, t[2], flag&16? true : false, parseInt(t[3]) - 1, cigar]);
+		//print(start, start + l_qaln, t[2], flag&16? true : false, parseInt(t[3]), cigar);
+	}
+	file.close();
+	// create the interval index
+	var idx = {};
+	for (var ctg in intv)
+		idx[ctg] = intv_ovlp(intv[ctg]);
+
+	// process SAM
+	file = args.length - getopt.ind >= 2? new File(args[getopt.ind+1]) : new File();
+	while (file.readline(buf) >= 0) {
+		var m, line = buf.toString();
+		if (line.charAt(0) == '@' || (m = /\tXA:Z:(\S+)/.exec(line)) == null) { // TODO: this does not work with PE file
+			if (opt.verbose < 4) print(line);
+			continue;
+		}
+
+		// parse hits
+		var hits = [];
+		var XA_strs = m[1].split(";");
+		var NM = (m = /\tNM:i:(\d+)/.exec(line)) == null? '0' : m[1];
+		var t = line.split("\t");
+		var flag = parseInt(t[1]);
+		hits.push(parse_hit([t[2], ((flag&16)?'-':'+') + t[3], t[5], NM], opt));
+		for (var i = 0; i < XA_strs.length; ++i) // hits in the XA tag
+			if (XA_strs[i] != '') // as the last symbol in an XA tag is ";", the last split is an empty string
+				hits.push(parse_hit(XA_strs[i].split(","), opt));
+
+		// lift mapping positions to coordinates on the primary assembly
+		var n_lifted = 0;
+		for (var i = 0; i < hits.length; ++i) {
+			var h = hits[i];
+
+			if (idx[h.ctg] == null) continue;
+			var a = idx[h.ctg](h.start, h.end);
+			if (a == null || a.length == 0) continue;
+
+			// find the approximate position on the primary assembly
+			var lifted = [];
+			for (var j = 0; j < a.length; ++j) {
+				var s, e;
+				if (!a[j][4]) { // ALT is mapped to the forward strand of the primary assembly
+					s = cigar2pos(a[j][6], h.start);
+					e = cigar2pos(a[j][6], h.end - 1) + 1;
+				} else {
+					s = cigar2pos(a[j][6], a[j][2] - h.end);
+					e = cigar2pos(a[j][6], a[j][2] - h.start - 1) + 1;
+				}
+				if (s < 0 || e < 0) continue; // read is mapped to clippings in the ALT-to-chr alignment
+				s += a[j][5]; e += a[j][5];
+				lifted.push([a[j][3], (h.rev!=a[j][4]), s, e]);
+			}
+			if (lifted.length) ++n_lifted, hits[i].lifted = lifted;
+		}
+		if (n_lifted == 0) {
+			if (opt.verbose < 4) print(line);
+			continue;
+		}
+
+		// group hits
+		for (var i = 0; i < hits.length; ++i) { // set keys for sorting
+			if (hits[i].lifted && hits[i].lifted.length) // TODO: only the first element in lifted[] is used
+				hits[i].pctg = hits[i].lifted[0][0], hits[i].pstart = hits[i].lifted[0][2], hits[i].pend = hits[i].lifted[0][3];
+			else hits[i].pctg = hits[i].ctg, hits[i].pstart = hits[i].start, hits[i].pend = hits[i].end;
+			hits[i].i = i; // keep the original index
+		}
+		hits.sort(function(a,b) { return a.pctg != b.pctg? (a.pctg < b.pctg? -1 : 1) : a.pstart - b.pstart });
+		var last_chr = null, end = 0, g = -1;
+		for (var i = 0; i < hits.length; ++i) {
+			if (last_chr != hits[i].pctg) ++g, last_chr = hits[i].pctg, end = 0;
+			else if (hits[i].pstart >= end) ++g;
+			hits[i].g = g;
+			end = end > hits[i].pend? end : hits[i].pend;
+		}
+		var reported_g = null, reported_i = null;
+		for (var i = 0; i < hits.length; ++i)
+			if (hits[i].i == 0)
+				reported_g = hits[i].g, reported_i = i;
+		var n_group0 = 0; // #hits overlapping the reported hit
+		for (var i = 0; i < hits.length; ++i)
+			if (hits[i].g == reported_g)
+				++n_group0;
+		if (n_group0 == 1) { // then keep the reported alignment and mapQ
+			if (opt.verbose < 4) print(line);
+			continue;
+		}
+
+		// re-estimate mapQ
+		var group_max = [];
+		for (var i = 0; i < hits.length; ++i) {
+			var g = hits[i].g;
+			if (group_max[g] == null || group_max[g][0] < hits[i].score)
+				group_max[g] = [hits[i].score, g];
+		}
+		if (group_max.length > 1)
+			group_max.sort(function(x,y) {return y[0]-x[0]});
+		var mapQ;
+		if (group_max[0][1] == reported_g) { // the best hit is the hit reported in SAM
+			mapQ = group_max.length == 1? 60 : 6 * (group_max[0][0] - group_max[1][0]);
+		} else mapQ = 0;
+		mapQ = mapQ < 60? mapQ : 60;
+		var ori_mapQ = parseInt(t[4]);
+		mapQ = mapQ > ori_mapQ? mapQ : ori_mapQ;
+
+		// generate lifted_str
+		for (var i = 0; i < hits.length; ++i) {
+			if (hits[i].lifted && hits[i].lifted.length) {
+				var lifted = hits[i].lifted;
+				var u = '';
+				for (var j = 0; j < lifted.length; ++j)
+					u += lifted[j][0] + "," + lifted[j][2] + "," + lifted[j][3] + "," + (lifted[j][1]?'-':'+') + ";";
+				hits[i].lifted_str = u;
+			}
+		}
+
+		// generate reversed quality and reverse-complemented sequence if necessary
+		var rs = null, rq = null; // reversed quality and reverse complement sequence
+		var need_rev = false;
+		for (var i = 0; i < hits.length; ++i) {
+			if (hits[i].g != reported_g || i == reported_i) continue;
+			if (hits[i].rev != hits[reported_i].rev)
+				need_rev = true;
+		}
+		if (need_rev) { // reverse and reverse complement
+			aux.set(t[9], 0); aux.revcomp(); rs = aux.toString();
+			aux.set(t[10],0); aux.reverse(); rq = aux.toString();
+		}
+
+		// print
+		t[4] = mapQ;
+		t.push("om:i:"+ori_mapQ);
+		if (hits[reported_i].lifted_str) t.push("lt:Z:" + hits[reported_i].lifted_str);
+		print(t.join("\t"));
+		var cnt = 0;
+		for (var i = 0; i < hits.length; ++i) {
+			if (opt.verbose >= 5) print(obj2str(hits[i]));
+			if (hits[i].g != reported_g || i == reported_i) continue;
+			var s = [t[0], flag&0xf10, hits[i].ctg, hits[i].start+1, mapQ, hits[i].cigar, '*', 0, 0];
+			// update name
+			if (flag&0x40) s[0] += "/1";
+			if (flag&0x80) s[0] += "/2";
+			s[0] += "_" + (++cnt);
+			if (hits[i].rev == hits[reported_i].rev) s.push(t[9], t[10]);
+			else s.push(rs, rq);
+			s.push("NM:i:" + hits[i].NM);
+			if (hits[i].lifted_str) s.push("lt:Z:" + hits[i].lifted_str);
+			print(s.join("\t"));
+		}
+	}
+	file.close();
+
+	aux.destroy();
+	buf.destroy();
+}
+
+/*********************
+ *** Main function ***
+ *********************/
+
+function main(args)
+{
+	if (args.length == 0) {
+		print("\nUsage:    k8 bwa-helper.js <command> [arguments]\n");
+		print("Commands: genalt       generate ALT alignments");
+		print("          sam2pas      convert SAM to pairwise alignment summary format (PAS)");
+		print("          pas2reg      extract covered regions");
+		print("          reg2cut      regions to extract for the 2nd round bwa-mem");
+		print("          markovlp     identify bi-directional overlaps");
+		print("          gff2sam      convert GFF3 alignment to SAM");
+		print("          shortname    shorten sequence name after subseq (PacBio read names only)");
+		print("");
+		exit(1);
+	}
+
+	var cmd = args.shift();
+	if (cmd == 'sam2pas') bwa_sam2pas(args);
+	else if (cmd == 'gff2sam') bwa_gff2sam(args);
+	else if (cmd == 'markovlp') bwa_markOvlp(args);
+	else if (cmd == 'pas2reg') bwa_pas2reg(args);
+	else if (cmd == 'reg2cut') bwa_reg2cut(args);
+	else if (cmd == 'genalt') bwa_genalt(args);
+	else if (cmd == 'shortname') bwa_shortname(args);
+	else warn("Unrecognized command");
+}
+
+main(arguments);
diff --git a/bwa.1 b/bwa.1
index b6354e5..ba8bc9e 100644
--- a/bwa.1
+++ b/bwa.1
@@ -1,4 +1,4 @@
-.TH bwa 1 "31 March 2014" "bwa-0.7.8" "Bioinformatics tools"
+.TH bwa 1 "19 May 2014" "bwa-0.7.9-r783" "Bioinformatics tools"
 .SH NAME
 .PP
 bwa - Burrows-Wheeler Alignment Tool
@@ -191,7 +191,7 @@ yields fewer seeds, which leads to faster alignment speed but lower accuracy. [1
 .BI -c \ INT
 Discard a MEM if it has more than
 .I INT
-occurence in the genome. This is an insensitive parameter. [10000]
+occurence in the genome. This is an insensitive parameter. [500]
 .TP
 .B -P
 In the paired-end mode, perform SW to rescue missing hits only but do not try to find
@@ -248,6 +248,11 @@ Don't output alignment with score lower than
 .IR INT .
 This option affects output and occasionally SAM flag 2. [30]
 .TP
+.BI -h \ INT
+If a query has not more than
+.I INT
+hits with score higher than 80% of the best hit, output them all in the XA tag [5]
+.TP
 .B -a
 Output all found alignments for single-end or unpaired paired-end reads. These
 alignments will be flagged as secondary alignments.
@@ -258,6 +263,11 @@ transfer read meta information (e.g. barcode) to the SAM output. Note that the
 FASTA/Q comment (the string after a space in the header line) must conform the SAM
 spec (e.g. BC:Z:CGTAC). Malformated comments lead to incorrect SAM output.
 .TP
+.B -Y
+Use soft clipping CIGAR operation for supplementary alignments. By default, BWA-MEM
+uses soft clipping for the primary alignment and hard clipping for
+supplementary alignments.
+.TP
 .B -M
 Mark shorter split hits as secondary (for Picard compatibility).
 .TP
diff --git a/bwa.c b/bwa.c
index 0e9e606..43d8a58 100644
--- a/bwa.c
+++ b/bwa.c
@@ -95,7 +95,8 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins,
 	kstring_t str;
 	const char *int2base;
 
-	*n_cigar = 0; *NM = -1;
+	if (n_cigar) *n_cigar = 0;
+	if (NM) *NM = -1;
 	if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand
 	rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
 	if (re - rb != rlen) goto ret_gen_cigar; // possible if out of range
@@ -106,10 +107,12 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins,
 			tmp = rseq[i], rseq[i] = rseq[rlen - 1 - i], rseq[rlen - 1 - i] = tmp;
 	}
 	if (l_query == re - rb && w_ == 0) { // no gap; no need to do DP
-		// FIXME: due to an issue in mem_reg2aln(), we never come to this block. This does not affect accuracy, but it hurts performance.
-		cigar = malloc(4);
-		cigar[0] = l_query<<4 | 0;
-		*n_cigar = 1;
+		// UPDATE: we come to this block now... FIXME: due to an issue in mem_reg2aln(), we never come to this block. This does not affect accuracy, but it hurts performance.
+		if (n_cigar) {
+			cigar = malloc(4);
+			cigar[0] = l_query<<4 | 0;
+			*n_cigar = 1;
+		}
 		for (i = 0, *score = 0; i < l_query; ++i)
 			*score += mat[rseq[i]*5 + query[i]];
 	} else {
@@ -131,7 +134,7 @@ uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins,
 		}
 		*score = ksw_global2(l_query, query, rlen, rseq, 5, mat, o_del, e_del, o_ins, e_ins, w, n_cigar, &cigar);
 	}
-	{// compute NM and MD
+	if (NM && n_cigar) {// compute NM and MD
 		int k, x, y, u, n_mm = 0, n_gap = 0;
 		str.l = str.m = *n_cigar * 4; str.s = (char*)cigar; // append MD to CIGAR
 		int2base = rb < l_pac? "ACGTN" : "TGCAN";
@@ -176,56 +179,6 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
 	return bwa_gen_cigar2(mat, q, r, q, r, w_, l_pac, pac, l_query, query, rb, re, score, n_cigar, NM);
 }
 
-int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
-{
-	int is_rev;
-	int64_t cb, ce, fm;
-	bntann1_t *ra;
-	if (*rb < bns->l_pac && *re > bns->l_pac) { // cross the for-rev boundary; actually with BWA-MEM, we should never come to here
-		*qb = *qe = *rb = *re = -1;
-		return -1; // unable to fix
-	}
-	fm = bns_depos(bns, (*rb + *re) >> 1, &is_rev); // coordinate of the middle point on the forward strand
-	ra = &bns->anns[bns_pos2rid(bns, fm)]; // annotation of chr corresponding to the middle point
-	cb = is_rev? (bns->l_pac<<1) - (ra->offset + ra->len) : ra->offset; // chr start on the mapping strand
-	ce = cb + ra->len; // chr end
-	if (cb > *rb || ce < *re) { // fix is needed
-		int i, score, n_cigar, y, NM;
-		uint32_t *cigar;
-		int64_t x;
-		cb = cb > *rb? cb : *rb;
-		ce = ce < *re? ce : *re;
-		cigar = bwa_gen_cigar2(mat, o_del, e_del, o_ins, e_ins, w, bns->l_pac, pac, *qe - *qb, query + *qb, *rb, *re, &score, &n_cigar, &NM);
-		for (i = 0, x = *rb, y = *qb; i < n_cigar; ++i) {
-			int op = cigar[i]&0xf, len = cigar[i]>>4;
-			if (op == 0) {
-				if (x <= cb && cb < x + len)
-					*qb = y + (cb - x), *rb = cb;
-				if (x < ce && ce <= x + len) {
-					*qe = y + (ce - x), *re = ce;
-					break;
-				} else x += len, y += len;
-			} else if (op == 1) {
-				y += len;
-			} else if (op == 2) {
-				if (x <= cb && cb < x + len)
-					*qb = y, *rb = x + len;
-				if (x < ce && ce <= x + len) {
-					*qe = y, *re = x;
-					break;
-				} else x += len;
-			} else abort(); // should not be here
-		}
-		free(cigar);
-	}
-	return (*qb == *qe || *rb == *re)? -2 : 0;
-}
-
-int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re)
-{
-	return bwa_fix_xref2(mat, q, r, q, r, w, bns, pac, query, qb, qe, rb, re);
-}
-
 /*********************
  * Full index reader *
  *********************/
diff --git a/bwa.h b/bwa.h
index 8d46e58..bbc2525 100644
--- a/bwa.h
+++ b/bwa.h
@@ -33,8 +33,6 @@ extern "C" {
 	void bwa_fill_scmat(int a, int b, int8_t mat[25]);
 	uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
 	uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
-	int bwa_fix_xref(const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re);
-	int bwa_fix_xref2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int *qb, int *qe, int64_t *rb, int64_t *re);
 
 	char *bwa_idx_infer_prefix(const char *hint);
 	bwt_t *bwa_idx_load_bwt(const char *hint);
diff --git a/bwamem.c b/bwamem.c
index 33b86ad..76871e0 100644
--- a/bwamem.c
+++ b/bwamem.c
@@ -42,6 +42,8 @@
  * When there are gaps, l should be the length of alignment matches (i.e. the M operator in CIGAR)
  */
 
+static const bntseq_t *global_bns = 0; // for debugging only
+
 mem_opt_t *mem_opt_init()
 {
 	mem_opt_t *o;
@@ -57,120 +59,100 @@ mem_opt_t *mem_opt_init()
 	o->pen_clip5 = o->pen_clip3 = 5;
 	o->min_seed_len = 19;
 	o->split_width = 10;
-	o->max_occ = 10000;
+	o->max_occ = 500;
 	o->max_chain_gap = 10000;
 	o->max_ins = 10000;
 	o->mask_level = 0.50;
-	o->chain_drop_ratio = 0.50;
+	o->drop_ratio = 0.50;
+	o->XA_drop_ratio = 0.80;
 	o->split_factor = 1.5;
 	o->chunk_size = 10000000;
 	o->n_threads = 1;
-	o->max_matesw = 100;
+	o->max_hits = 5;
+	o->max_matesw = 50;
 	o->mask_level_redun = 0.95;
+	o->min_chain_weight = 0;
+	o->max_chain_extend = 1<<30;
 	o->mapQ_coef_len = 50; o->mapQ_coef_fac = log(o->mapQ_coef_len);
-//	o->mapQ_coef_len = o->mapQ_coef_fac = 0;
 	bwa_fill_scmat(o->a, o->b, o->mat);
 	return o;
 }
 
 /***************************
- * SMEM iterator interface *
+ * Collection SA invervals *
  ***************************/
 
-struct __smem_i {
-	const bwt_t *bwt;
-	const uint8_t *query;
-	int start, len;
-	bwtintv_v *matches; // matches; to be returned by smem_next()
-	bwtintv_v *sub;     // sub-matches inside the longest match; temporary
-	bwtintv_v *tmpvec[2]; // temporary arrays
-};
-
-smem_i *smem_itr_init(const bwt_t *bwt)
-{
-	smem_i *itr;
-	itr = calloc(1, sizeof(smem_i));
-	itr->bwt = bwt;
-	itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
-	itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
-	itr->matches   = calloc(1, sizeof(bwtintv_v));
-	itr->sub       = calloc(1, sizeof(bwtintv_v));
-	return itr;
-}
+#define intv_lt(a, b) ((a).info < (b).info)
+KSORT_INIT(mem_intv, bwtintv_t, intv_lt)
+
+typedef struct {
+	bwtintv_v mem, mem1, *tmpv[2];
+} smem_aux_t;
 
-void smem_itr_destroy(smem_i *itr)
+static smem_aux_t *smem_aux_init()
 {
-	free(itr->tmpvec[0]->a); free(itr->tmpvec[0]);
-	free(itr->tmpvec[1]->a); free(itr->tmpvec[1]);
-	free(itr->matches->a);   free(itr->matches);
-	free(itr->sub->a);       free(itr->sub);
-	free(itr);
+	smem_aux_t *a;
+	a = calloc(1, sizeof(smem_aux_t));
+	a->tmpv[0] = calloc(1, sizeof(bwtintv_v));
+	a->tmpv[1] = calloc(1, sizeof(bwtintv_v));
+	return a;
 }
 
-void smem_set_query(smem_i *itr, int len, const uint8_t *query)
-{
-	itr->query = query;
-	itr->start = 0;
-	itr->len = len;
+static void smem_aux_destroy(smem_aux_t *a)
+{	
+	free(a->tmpv[0]->a); free(a->tmpv[0]);
+	free(a->tmpv[1]->a); free(a->tmpv[1]);
+	free(a->mem.a); free(a->mem1.a);
+	free(a);
 }
 
-const bwtintv_v *smem_next2(smem_i *itr, int split_len, int split_width, int start_width)
+static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq, smem_aux_t *a)
 {
-	int i, max, max_i, ori_start;
-	itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
-	if (itr->start >= itr->len || itr->start < 0) return 0;
-	while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
-	if (itr->start == itr->len) return 0;
-	ori_start = itr->start;
-	itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, start_width, itr->matches, itr->tmpvec); // search for SMEM
-	if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here
-	for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match
-		bwtintv_t *p = &itr->matches->a[i];
-		int len = (uint32_t)p->info - (p->info>>32);
-		if (max < len) max = len, max_i = i;
+	int i, k, x = 0, old_n;
+	int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1;
+	int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
+	a->mem.n = 0;
+	// first pass: find all SMEMs
+	while (x < len) {
+		if (seq[x] < 4) {
+			x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
+			for (i = 0; i < a->mem1.n; ++i) {
+				bwtintv_t *p = &a->mem1.a[i];
+				int slen = (uint32_t)p->info - (p->info>>32); // seed length
+				if (slen >= opt->min_seed_len)
+					kv_push(bwtintv_t, a->mem, *p);
+			}
+		} else ++x;
 	}
-	if (split_len > 0 && max >= split_len && itr->matches->a[max_i].x[2] <= split_width) { // if the longest SMEM is unique and long
-		int j;
-		bwtintv_v *a = itr->tmpvec[0]; // reuse tmpvec[0] for merging
-		bwtintv_t *p = &itr->matches->a[max_i];
-		bwt_smem1(itr->bwt, itr->len, itr->query, ((uint32_t)p->info + (p->info>>32))>>1, itr->matches->a[max_i].x[2]+1, itr->sub, itr->tmpvec); // starting from the middle of the longest MEM
-		i = j = 0; a->n = 0;
-		while (i < itr->matches->n && j < itr->sub->n) { // ordered merge
-			int64_t xi = itr->matches->a[i].info>>32<<32 | (itr->len - (uint32_t)itr->matches->a[i].info);
-			int64_t xj = itr->sub->a[j].info>>32<<32 | (itr->len - (uint32_t)itr->sub->a[j].info);
-			if (xi < xj) {
-				kv_push(bwtintv_t, *a, itr->matches->a[i]);
-				++i;
-			} else if ((uint32_t)itr->sub->a[j].info - (itr->sub->a[j].info>>32) >= max>>1 && (uint32_t)itr->sub->a[j].info > ori_start) {
-				kv_push(bwtintv_t, *a, itr->sub->a[j]);
-				++j;
-			} else ++j;
-		}
-		for (; i < itr->matches->n; ++i) kv_push(bwtintv_t, *a, itr->matches->a[i]);
-		for (; j < itr->sub->n; ++j)
-			if ((uint32_t)itr->sub->a[j].info - (itr->sub->a[j].info>>32) >= max>>1 && (uint32_t)itr->sub->a[j].info > ori_start)
-				kv_push(bwtintv_t, *a, itr->sub->a[j]);
-		kv_copy(bwtintv_t, *itr->matches, *a);
+	// second pass: find MEMs inside a long SMEM
+	old_n = a->mem.n;
+	for (k = 0; k < old_n; ++k) {
+		bwtintv_t *p = &a->mem.a[k];
+		int start = p->info>>32, end = (int32_t)p->info;
+		if (end - start < split_len || p->x[2] > opt->split_width) continue;
+		bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
+		for (i = 0; i < a->mem1.n; ++i)
+			if ((a->mem1.a[i].info>>32) - (uint32_t)a->mem1.a[i].info >= opt->min_seed_len)
+				kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
 	}
-	return itr->matches;
+	// sort
+	ks_introsort(mem_intv, a->mem.n, a->mem.a);
 }
 
-const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
-{
-	return smem_next2(itr, split_len, split_width, 1);
-}
-
-/********************************
- * Chaining while finding SMEMs *
- ********************************/
+/************
+ * Chaining *
+ ************/
 
 typedef struct {
 	int64_t rbeg;
 	int32_t qbeg, len;
-} mem_seed_t;
+	int score;
+} mem_seed_t; // unaligned memory
 
 typedef struct {
-	int n, m;
+	int n, m, first, rid;
+	uint32_t w:30, kept:2;
+	float frac_rep;
 	int64_t pos;
 	mem_seed_t *seeds;
 } mem_chain_t;
@@ -182,12 +164,14 @@ typedef struct { size_t n, m; mem_chain_t *a;  } mem_chain_v;
 #define chain_cmp(a, b) (((b).pos < (a).pos) - ((a).pos < (b).pos))
 KBTREE_INIT(chn, mem_chain_t, chain_cmp)
 
-static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p)
+// return 1 if the seed is merged into the chain
+static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, const mem_seed_t *p, int seed_rid)
 {
 	int64_t qend, rend, x, y;
 	const mem_seed_t *last = &c->seeds[c->n-1];
 	qend = last->qbeg + last->len;
 	rend = last->rbeg + last->len;
+	if (seed_rid != c->rid) return 0; // different chr; request a new chain
 	if (p->qbeg >= c->seeds[0].qbeg && p->qbeg + p->len <= qend && p->rbeg >= c->seeds[0].rbeg && p->rbeg + p->len <= rend)
 		return 1; // contained seed; do nothing
 	if ((last->rbeg < l_pac || c->seeds[0].rbeg < l_pac) && p->rbeg >= l_pac) return 0; // don't chain if on different strand
@@ -204,43 +188,6 @@ static int test_and_merge(const mem_opt_t *opt, int64_t l_pac, mem_chain_t *c, c
 	return 0; // request to add a new chain
 }
 
-static void mem_insert_seed(const mem_opt_t *opt, int64_t l_pac, kbtree_t(chn) *tree, smem_i *itr)
-{
-	const bwtintv_v *a;
-	int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
-	int start_width = (opt->flag & MEM_F_NO_EXACT)? 2 : 1;
-	split_len = split_len < itr->len? split_len : itr->len;
-	while ((a = smem_next2(itr, split_len, opt->split_width, start_width)) != 0) { // to find all SMEM and some internal MEM
-		int i;
-		for (i = 0; i < a->n; ++i) { // go through each SMEM/MEM up to itr->start
-			bwtintv_t *p = &a->a[i];
-			int slen = (uint32_t)p->info - (p->info>>32); // seed length
-			int64_t k;
-			if (slen < opt->min_seed_len || p->x[2] > opt->max_occ) continue; // ignore if too short or too repetitive
-			for (k = 0; k < p->x[2]; ++k) {
-				mem_chain_t tmp, *lower, *upper;
-				mem_seed_t s;
-				int to_add = 0;
-				s.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
-				s.qbeg = p->info>>32;
-				s.len  = slen;
-				if (bwa_verbose >= 5) printf("* Found SEED: length=%d,query_beg=%d,ref_beg=%ld\n", s.len, s.qbeg, (long)s.rbeg);
-				if (s.rbeg < l_pac && l_pac < s.rbeg + s.len) continue; // bridging forward-reverse boundary; skip
-				if (kb_size(tree)) {
-					kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain
-					if (!lower || !test_and_merge(opt, l_pac, lower, &s)) to_add = 1;
-				} else to_add = 1;
-				if (to_add) { // add the seed as a new chain
-					tmp.n = 1; tmp.m = 4;
-					tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t));
-					tmp.seeds[0] = s;
-					kb_putp(chn, tree, &tmp);
-				}
-			}
-		}
-	}
-}
-
 int mem_chain_weight(const mem_chain_t *c)
 {
 	int64_t end;
@@ -251,14 +198,15 @@ int mem_chain_weight(const mem_chain_t *c)
 		else if (s->qbeg + s->len > end) w += s->qbeg + s->len - end;
 		end = end > s->qbeg + s->len? end : s->qbeg + s->len;
 	}
-	tmp = w;
+	tmp = w; w = 0;
 	for (j = 0, end = 0; j < c->n; ++j) {
 		const mem_seed_t *s = &c->seeds[j];
 		if (s->rbeg >= end) w += s->len;
 		else if (s->rbeg + s->len > end) w += s->rbeg + s->len - end;
-		end = end > s->qbeg + s->len? end : s->qbeg + s->len;
+		end = end > s->rbeg + s->len? end : s->rbeg + s->len;
 	}
-	return w < tmp? w : tmp;
+	w = w < tmp? w : tmp;
+	return w < 1<<30? w : (1<<30)-1;
 }
 
 void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
@@ -269,28 +217,66 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
 		err_printf("* Found CHAIN(%d): n=%d; weight=%d", i, p->n, mem_chain_weight(p));
 		for (j = 0; j < p->n; ++j) {
 			bwtint_t pos;
-			int is_rev, ref_id;
+			int is_rev;
 			pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev);
 			if (is_rev) pos -= p->seeds[j].len - 1;
-			bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id);
-			err_printf("\t%d;%d,%ld(%s:%c%ld)", p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
+			err_printf("\t%d;%d;%d,%ld(%s:%c%ld)", p->seeds[j].score, p->seeds[j].len, p->seeds[j].qbeg, (long)p->seeds[j].rbeg, bns->anns[p->rid].name, "+-"[is_rev], (long)(pos - bns->anns[p->rid].offset) + 1);
 		}
 		err_putchar('\n');
 	}
 }
 
-mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int len, const uint8_t *seq)
+mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, int len, const uint8_t *seq, void *buf)
 {
+	int i, b, e, l_rep;
+	int64_t l_pac = bns->l_pac;
 	mem_chain_v chain;
-	smem_i *itr;
 	kbtree_t(chn) *tree;
+	smem_aux_t *aux;
 
 	kv_init(chain);
 	if (len < opt->min_seed_len) return chain; // if the query is shorter than the seed length, no match
 	tree = kb_init(chn, KB_DEFAULT_SIZE);
-	itr = smem_itr_init(bwt);
-	smem_set_query(itr, len, seq);
-	mem_insert_seed(opt, l_pac, tree, itr);
+
+	aux = buf? (smem_aux_t*)buf : smem_aux_init();
+	mem_collect_intv(opt, bwt, len, seq, aux);
+	for (i = 0, b = e = l_rep = 0; i < aux->mem.n; ++i) { // compute frac_rep
+		bwtintv_t *p = &aux->mem.a[i];
+		int sb = (p->info>>32), se = (uint32_t)p->info;
+		if (p->x[2] <= opt->max_occ) continue;
+		if (sb > e) l_rep += e - b, b = sb, e = se;
+		else e = e > se? e : se;
+	}
+	l_rep += e - b;
+	for (i = 0; i < aux->mem.n; ++i) {
+		bwtintv_t *p = &aux->mem.a[i];
+		int step, count, slen = (uint32_t)p->info - (p->info>>32); // seed length
+		int64_t k;
+		if (slen < opt->min_seed_len) continue; // ignore if too short or too repetitive
+		step = p->x[2] > opt->max_occ? p->x[2] / opt->max_occ : 1;
+		for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) {
+			mem_chain_t tmp, *lower, *upper;
+			mem_seed_t s;
+			int rid, to_add = 0;
+			s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference
+			s.qbeg = p->info>>32;
+			s.score= s.len = slen;
+			rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
+			if (rid < 0) continue; // bridging multiple reference sequences or the forward-reverse boundary
+			if (kb_size(tree)) {
+				kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain
+				if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) to_add = 1;
+			} else to_add = 1;
+			if (to_add) { // add the seed as a new chain
+				tmp.n = 1; tmp.m = 4;
+				tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t));
+				tmp.seeds[0] = s;
+				tmp.rid = rid;
+				kb_putp(chn, tree, &tmp);
+			}
+		}
+	}
+	if (buf == 0) smem_aux_destroy(aux);
 
 	kv_resize(mem_chain_t, chain, kb_size(tree));
 
@@ -298,7 +284,9 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int
 	__kb_traverse(mem_chain_t, tree, traverse_func);
 	#undef traverse_func
 
-	smem_itr_destroy(itr);
+	for (i = 0; i < chain.n; ++i) chain.a[i].frac_rep = (float)l_rep / len;
+	if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len);
+
 	kb_destroy(chn, tree);
 	return chain;
 }
@@ -307,75 +295,70 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int64_t l_pac, int
  * Filtering chains *
  ********************/
 
-typedef struct {
-	int beg, end, w;
-	void *p, *p2;
-} flt_aux_t;
+#define chn_beg(ch) ((ch).seeds->qbeg)
+#define chn_end(ch) ((ch).seeds[(ch).n-1].qbeg + (ch).seeds[(ch).n-1].len)
 
 #define flt_lt(a, b) ((a).w > (b).w)
-KSORT_INIT(mem_flt, flt_aux_t, flt_lt)
+KSORT_INIT(mem_flt, mem_chain_t, flt_lt)
 
-int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *chains)
+int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *a)
 {
-	flt_aux_t *a;
-	int i, j, n;
-	if (n_chn <= 1) return n_chn; // no need to filter
-	a = malloc(sizeof(flt_aux_t) * n_chn);
-	for (i = 0; i < n_chn; ++i) {
-		mem_chain_t *c = &chains[i];
-		int w;
-		w = mem_chain_weight(c);
-		a[i].beg = c->seeds[0].qbeg;
-		a[i].end = c->seeds[c->n-1].qbeg + c->seeds[c->n-1].len;
-		a[i].w = w; a[i].p = c; a[i].p2 = 0;
+	int i, k;
+	kvec_t(int) chains = {0,0,0}; // this keeps int indices of the non-overlapping chains
+	if (n_chn == 0) return 0; // no need to filter
+	// compute the weight of each chain and drop chains with small weight
+	for (i = k = 0; i < n_chn; ++i) {
+		mem_chain_t *c = &a[i];
+		c->first = -1; c->kept = 0;
+		c->w = mem_chain_weight(c);
+		if (c->w < opt->min_chain_weight) free(c->seeds);
+		else a[k++] = *c;
 	}
+	n_chn = k;
 	ks_introsort(mem_flt, n_chn, a);
-	{ // reorder chains such that the best chain appears first
-		mem_chain_t *swap;
-		swap = malloc(sizeof(mem_chain_t) * n_chn);
-		for (i = 0; i < n_chn; ++i) {
-			swap[i] = *((mem_chain_t*)a[i].p);
-			a[i].p = &chains[i]; // as we will memcpy() below, a[i].p is changed
-		}
-		memcpy(chains, swap, sizeof(mem_chain_t) * n_chn);
-		free(swap);
-	}
-	for (i = 1, n = 1; i < n_chn; ++i) {
-		for (j = 0; j < n; ++j) {
-			int b_max = a[j].beg > a[i].beg? a[j].beg : a[i].beg;
-			int e_min = a[j].end < a[i].end? a[j].end : a[i].end;
+	// pairwise chain comparisons
+	a[0].kept = 3;
+	kv_push(int, chains, 0);
+	for (i = 1; i < n_chn; ++i) {
+		int large_ovlp = 0;
+		for (k = 0; k < chains.n; ++k) {
+			int j = chains.a[k];
+			int b_max = chn_beg(a[j]) > chn_beg(a[i])? chn_beg(a[j]) : chn_beg(a[i]);
+			int e_min = chn_end(a[j]) < chn_end(a[i])? chn_end(a[j]) : chn_end(a[i]);
 			if (e_min > b_max) { // have overlap
-				int min_l = a[i].end - a[i].beg < a[j].end - a[j].beg? a[i].end - a[i].beg : a[j].end - a[j].beg;
-				if (e_min - b_max >= min_l * opt->mask_level) { // significant overlap
-					if (a[j].p2 == 0) a[j].p2 = a[i].p;
-					if (a[i].w < a[j].w * opt->chain_drop_ratio && a[j].w - a[i].w >= opt->min_seed_len<<1)
+				int li = chn_end(a[i]) - chn_beg(a[i]);
+				int lj = chn_end(a[j]) - chn_beg(a[j]);
+				int min_l = li < lj? li : lj;
+				if (e_min - b_max >= min_l * opt->mask_level && min_l < opt->max_chain_gap) { // significant overlap
+					large_ovlp = 1;
+					if (a[j].first < 0) a[j].first = i; // keep the first shadowed hit s.t. mapq can be more accurate
+					if (a[i].w < a[j].w * opt->drop_ratio && a[j].w - a[i].w >= opt->min_seed_len<<1)
 						break;
 				}
 			}
 		}
-		if (j == n) a[n++] = a[i]; // if have no significant overlap with better chains, keep it.
+		if (k == chains.n) {
+			kv_push(int, chains, i);
+			a[i].kept = large_ovlp? 2 : 3;
+		}
 	}
-	for (i = 0; i < n; ++i) { // mark chains to be kept
-		mem_chain_t *c = (mem_chain_t*)a[i].p;
-		if (c->n > 0) c->n = -c->n;
-		c = (mem_chain_t*)a[i].p2;
-		if (c && c->n > 0) c->n = -c->n;
+	for (i = 0; i < chains.n; ++i) {
+		mem_chain_t *c = &a[chains.a[i]];
+		if (c->first >= 0) a[c->first].kept = 1;
 	}
-	free(a);
-	for (i = 0; i < n_chn; ++i) { // free discarded chains
-		mem_chain_t *c = &chains[i];
-		if (c->n >= 0) {
-			free(c->seeds);
-			c->n = c->m = 0;
-		} else c->n = -c->n;
+	free(chains.a);
+	for (i = k = 0; i < n_chn; ++i) { // don't extend more than opt->max_chain_extend .kept=1/2 chains
+		if (a[i].kept == 0 || a[i].kept == 3) continue;
+		if (++k >= opt->max_chain_extend) break;
 	}
-	for (i = n = 0; i < n_chn; ++i) { // squeeze out discarded chains
-		if (chains[i].n > 0) {
-			if (n != i) chains[n++] = chains[i];
-			else ++n;
-		}
+	for (; i < n_chn; ++i)
+		if (a[i].kept < 3) a[i].kept = 0;
+	for (i = k = 0; i < n_chn; ++i) { // free discarded chains
+		mem_chain_t *c = &a[i];
+		if (c->kept == 0) free(c->seeds);
+		else a[k++] = a[i];
 	}
-	return n;
+	return k;
 }
 
 /******************************
@@ -391,27 +374,71 @@ KSORT_INIT(mem_ars, mem_alnreg_t, alnreg_slt)
 #define alnreg_hlt(a, b) ((a).score > (b).score || ((a).score == (b).score && (a).hash < (b).hash))
 KSORT_INIT(mem_ars_hash, mem_alnreg_t, alnreg_hlt)
 
-int mem_sort_and_dedup(int n, mem_alnreg_t *a, float mask_level_redun)
+#define PATCH_MAX_R_BW 0.05f
+#define PATCH_MIN_SC_RATIO 0.90f
+
+int mem_patch_reg(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, const mem_alnreg_t *a, const mem_alnreg_t *b, int *_w)
+{
+	int w, score, q_s, r_s;
+	double r;
+	if (bns == 0 || pac == 0 || query == 0) return 0;
+	assert(a->rid == b->rid && a->rb <= b->rb);
+	if (a->qb >= b->qb || a->qe >= b->qe || a->re >= b->re) return 0; // not colinear
+	w = (a->re - b->rb) - (a->qe - b->qb); // required bandwidth
+	w = w > 0? w : -w; // l = abs(l)
+	r = (double)(a->re - b->rb) / (b->re - a->rb) - (double)(a->qe - b->qb) / (b->qe - a->qb); // relative bandwidth
+	r = r > 0.? r : -r; // r = fabs(r)
+	if (bwa_verbose >= 4)
+		printf("* potential hit merge between [%d,%d)<=>[%ld,%ld) and [%d,%d)<=>[%ld,%ld), @ %s; w=%d, r=%.4g\n",
+			   a->qb, a->qe, (long)a->rb, (long)a->re, b->qb, b->qe, (long)b->rb, (long)b->re, bns->anns[a->rid].name, w, r);
+	if (a->re < b->rb || a->qe < b->qb) { // no overlap on query or on ref
+		if (w > opt->w<<1 || r >= PATCH_MAX_R_BW) return 0; // the bandwidth or the relative bandwidth is too large
+	} else if (w > opt->w<<2 || r >= PATCH_MAX_R_BW*2) return 0; // more permissive if overlapping on both ref and query
+	// global alignment
+	w += a->w + b->w;
+	w = w < opt->w<<2? w : opt->w<<2;
+	if (bwa_verbose >= 4) printf("* test potential hit merge with global alignment; w=%d\n", w);
+	bwa_gen_cigar2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, w, bns->l_pac, pac, b->qe - a->qb, query + a->qb, a->rb, b->re, &score, 0, 0);
+	q_s = (int)((double)(b->qe - a->qb) / ((b->qe - b->qb) + (a->qe - a->qb)) * (b->score + a->score) + .499); // predicted score from query
+	r_s = (int)((double)(b->re - a->rb) / ((b->re - b->rb) + (a->re - a->rb)) * (b->score + a->score) + .499); // predicted score from ref
+	if (bwa_verbose >= 4) printf("* score=%d;(%d,%d)\n", score, q_s, r_s);
+	if ((double)score / (q_s > r_s? q_s : r_s) < PATCH_MIN_SC_RATIO) return 0;
+	*_w = w;
+	return score;
+}
+
+int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int n, mem_alnreg_t *a)
 {
 	int m, i, j;
 	if (n <= 1) return n;
-	ks_introsort(mem_ars2, n, a);
+	ks_introsort(mem_ars2, n, a); // sort by the END position, not START!
+	for (i = 0; i < n; ++i) a[i].n_comp = 1;
 	for (i = 1; i < n; ++i) {
 		mem_alnreg_t *p = &a[i];
-		if (p->rb >= a[i-1].re) continue;
-		for (j = i - 1; j >= 0 && p->rb < a[j].re; --j) {
+		if (p->rid != a[i-1].rid || p->rb >= a[i-1].re + opt->max_chain_gap) continue; // then no need to go into the loop below
+		for (j = i - 1; j >= 0 && p->rid == a[j].rid && p->rb < a[j].re + opt->max_chain_gap; --j) {
 			mem_alnreg_t *q = &a[j];
 			int64_t or, oq, mr, mq;
+			int score, w;
 			if (q->qe == q->qb) continue; // a[j] has been excluded
 			or = q->re - p->rb; // overlap length on the reference
 			oq = q->qb < p->qb? q->qe - p->qb : p->qe - q->qb; // overlap length on the query
 			mr = q->re - q->rb < p->re - p->rb? q->re - q->rb : p->re - p->rb; // min ref len in alignment
 			mq = q->qe - q->qb < p->qe - p->qb? q->qe - q->qb : p->qe - p->qb; // min qry len in alignment
-			if (or > mask_level_redun * mr && oq > mask_level_redun * mq) { // one of the hits is redundant
+			if (or > opt->mask_level_redun * mr && oq > opt->mask_level_redun * mq) { // one of the hits is redundant
 				if (p->score < q->score) {
 					p->qe = p->qb;
 					break;
 				} else q->qe = q->qb;
+			} else if (q->rb < p->rb && (score = mem_patch_reg(opt, bns, pac, query, q, p, &w)) > 0) { // then merge q into p
+				p->n_comp += q->n_comp + 1;
+				p->seedcov = p->seedcov > q->seedcov? p->seedcov : q->seedcov;
+				p->sub = p->sub > q->sub? p->sub : q->sub;
+				p->csub = p->csub > q->csub? p->csub : q->csub;
+				p->qb = q->qb, p->rb = q->rb;
+				p->truesc = p->score = score;
+				p->w = w;
+				q->qb = q->qe;
 			}
 		}
 	}
@@ -436,12 +463,12 @@ int mem_sort_and_dedup(int n, mem_alnreg_t *a, float mask_level_redun)
 
 int mem_test_and_remove_exact(const mem_opt_t *opt, int n, mem_alnreg_t *a, int qlen)
 {
-	if (!(opt->flag & MEM_F_NO_EXACT) || n == 0 || a->truesc != qlen * opt->a) return n;
+	if (!(opt->flag & MEM_F_SELF_OVLP) || n == 0 || a->truesc != qlen * opt->a) return n;
 	memmove(a, a + 1, (n - 1) * sizeof(mem_alnreg_t));
 	return n - 1;
 }
 
-void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id) // IMPORTANT: must run mem_sort_and_dedup() before calling this function
+void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id)
 { // similar to the loop in mem_chain_flt()
 	int i, k, tmp;
 	kvec_t(int) z;
@@ -473,6 +500,56 @@ void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t i
 	free(z.a);
 }
 
+/*********************************
+ * Test if a seed is good enough *
+ *********************************/
+
+#define MEM_SHORT_EXT 50
+#define MEM_SHORT_LEN 200
+
+#define MEM_HSP_COEF 1.1
+
+int mem_seed_sw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_seed_t *s)
+{
+	int qb, qe, rid;
+	int64_t rb, re, mid, l_pac = bns->l_pac;
+	uint8_t *rseq = 0;
+	kswr_t x;
+
+	qb = s->qbeg, qe = s->qbeg + s->len;
+	rb = s->rbeg, re = s->rbeg + s->len;
+	mid = (rb + re) >> 1;
+	qb -= MEM_SHORT_EXT; qb = qb > 0? qb : 0;
+	qe += MEM_SHORT_EXT; qe = qe < l_query? qe : l_query;
+	rb -= MEM_SHORT_EXT; rb = rb > 0? rb : 0;
+	re += MEM_SHORT_EXT; re = re < l_pac<<1? re : l_pac<<1;
+	if (rb < l_pac && l_pac < re) {
+		if (mid < l_pac) re = l_pac;
+		else rb = l_pac;
+	}
+	if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return -1;
+
+	rseq = bns_fetch_seq(bns, pac, &rb, mid, &re, &rid);
+	x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, KSW_XSTART, 0);
+	free(rseq);
+	return x.score;
+}
+
+void mem_flt_chained_seeds(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, int n_chn, mem_chain_t *a)
+{
+	int i, j, k, min_HSP_score = (int)(opt->min_chain_weight * opt->a * MEM_HSP_COEF + .499);
+	for (i = 0; i < n_chn; ++i) {
+		mem_chain_t *c = &a[i];
+		for (j = k = 0; j < c->n; ++j) {
+			mem_seed_t *s = &c->seeds[j];
+			s->score = mem_seed_sw(opt, bns, pac, l_query, query, s);
+			if (s->score < 0 || s->score >= min_HSP_score)
+				c->seeds[k++] = *s;
+		}
+		c->n = k;
+	}
+}
+
 /****************************************
  * Construct the alignment from a chain *
  ****************************************/
@@ -487,14 +564,10 @@ void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t i
  * mem_chain2aln_short() is almost never used for short-read alignment.
  */
 
-#define MEM_SHORT_EXT 50
-#define MEM_SHORT_LEN 200
-#define MAX_BAND_TRY  2
-
-int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
+int mem_chain2aln_short(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
 {
-	int i, qb, qe, xtra;
-	int64_t rb, re, rlen;
+	int i, qb, qe, xtra, rid;
+	int64_t rb, re, l_pac = bns->l_pac;
 	uint8_t *rseq = 0;
 	mem_alnreg_t a;
 	kswr_t x;
@@ -524,19 +597,20 @@ int mem_chain2aln_short(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac,
 	if (qe - qb >= opt->w * 4 || re - rb >= opt->w * 4) return 1;
 	if (qe - qb >= MEM_SHORT_LEN || re - rb >= MEM_SHORT_LEN) return 1;
 
-	rseq = bns_get_seq(l_pac, pac, rb, re, &rlen);
-	assert(rlen == re - rb);
+	rseq = bns_fetch_seq(bns, pac, &rb, c->seeds[0].rbeg, &re, &rid);
+	assert(c->rid == rid);
 	xtra = KSW_XSUBO | KSW_XSTART | ((qe - qb) * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
 	x = ksw_align2(qe - qb, (uint8_t*)query + qb, re - rb, rseq, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
 	free(rseq);
-	if (x.tb < MEM_SHORT_EXT>>1 || x.te > re - rb - (MEM_SHORT_EXT>>1)) return 1;
-
 	a.rb = rb + x.tb; a.re = rb + x.te + 1;
 	a.qb = qb + x.qb; a.qe = qb + x.qe + 1;
 	a.score = x.score;
 	a.csub = x.score2;
+	a.rid = c->rid;
+	a.frac_rep = c->frac_rep;
+	if (bwa_verbose >= 4) printf("** Attempted alignment via mem_chain2aln_short(): [%d,%d) <=> [%ld,%ld); score=%d; %d/%d\n", a.qb, a.qe, (long)a.rb, (long)a.re, x.score, a.qe-a.qb, qe-qb);
+	if (x.tb < MEM_SHORT_EXT>>1 || x.te > re - rb - (MEM_SHORT_EXT>>1)) return 1;
 	kv_push(mem_alnreg_t, *av, a);
-	if (bwa_verbose >= 4) printf("** Added alignment region via mem_chain2aln_short(): [%d,%d) <=> [%ld,%ld)\n", a.qb, a.qe, (long)a.rb, (long)a.re);
 	return 0;
 }
 
@@ -549,10 +623,12 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
 	return l < opt->w<<1? l : opt->w<<1;
 }
 
-void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
+#define MAX_BAND_TRY  2
+
+void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
 {
-	int i, k, max_off[2], aw[2]; // aw: actual bandwidth used in extension
-	int64_t rlen, rmax[2], tmp, max = 0;
+	int i, k, rid, max_off[2], aw[2]; // aw: actual bandwidth used in extension
+	int64_t l_pac = bns->l_pac, rmax[2], tmp, max = 0;
 	const mem_seed_t *s;
 	uint8_t *rseq = 0;
 	uint64_t *srt;
@@ -576,12 +652,12 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
 		else rmax[0] = l_pac;
 	}
 	// retrieve the reference sequence
-	rseq = bns_get_seq(l_pac, pac, rmax[0], rmax[1], &rlen);
-	assert(rlen == rmax[1] - rmax[0]);
+	rseq = bns_fetch_seq(bns, pac, &rmax[0], c->seeds[0].rbeg, &rmax[1], &rid);
+	assert(c->rid == rid);
 
 	srt = malloc(c->n * 8);
 	for (i = 0; i < c->n; ++i)
-		srt[i] = (uint64_t)c->seeds[i].len<<32 | i;
+		srt[i] = (uint64_t)c->seeds[i].score<<32 | i;
 	ks_introsort_64(c->n, srt);
 
 	for (k = c->n - 1; k >= 0; --k) {
@@ -593,6 +669,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
 			int64_t rd;
 			int qd, w, max_gap;
 			if (s->rbeg < p->rb || s->rbeg + s->len > p->re || s->qbeg < p->qb || s->qbeg + s->len > p->qe) continue; // not fully contained
+			if (s->len - p->seedlen0 > .1 * l_query) continue; // this seed may give a better alignment
 			// qd: distance ahead of the seed on query; rd: on reference
 			qd = s->qbeg - p->qb; rd = s->rbeg - p->rb;
 			max_gap = cal_max_gap(opt, qd < rd? qd : rd); // the maximal gap allowed in regions ahead of the seed
@@ -606,7 +683,8 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
 		}
 		if (i < av->n) { // the seed is (almost) contained in an existing alignment; further testing is needed to confirm it is not leading to a different aln
 			if (bwa_verbose >= 4)
-				printf("** Seed(%d) [%ld;%ld,%ld] is almost contained in an existing alignment. Confirming whether extension is needed...\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg);
+				printf("** Seed(%d) [%ld;%ld,%ld] is almost contained in an existing alignment [%d,%d) <=> [%ld,%ld)\n",
+					   k, (long)s->len, (long)s->qbeg, (long)s->rbeg, av->a[i].qb, av->a[i].qe, (long)av->a[i].rb, (long)av->a[i].re);
 			for (i = k + 1; i < c->n; ++i) { // check overlapping seeds in the same chain
 				const mem_seed_t *t;
 				if (srt[i] == 0) continue;
@@ -627,8 +705,9 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
 		memset(a, 0, sizeof(mem_alnreg_t));
 		a->w = aw[0] = aw[1] = opt->w;
 		a->score = a->truesc = -1;
+		a->rid = c->rid;
 
-		if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg);
+		if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name);
 		if (s->qbeg) { // left extension
 			uint8_t *rs, *qs;
 			int qle, tle, gtle, gscore;
@@ -695,6 +774,9 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int
 				a->seedcov += t->len; // this is not very accurate, but for approx. mapQ, this is good enough
 		}
 		a->w = aw[0] > aw[1]? aw[0] : aw[1];
+		a->seedlen0 = s->len;
+
+		a->frac_rep = c->frac_rep;
 	}
 	free(srt); free(rseq);
 }
@@ -723,9 +805,9 @@ static inline int get_rlen(int n_cigar, const uint32_t *cigar)
 	return l;
 }
 
-void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_)
+void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m_, int softclip_all)
 {
-	int i;
+	int i, l_name;
 	mem_aln_t ptmp = list[which], *p = &ptmp, mtmp, *m = 0; // make a copy of the alignment to convert
 
 	if (m_) mtmp = *m_, m = &mtmp;
@@ -741,7 +823,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
 	p->flag |= m && m->is_rev? 0x20 : 0; // is mate on the reverse strand
 
 	// print up to CIGAR
-	kputs(s->name, str); kputc('\t', str); // QNAME
+	l_name = strlen(s->name);
+	ks_resize(str, str->l + s->l_seq + l_name + (s->qual? s->l_seq : 0) + 20);
+	kputsn(s->name, l_name, str); kputc('\t', str); // QNAME
 	kputw((p->flag&0xffff) | (p->flag&0x10000? 0x100 : 0), str); kputc('\t', str); // FLAG
 	if (p->rid >= 0) { // with coordinate
 		kputs(bns->anns[p->rid].name, str); kputc('\t', str); // RNAME
@@ -750,7 +834,8 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
 		if (p->n_cigar) { // aligned
 			for (i = 0; i < p->n_cigar; ++i) {
 				int c = p->cigar[i]&0xf;
-				if (c == 3 || c == 4) c = which? 4 : 3; // use hard clipping for supplementary alignments
+				if (!softclip_all && (c == 3 || c == 4))
+					c = which? 4 : 3; // use hard clipping for supplementary alignments
 				kputw(p->cigar[i]>>4, str); kputc("MIDSH"[c], str);
 			}
 		} else kputc('*', str); // having a coordinate but unaligned (e.g. when copy_mate is true)
@@ -777,9 +862,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
 		kputsn("*\t*", 3, str);
 	} else if (!p->is_rev) { // the forward strand
 		int i, qb = 0, qe = s->l_seq;
-		if (p->n_cigar) {
-			if (which && ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3)) qb += p->cigar[0]>>4;
-			if (which && ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3)) qe -= p->cigar[p->n_cigar-1]>>4;
+		if (p->n_cigar && which && !softclip_all) { // have cigar && not the primary alignment && not softclip all
+			if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qb += p->cigar[0]>>4;
+			if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qe -= p->cigar[p->n_cigar-1]>>4;
 		}
 		ks_resize(str, str->l + (qe - qb) + 1);
 		for (i = qb; i < qe; ++i) str->s[str->l++] = "ACGTN"[(int)s->seq[i]];
@@ -791,9 +876,9 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
 		} else kputc('*', str);
 	} else { // the reverse strand
 		int i, qb = 0, qe = s->l_seq;
-		if (p->n_cigar) {
-			if (which && ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3)) qe -= p->cigar[0]>>4;
-			if (which && ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3)) qb += p->cigar[p->n_cigar-1]>>4;
+		if (p->n_cigar && which && !softclip_all) {
+			if ((p->cigar[0]&0xf) == 4 || (p->cigar[0]&0xf) == 3) qe -= p->cigar[0]>>4;
+			if ((p->cigar[p->n_cigar-1]&0xf) == 4 || (p->cigar[p->n_cigar-1]&0xf) == 3) qb += p->cigar[p->n_cigar-1]>>4;
 		}
 		ks_resize(str, str->l + (qe - qb) + 1);
 		for (i = qe-1; i >= qb; --i) str->s[str->l++] = "TGCAN"[(int)s->seq[i]];
@@ -834,6 +919,7 @@ void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const m
 			}
 		}
 	}
+	if (p->XA) { kputsn("\tXA:Z:", 6, str); kputs(p->XA, str); }
 	if (s->comment) { kputc('\t', str); kputs(s->comment, str); }
 	kputc('\n', str);
 }
@@ -864,16 +950,21 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
 	if (a->sub_n > 0) mapq -= (int)(4.343 * log(a->sub_n+1) + .499);
 	if (mapq > 60) mapq = 60;
 	if (mapq < 0) mapq = 0;
+	mapq = (int)(mapq * (1. - a->frac_rep) + .499);
 	return mapq;
 }
 
 // TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible
 void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
 {
+	extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
 	kstring_t str;
 	kvec_t(mem_aln_t) aa;
 	int k;
+	char **XA = 0;
 
+	if (!(opt->flag & MEM_F_ALL))
+		XA = mem_gen_alt(opt, bns, pac, a, s->l_seq, s->seq);
 	kv_init(aa);
 	str.l = str.m = 0; str.s = 0;
 	for (k = 0; k < a->n; ++k) {
@@ -881,9 +972,11 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
 		mem_aln_t *q;
 		if (p->score < opt->T) continue;
 		if (p->secondary >= 0 && !(opt->flag&MEM_F_ALL)) continue;
-		if (p->secondary >= 0 && p->score < a->a[p->secondary].score * .5) continue;
+		if (p->secondary >= 0 && p->score < a->a[p->secondary].score * opt->drop_ratio) continue;
 		q = kv_pushp(mem_aln_t, aa);
 		*q = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, p);
+		assert(q->rid >= 0); // this should not happen with the new code
+		q->XA = XA? XA[k] : 0;
 		q->flag |= extra_flag; // flag secondary
 		if (p->secondary >= 0) q->sub = -1; // don't output sub-optimal score
 		if (k && p->secondary < 0) // if supplementary
@@ -894,17 +987,21 @@ void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pa
 		mem_aln_t t;
 		t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
 		t.flag |= extra_flag;
-		mem_aln2sam(bns, &str, s, 1, &t, 0, m);
+		mem_aln2sam(bns, &str, s, 1, &t, 0, m, opt->flag&MEM_F_SOFTCLIP);
 	} else {
 		for (k = 0; k < aa.n; ++k)
-			mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m);
+			mem_aln2sam(bns, &str, s, aa.n, aa.a, k, m, opt->flag&MEM_F_SOFTCLIP);
 		for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
 		free(aa.a);
 	}
 	s->sam = str.s;
+	if (XA) {
+		for (k = 0; k < a->n; ++k) free(XA[k]);
+		free(XA);
+	}
 }
 
-mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq)
+mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq, void *buf)
 {
 	int i;
 	mem_chain_v chn;
@@ -913,39 +1010,35 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
 	for (i = 0; i < l_seq; ++i) // convert to 2-bit encoding if we have not done so
 		seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]];
 
-	chn = mem_chain(opt, bwt, bns->l_pac, l_seq, (uint8_t*)seq);
+	chn = mem_chain(opt, bwt, bns, l_seq, (uint8_t*)seq, buf);
 	chn.n = mem_chain_flt(opt, chn.n, chn.a);
+	if (opt->min_chain_weight > 0) mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chn.n, chn.a);
 	if (bwa_verbose >= 4) mem_print_chain(bns, &chn);
 
 	kv_init(regs);
 	for (i = 0; i < chn.n; ++i) {
 		mem_chain_t *p = &chn.a[i];
-		int ret;
+		int ret = 1;
 		if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i);
-		ret = mem_chain2aln_short(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p, &regs);
-		if (ret > 0) mem_chain2aln(opt, bns->l_pac, pac, l_seq, (uint8_t*)seq, p, &regs);
+		if (opt->min_chain_weight == 0)
+			ret = mem_chain2aln_short(opt, bns, pac, l_seq, (uint8_t*)seq, p, &regs);
+		if (ret > 0) mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, &regs);
 		free(chn.a[i].seeds);
 	}
 	free(chn.a);
-	regs.n = mem_sort_and_dedup(regs.n, regs.a, opt->mask_level_redun);
-	if (opt->flag & MEM_F_NO_EXACT)
+	regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regs.n, regs.a);
+	if (opt->flag & MEM_F_SELF_OVLP)
 		regs.n = mem_test_and_remove_exact(opt, regs.n, regs.a, l_seq);
+	if (bwa_verbose >= 4) {
+		err_printf("* %ld chains remain after removing duplicated chains\n", regs.n);
+		for (i = 0; i < regs.n; ++i) {
+			mem_alnreg_t *p = &regs.a[i];
+			printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re);
+		}
+	}
 	return regs;
 }
 
-mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
-{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
-	mem_alnreg_v ar;
-	char *seq;
-	seq = malloc(l_seq);
-	memcpy(seq, seq_, l_seq); // makes a copy of seq_
-	ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq);
-	mem_mark_primary_se(opt, ar.n, ar.a, lrand48());
-	free(seq);
-	return ar;
-}
-
-// This routine is only used for the API purpose
 mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
 {
 	mem_aln_t a;
@@ -965,22 +1058,18 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
 		query[i] = query_[i] < 5? query_[i] : nst_nt4_table[(int)query_[i]];
 	a.mapq = ar->secondary < 0? mem_approx_mapq_se(opt, ar) : 0;
 	if (ar->secondary >= 0) a.flag |= 0x100; // secondary alignment
-	if (bwa_fix_xref2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->w, bns, pac, (uint8_t*)query, &qb, &qe, &rb, &re) < 0) {
-		fprintf(stderr, "[E::%s] If you see this message, please let the developer know. Abort. Sorry.\n", __func__);
-		exit(1);
-	}
 	tmp = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_del, opt->e_del);
 	w2  = infer_bw(qe - qb, re - rb, ar->truesc, opt->a, opt->o_ins, opt->e_ins);
 	w2 = w2 > tmp? w2 : tmp;
 	if (bwa_verbose >= 4) printf("* Band width: inferred=%d, cmd_opt=%d, alnreg=%d\n", w2, opt->w, ar->w);
 	if (w2 > opt->w) w2 = w2 < ar->w? w2 : ar->w;
-//	else w2 = opt->w; // TODO: check if we need this line on long reads. On 1-800bp reads, it does not matter and it should be.
 	i = 0; a.cigar = 0;
 	do {
 		free(a.cigar);
+		w2 = w2 < opt->w<<2? w2 : opt->w<<2;
 		a.cigar = bwa_gen_cigar2(opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, w2, bns->l_pac, pac, qe - qb, (uint8_t*)&query[qb], rb, re, &score, &a.n_cigar, &NM);
 		if (bwa_verbose >= 4) printf("* Final alignment: w2=%d, global_sc=%d, local_sc=%d\n", w2, score, ar->truesc);
-		if (score == last_sc) break; // it is possible that global alignment and local alignment give different scores
+		if (score == last_sc || w2 == opt->w<<2) break; // it is possible that global alignment and local alignment give different scores
 		last_sc = score;
 		w2 <<= 1;
 	} while (++i < 3 && score < ar->truesc - opt->a);
@@ -1014,6 +1103,7 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
 		}
 	}
 	a.rid = bns_pos2rid(bns, pos);
+	assert(a.rid == ar->rid);
 	a.pos = pos - bns->anns[a.rid].offset;
 	a.score = ar->score; a.sub = ar->sub > ar->csub? ar->sub : ar->csub;
 	free(query);
@@ -1026,6 +1116,7 @@ typedef struct {
 	const bntseq_t *bns;
 	const uint8_t *pac;
 	const mem_pestat_t *pes;
+	smem_aux_t **aux;
 	bseq1_t *seqs;
 	mem_alnreg_v *regs;
 	int64_t n_processed;
@@ -1036,23 +1127,28 @@ static void worker1(void *data, int i, int tid)
 	worker_t *w = (worker_t*)data;
 	if (!(w->opt->flag&MEM_F_PE)) {
 		if (bwa_verbose >= 4) printf("=====> Processing read '%s' <=====\n", w->seqs[i].name);
-		w->regs[i] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq);
+		w->regs[i] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq, w->aux[tid]);
 	} else {
 		if (bwa_verbose >= 4) printf("=====> Processing read '%s'/1 <=====\n", w->seqs[i<<1|0].name);
-		w->regs[i<<1|0] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq);
+		w->regs[i<<1|0] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq, w->aux[tid]);
 		if (bwa_verbose >= 4) printf("=====> Processing read '%s'/2 <=====\n", w->seqs[i<<1|1].name);
-		w->regs[i<<1|1] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq);
+		w->regs[i<<1|1] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq, w->aux[tid]);
 	}
 }
 
 static void worker2(void *data, int i, int tid)
 {
 	extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
+	extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
 	worker_t *w = (worker_t*)data;
 	if (!(w->opt->flag&MEM_F_PE)) {
 		if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
-		mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
-		mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
+		if (w->opt->flag & MEM_F_ALN_REG) {
+			mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
+		} else {
+			mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
+			mem_reg2sam_se(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
+		}
 		free(w->regs[i].a);
 	} else {
 		if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name);
@@ -1068,13 +1164,21 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
 	mem_alnreg_v *regs;
 	mem_pestat_t pes[4];
 	double ctime, rtime;
+	int i;
 
 	ctime = cputime(); rtime = realtime();
+	global_bns = bns;
 	regs = malloc(n * sizeof(mem_alnreg_v));
 	w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
 	w.seqs = seqs; w.regs = regs; w.n_processed = n_processed;
 	w.pes = &pes[0];
+	w.aux = malloc(opt->n_threads * sizeof(smem_aux_t));
+	for (i = 0; i < opt->n_threads; ++i)
+		w.aux[i] = smem_aux_init();
 	kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
+	for (i = 0; i < opt->n_threads; ++i)
+		smem_aux_destroy(w.aux[i]);
+	free(w.aux);
 	if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
 		if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
 		else mem_pestat(opt, bns->l_pac, n, regs, pes); // otherwise, infer the insert size distribution from data
diff --git a/bwamem.h b/bwamem.h
index 5291491..7b14ca8 100644
--- a/bwamem.h
+++ b/bwamem.h
@@ -16,7 +16,9 @@ typedef struct __smem_i smem_i;
 #define MEM_F_ALL       0x8
 #define MEM_F_NO_MULTI  0x10
 #define MEM_F_NO_RESCUE 0x20
-#define MEM_F_NO_EXACT  0x40
+#define MEM_F_SELF_OVLP 0x40
+#define MEM_F_ALN_REG   0x80
+#define MEM_F_SOFTCLIP  0x200
 
 typedef struct {
 	int a, b;               // match score and mismatch penalty
@@ -30,6 +32,8 @@ typedef struct {
 	int T;                  // output score threshold; only affecting output
 	int flag;               // see MEM_F_* macros
 	int min_seed_len;       // minimum seed length
+	int min_chain_weight;
+	int max_chain_extend;
 	float split_factor;     // split into a seed if MEM is longer than min_seed_len*split_factor
 	int split_width;        // split into a seed if its occurence is smaller than this value
 	int max_occ;            // skip a seed if its occurence is larger than this value
@@ -37,18 +41,21 @@ typedef struct {
 	int n_threads;          // number of threads
 	int chunk_size;         // process chunk_size-bp sequences in a batch
 	float mask_level;       // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
-	float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain
+	float drop_ratio;       // drop a chain if its seed coverage is below drop_ratio times the seed coverage of a better chain overlapping with the small chain
+	float XA_drop_ratio;    // when counting hits for the XA tag, ignore alignments with score < XA_drop_ratio * max_score; only effective for the XA tag
 	float mask_level_redun;
 	float mapQ_coef_len;
 	int mapQ_coef_fac;
 	int max_ins;            // when estimating insert size distribution, skip pairs with insert longer than this value
 	int max_matesw;         // perform maximally max_matesw rounds of mate-SW for each end
+	int max_hits;           // if there are max_hits or fewer, output them all
 	int8_t mat[25];         // scoring matrix; mat[0] == 0 if unset
 } mem_opt_t;
 
 typedef struct {
 	int64_t rb, re; // [rb,re): reference sequence in the alignment
 	int qb, qe;     // [qb,qe): query sequence in the alignment
+	int rid;        // reference seq ID
 	int score;      // best local SW score
 	int truesc;     // actual score corresponding to the aligned region; possibly smaller than $score
 	int sub;        // 2nd best SW score
@@ -57,6 +64,9 @@ typedef struct {
 	int w;          // actual band width used in extension
 	int seedcov;    // length of regions coverged by seeds
 	int secondary;  // index of the parent hit shadowing the current hit; <0 if primary
+	int seedlen0;   // length of the starting seed
+	int n_comp;     // number of sub-alignments chained together
+	float frac_rep;
 	uint64_t hash;
 } mem_alnreg_t;
 
@@ -75,6 +85,7 @@ typedef struct { // This struct is only used for the convenience of API.
 	uint32_t is_rev:1, mapq:8, NM:23; // is_rev: whether on the reverse strand; mapq: mapping quality; NM: edit distance
 	int n_cigar;     // number of CIGAR operations
 	uint32_t *cigar; // CIGAR in the BAM encoding: opLen<<4|op; op to integer mapping: MIDSH=>01234
+	char *XA;        // alternative mappings
 
 	int score, sub;
 } mem_aln_t;
@@ -86,7 +97,7 @@ extern "C" {
 	smem_i *smem_itr_init(const bwt_t *bwt);
 	void smem_itr_destroy(smem_i *itr);
 	void smem_set_query(smem_i *itr, int len, const uint8_t *query);
-	const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width);
+	const bwtintv_v *smem_next(smem_i *itr);
 
 	mem_opt_t *mem_opt_init(void);
 	void mem_fill_scmat(int a, int b, int8_t mat[25]);
@@ -145,6 +156,7 @@ extern "C" {
 	 * @return       CIGAR, strand, mapping quality and forward-strand position
 	 */
 	mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar);
+	mem_aln_t mem_reg2aln2(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq, const mem_alnreg_t *ar, const char *name);
 
 	/**
 	 * Infer the insert size distribution from interleaved alignment regions
diff --git a/bwamem_extra.c b/bwamem_extra.c
new file mode 100644
index 0000000..59bb68e
--- /dev/null
+++ b/bwamem_extra.c
@@ -0,0 +1,147 @@
+#include "bwa.h"
+#include "bwamem.h"
+#include "bntseq.h"
+#include "kstring.h"
+
+/***************************
+ * SMEM iterator interface *
+ ***************************/
+
+struct __smem_i {
+	const bwt_t *bwt;
+	const uint8_t *query;
+	int start, len;
+	bwtintv_v *matches; // matches; to be returned by smem_next()
+	bwtintv_v *sub;     // sub-matches inside the longest match; temporary
+	bwtintv_v *tmpvec[2]; // temporary arrays
+};
+
+smem_i *smem_itr_init(const bwt_t *bwt)
+{
+	smem_i *itr;
+	itr = calloc(1, sizeof(smem_i));
+	itr->bwt = bwt;
+	itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
+	itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
+	itr->matches   = calloc(1, sizeof(bwtintv_v));
+	itr->sub       = calloc(1, sizeof(bwtintv_v));
+	return itr;
+}
+
+void smem_itr_destroy(smem_i *itr)
+{
+	free(itr->tmpvec[0]->a); free(itr->tmpvec[0]);
+	free(itr->tmpvec[1]->a); free(itr->tmpvec[1]);
+	free(itr->matches->a);   free(itr->matches);
+	free(itr->sub->a);       free(itr->sub);
+	free(itr);
+}
+
+void smem_set_query(smem_i *itr, int len, const uint8_t *query)
+{
+	itr->query = query;
+	itr->start = 0;
+	itr->len = len;
+}
+
+const bwtintv_v *smem_next(smem_i *itr)
+{
+	int i, max, max_i, ori_start;
+	itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
+	if (itr->start >= itr->len || itr->start < 0) return 0;
+	while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
+	if (itr->start == itr->len) return 0;
+	ori_start = itr->start;
+	itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, 1, itr->matches, itr->tmpvec); // search for SMEM
+	if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here
+	for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match
+		bwtintv_t *p = &itr->matches->a[i];
+		int len = (uint32_t)p->info - (p->info>>32);
+		if (max < len) max = len, max_i = i;
+	}
+	return itr->matches;
+}
+
+/***********************
+ *** Extra functions ***
+ ***********************/
+
+mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
+{ // the difference from mem_align1_core() is that this routine: 1) calls mem_mark_primary_se(); 2) does not modify the input sequence
+	extern mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq, void *buf);
+	extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
+	mem_alnreg_v ar;
+	char *seq;
+	seq = malloc(l_seq);
+	memcpy(seq, seq_, l_seq); // makes a copy of seq_
+	ar = mem_align1_core(opt, bwt, bns, pac, l_seq, seq, 0);
+	mem_mark_primary_se(opt, ar.n, ar.a, lrand48());
+	free(seq);
+	return ar;
+}
+
+void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
+{
+	int i;
+	kstring_t str = {0,0,0};
+	for (i = 0; i < a->n; ++i) {
+		const mem_alnreg_t *p = &a->a[i];
+		int is_rev, rid, qb = p->qb, qe = p->qe;
+		int64_t pos, rb = p->rb, re = p->re;
+		pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
+		rid = bns_pos2rid(bns, pos);
+		assert(rid == p->rid);
+		pos -= bns->anns[rid].offset;
+		kputs(s->name, &str); kputc('\t', &str);
+		kputw(s->l_seq, &str); kputc('\t', &str);
+		if (is_rev) qb ^= qe, qe ^= qb, qb ^= qe; // swap
+		kputw(qb, &str); kputc('\t', &str); kputw(qe, &str); kputc('\t', &str);
+		kputs(bns->anns[rid].name, &str); kputc('\t', &str);
+		kputw(bns->anns[rid].len, &str); kputc('\t', &str);
+		kputw(pos, &str); kputc('\t', &str); kputw(pos + (re - rb), &str); kputc('\t', &str);
+		ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb));
+		kputc('\n', &str);
+	}
+	s->sam = str.s;
+}
+
+// Okay, returning strings is bad, but this has happened a lot elsewhere. If I have time, I need serious code cleanup.
+char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query) // ONLY work after mem_mark_primary_se()
+{
+	int i, k, *cnt, tot;
+	kstring_t *aln = 0;
+	char **XA = 0;
+
+	cnt = calloc(a->n, sizeof(int));
+	for (i = 0, tot = 0; i < a->n; ++i) {
+		int j = a->a[i].secondary;
+		if (j >= 0 && a->a[i].score >= a->a[j].score * opt->XA_drop_ratio)
+			++cnt[j], ++tot;
+	}
+	if (tot == 0) goto end_gen_alt;
+	aln = calloc(a->n, sizeof(kstring_t));
+	for (i = 0; i < a->n; ++i) {
+		mem_aln_t t;
+		int j = a->a[i].secondary;
+		if (j < 0 || a->a[i].score < a->a[j].score * opt->XA_drop_ratio) continue; // we don't process the primary alignments as they will be converted to SAM later
+		if (cnt[j] > opt->max_hits) continue;
+		t = mem_reg2aln(opt, bns, pac, l_query, query, &a->a[i]);
+		kputs(bns->anns[t.rid].name, &aln[j]);
+		kputc(',', &aln[j]); kputc("+-"[t.is_rev], &aln[j]); kputl(t.pos + 1, &aln[j]);
+		kputc(',', &aln[j]);
+		for (k = 0; k < t.n_cigar; ++k) {
+			kputw(t.cigar[k]>>4, &aln[j]);
+			kputc("MIDSHN"[t.cigar[k]&0xf], &aln[j]);
+		}
+		kputc(',', &aln[j]); kputw(t.NM, &aln[j]);
+		kputc(';', &aln[j]);
+		free(t.cigar);
+	}
+	XA = calloc(a->n, sizeof(char*));
+	for (k = 0; k < a->n; ++k)
+		XA[k] = aln[k].s;
+
+end_gen_alt:
+	free(cnt); free(aln);
+	return XA;
+}
diff --git a/bwamem_pair.c b/bwamem_pair.c
index 4a7cdf3..a3aeb80 100644
--- a/bwamem_pair.c
+++ b/bwamem_pair.c
@@ -58,6 +58,7 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
 		if (r[0]->n == 0 || r[1]->n == 0) continue;
 		if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue;
 		if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue;
+		if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr
 		dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is);
 		if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is);
 	}
@@ -106,10 +107,11 @@ void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *
 		}
 }
 
-int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma)
+int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma)
 {
-	extern int mem_sort_and_dedup(int n, mem_alnreg_t *a, float mask_level_redun);
-	int i, r, skip[4], n = 0;
+	extern int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int n, mem_alnreg_t *a);
+	int64_t l_pac = bns->l_pac;
+	int i, r, skip[4], n = 0, rid;
 	for (r = 0; r < 4; ++r)
 		skip[r] = pes[r].failed? 1 : 0;
 	for (i = 0; i < ma->n; ++i) { // check which orinentation has been found
@@ -121,8 +123,8 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
 	if (skip[0] + skip[1] + skip[2] + skip[3] == 4) return 0; // consistent pair exist; no need to perform SW
 	for (r = 0; r < 4; ++r) {
 		int is_rev, is_larger;
-		uint8_t *seq, *rev = 0, *ref;
-		int64_t rb, re, len;
+		uint8_t *seq, *rev = 0, *ref = 0;
+		int64_t rb, re;
 		if (skip[r]) continue;
 		is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate
 		is_larger = !(r>>1); // whether the mate has larger coordinate
@@ -140,14 +142,15 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
 		}
 		if (rb < 0) rb = 0;
 		if (re > l_pac<<1) re = l_pac<<1;
-		ref = bns_get_seq(l_pac, pac, rb, re, &len);
-		if (len == re - rb) { // no funny things happening
+		if (rb < re) ref = bns_fetch_seq(bns, pac, &rb, (rb+re)>>1, &re, &rid);
+		if (a->rid == rid && re - rb >= opt->min_seed_len) { // no funny things happening
 			kswr_t aln;
 			mem_alnreg_t b;
 			int tmp, xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
-			aln = ksw_align2(l_ms, seq, len, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
+			aln = ksw_align2(l_ms, seq, re - rb, ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, xtra, 0);
 			memset(&b, 0, sizeof(mem_alnreg_t));
 			if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0
+				b.rid = a->rid;
 				b.qb = is_rev? l_ms - (aln.qe + 1) : aln.qb;                                                                                                                                                                              
 				b.qe = is_rev? l_ms - aln.qb : aln.qe + 1; 
 				b.rb = is_rev? (l_pac<<1) - (rb + aln.te + 1) : rb + aln.tb;
@@ -167,23 +170,25 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
 			}
 			++n;
 		}
-		if (n) ma->n = mem_sort_and_dedup(ma->n, ma->a, opt->mask_level_redun);
+		if (n) ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a);
 		if (rev) free(rev);
 		free(ref);
 	}
 	return n;
 }
 
-int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2])
+int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2])
 {
 	pair64_v v, u;
 	int r, i, k, y[4], ret; // y[] keeps the last hit
+	int64_t l_pac = bns->l_pac;
 	kv_init(v); kv_init(u);
 	for (r = 0; r < 2; ++r) { // loop through read number
 		for (i = 0; i < a[r].n; ++i) {
 			pair64_t key;
 			mem_alnreg_t *e = &a[r].a[i];
 			key.x = e->rb < l_pac? e->rb : (l_pac<<1) - 1 - e->rb; // forward position
+			key.x = (uint64_t)e->rid<<32 | (key.x - bns->anns[e->rid].offset);
 			key.y = (uint64_t)e->score << 32 | i << 2 | (e->rb >= l_pac)<<1 | r;
 			kv_push(pair64_t, v, key);
 		}
@@ -242,7 +247,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
 	extern void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
 	extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
 	extern void mem_reg2sam_se(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
-	extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m);
+	extern void mem_aln2sam(const bntseq_t *bns, kstring_t *str, bseq1_t *s, int n, const mem_aln_t *list, int which, const mem_aln_t *m, int softclip_all);
+	extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
 
 	int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1;
 	kstring_t str;
@@ -258,15 +264,16 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
 					kv_push(mem_alnreg_t, b[i], a[i].a[j]);
 		for (i = 0; i < 2; ++i)
 			for (j = 0; j < b[i].n && j < opt->max_matesw; ++j)
-				n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
+				n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
 		free(b[0].a); free(b[1].a);
 	}
 	mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);
 	mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1);
 	if (opt->flag&MEM_F_NOPAIRING) goto no_pairing;
 	// pairing single-end hits
-	if (a[0].n && a[1].n && (o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) {
+	if (a[0].n && a[1].n && (o = mem_pair(opt, bns, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) {
 		int is_multi[2], q_pe, score_un, q_se[2];
+		char **XA[2];
 		// check if an end has multiple hits even after mate-SW
 		for (i = 0; i < 2; ++i) {
 			for (j = 1; j < a[i].n; ++j)
@@ -282,6 +289,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
 		if (n_sub > 0) q_pe -= (int)(4.343 * log(n_sub+1) + .499);
 		if (q_pe < 0) q_pe = 0;
 		if (q_pe > 60) q_pe = 60;
+		q_pe = (int)(q_pe * (1. - .5 * (a[0].a[0].frac_rep + a[1].a[0].frac_rep)) + .499);
 		// the following assumes no split hits
 		if (o > score_un) { // paired alignment is preferred
 			mem_alnreg_t *c[2];
@@ -302,13 +310,35 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
 			q_se[0] = mem_approx_mapq_se(opt, &a[0].a[0]);
 			q_se[1] = mem_approx_mapq_se(opt, &a[1].a[0]);
 		}
+		// suboptimal hits
+		if (!(opt->flag & MEM_F_ALL)) {
+			for (i = 0; i < 2; ++i) {
+				int k = a[i].a[z[i]].secondary;
+				if (k >= 0) { // switch secondary and primary
+					assert(a[i].a[k].secondary < 0);
+					for (j = 0; j < a[i].n; ++j)
+						if (a[i].a[j].secondary == k || j == k)
+							a[i].a[j].secondary = z[i];
+					a[i].a[z[i]].secondary = -1;
+				}
+				XA[i] = mem_gen_alt(opt, bns, pac, &a[i], s[i].l_seq, s[i].seq);
+			}
+		} else XA[0] = XA[1] = 0;
 		// write SAM
-		h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag;
-		h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag;
-		mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1]); s[0].sam = strdup(str.s); str.l = 0;
-		mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0]); s[1].sam = str.s;
+		h[0] = mem_reg2aln(opt, bns, pac, s[0].l_seq, s[0].seq, &a[0].a[z[0]]); h[0].mapq = q_se[0]; h[0].flag |= 0x40 | extra_flag; h[0].XA = XA[0]? XA[0][z[0]] : 0;
+		h[1] = mem_reg2aln(opt, bns, pac, s[1].l_seq, s[1].seq, &a[1].a[z[1]]); h[1].mapq = q_se[1]; h[1].flag |= 0x80 | extra_flag; h[1].XA = XA[1]? XA[1][z[1]] : 0;
+		mem_aln2sam(bns, &str, &s[0], 1, &h[0], 0, &h[1], opt->flag&MEM_F_SOFTCLIP); s[0].sam = strdup(str.s); str.l = 0;
+		mem_aln2sam(bns, &str, &s[1], 1, &h[1], 0, &h[0], opt->flag&MEM_F_SOFTCLIP); s[1].sam = str.s;
 		if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
-		free(h[0].cigar); free(h[1].cigar);
+		free(h[0].cigar); // h[0].XA will be freed in the following block
+		free(h[1].cigar);
+		// free XA
+		for (i = 0; i < 2; ++i) {
+			if (XA[i]) {
+				for (j = 0; j < a[i].n; ++j) free(XA[i][j]);
+				free(XA[i]);
+			}
+		}
 	} else goto no_pairing;
 	return n;
 
diff --git a/bwape.c b/bwape.c
index 82fc50b..a5dc3ad 100644
--- a/bwape.c
+++ b/bwape.c
@@ -297,6 +297,7 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
 				p[j]->seQ = p[j]->mapQ = bwa_approx_mapQ(p[j], max_diff);
 				p[j]->pos = bwa_sa2pos(bns, bwt, p[j]->sa, p[j]->len + p[j]->ref_shift, &strand);
 				p[j]->strand = strand;
+				if (p[j]->pos == (bwtint_t)-1) p[j]->type = BWA_TYPE_NO_MATCH;
 			}
 		}
 	}
diff --git a/bwase.c b/bwase.c
index 30f306e..cb912ec 100644
--- a/bwase.c
+++ b/bwase.c
@@ -559,7 +559,7 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f
 		fprintf(stderr, "[bwa_aln_core] print alignments... ");
 		for (i = 0; i < n_seqs; ++i)
 			bwa_print_sam1(bns, seqs + i, 0, opt.mode, opt.max_top2);
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 
 		bwa_free_read_seq(n_seqs, seqs);
 		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
diff --git a/bwt_lite.c b/bwt_lite.c
index 9b47270..f7946f5 100644
--- a/bwt_lite.c
+++ b/bwt_lite.c
@@ -7,7 +7,7 @@
 #  include "malloc_wrap.h"
 #endif
 
-int is_sa(const uint8_t *T, uint32_t *SA, int n);
+int is_sa(const uint8_t *T, int *SA, int n);
 int is_bwt(uint8_t *T, int n);
 
 bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq)
@@ -20,7 +20,7 @@ bwtl_t *bwtl_seq2bwtl(int len, const uint8_t *seq)
 	{ // calculate b->bwt
 		uint8_t *s;
 		b->sa = (uint32_t*)calloc(len + 1, 4);
-		is_sa(seq, b->sa, len);
+		is_sa(seq, (int*)b->sa, len);
 		s = (uint8_t*)calloc(len + 1, 1);
 		for (i = 0; i <= len; ++i) {
 			if (b->sa[i] == 0) b->primary = i;
diff --git a/bwtaln.c b/bwtaln.c
index 68d0274..20b01cd 100644
--- a/bwtaln.c
+++ b/bwtaln.c
@@ -206,7 +206,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
 		bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
 #endif
 
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 
 		t = clock();
 		fprintf(stderr, "[bwa_aln_core] write to the disk... ");
@@ -215,7 +215,7 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
 			err_fwrite(&p->n_aln, 4, 1, stdout);
 			if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
 		}
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
+		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 
 		bwa_free_read_seq(n_seqs, seqs);
 		fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
diff --git a/fastmap.c b/fastmap.c
index 093fb7b..479e878 100644
--- a/fastmap.c
+++ b/fastmap.c
@@ -18,6 +18,22 @@ extern unsigned char nst_nt4_table[256];
 void *kopen(const char *fn, int *_fd);
 int kclose(void *a);
 
+static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
+{
+	if (opt0->a) { // matching score is changed
+		if (!opt0->b) opt->b *= opt->a;
+		if (!opt0->T) opt->T *= opt->a;
+		if (!opt0->o_del) opt->o_del *= opt->a;
+		if (!opt0->e_del) opt->e_del *= opt->a;
+		if (!opt0->o_ins) opt->o_ins *= opt->a;
+		if (!opt0->e_ins) opt->e_ins *= opt->a;
+		if (!opt0->zdrop) opt->zdrop *= opt->a;
+		if (!opt0->pen_clip5) opt->pen_clip5 *= opt->a;
+		if (!opt0->pen_clip3) opt->pen_clip3 *= opt->a;
+		if (!opt0->pen_unpaired) opt->pen_unpaired *= opt->a;
+	}
+}
+
 int main_mem(int argc, char *argv[])
 {
 	mem_opt_t *opt, opt0;
@@ -27,6 +43,7 @@ int main_mem(int argc, char *argv[])
 	bseq1_t *seqs;
 	bwaidx_t *idx;
 	char *p, *rg_line = 0;
+	const char *mode = 0;
 	void *ko = 0, *ko2 = 0;
 	int64_t n_processed = 0;
 	mem_pestat_t pes[4], *pes0 = 0;
@@ -35,11 +52,11 @@ int main_mem(int argc, char *argv[])
 	for (i = 0; i < 4; ++i) pes[i].failed = 1;
 
 	opt = mem_opt_init();
-	opt0.a = opt0.b = opt0.o_del = opt0.e_del = opt0.o_ins = opt0.e_ins = opt0.pen_unpaired = -1;
-	opt0.pen_clip5 = opt0.pen_clip3 = opt0.zdrop = opt0.T = -1;
-	while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:")) >= 0) {
-		if (c == 'k') opt->min_seed_len = atoi(optarg);
-		else if (c == 'w') opt->w = atoi(optarg);
+	memset(&opt0, 0, sizeof(mem_opt_t));
+	while ((c = getopt(argc, argv, "epaFMCSPHYk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:")) >= 0) {
+		if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
+		else if (c == 'x') mode = optarg;
+		else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1;
 		else if (c == 'A') opt->a = atoi(optarg), opt0.a = 1;
 		else if (c == 'B') opt->b = atoi(optarg), opt0.b = 1;
 		else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1;
@@ -50,16 +67,23 @@ int main_mem(int argc, char *argv[])
 		else if (c == 'p') opt->flag |= MEM_F_PE;
 		else if (c == 'M') opt->flag |= MEM_F_NO_MULTI;
 		else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE;
-		else if (c == 'e') opt->flag |= MEM_F_NO_EXACT;
-		else if (c == 'c') opt->max_occ = atoi(optarg);
+		else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
+		else if (c == 'F') opt->flag |= MEM_F_ALN_REG;
+		else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
+		else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
 		else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1;
 		else if (c == 'v') bwa_verbose = atoi(optarg);
-		else if (c == 'r') opt->split_factor = atof(optarg);
-		else if (c == 'D') opt->chain_drop_ratio = atof(optarg);
-		else if (c == 'm') opt->max_matesw = atoi(optarg);
-		else if (c == 's') opt->split_width = atoi(optarg);
+		else if (c == 'r') opt->split_factor = atof(optarg), opt0.split_factor = 1.;
+		else if (c == 'D') opt->drop_ratio = atof(optarg), opt0.drop_ratio = 1.;
+		else if (c == 'm') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1;
+		else if (c == 'h') opt->max_hits = atoi(optarg), opt0.max_hits = 1;
+		else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1;
+		else if (c == 'G') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1;
+		else if (c == 'N') opt->max_chain_extend = atoi(optarg), opt0.max_chain_extend = 1;
+		else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1;
 		else if (c == 'C') copy_comment = 1;
 		else if (c == 'Q') {
+			opt0.mapQ_coef_len = 1;
 			opt->mapQ_coef_len = atoi(optarg);
 			opt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0;
 		} else if (c == 'O') {
@@ -111,49 +135,73 @@ int main_mem(int argc, char *argv[])
 		fprintf(stderr, "       -r FLOAT      look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
 //		fprintf(stderr, "       -s INT        look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
 		fprintf(stderr, "       -c INT        skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
-		fprintf(stderr, "       -D FLOAT      drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->chain_drop_ratio);
+		fprintf(stderr, "       -D FLOAT      drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->drop_ratio);
+		fprintf(stderr, "       -W INT        discard a chain if seeded bases shorter than INT [0]\n");
 		fprintf(stderr, "       -m INT        perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw);
 		fprintf(stderr, "       -S            skip mate rescue\n");
 		fprintf(stderr, "       -P            skip pairing; mate rescue performed unless -S also in use\n");
 		fprintf(stderr, "       -e            discard full-length exact matches\n");
-		fprintf(stderr, "       -A INT        score for a sequence match, which scales [-TdBOELU] unless overridden [%d]\n", opt->a);
+		fprintf(stderr, "       -A INT        score for a sequence match, which scales options -TdBOELU unless overridden [%d]\n", opt->a);
 		fprintf(stderr, "       -B INT        penalty for a mismatch [%d]\n", opt->b);
 		fprintf(stderr, "       -O INT[,INT]  gap open penalties for deletions and insertions [%d,%d]\n", opt->o_del, opt->o_ins);
 		fprintf(stderr, "       -E INT[,INT]  gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d]\n", opt->e_del, opt->e_ins);
 		fprintf(stderr, "       -L INT[,INT]  penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3);
 		fprintf(stderr, "       -U INT        penalty for an unpaired read pair [%d]\n", opt->pen_unpaired);
+		fprintf(stderr, "       -x STR        read type. Setting -x changes multiple parameters unless overriden [null]\n");
+		fprintf(stderr, "                     pacbio: -k17 -W40 -r10 -A2 -B5 -O2 -E1 -L0\n");
+		fprintf(stderr, "                     pbread: -k13 -W40 -c1000 -r10 -A2 -B5 -O2 -E1 -N25 -FeaD.001\n");
 		fprintf(stderr, "\nInput/output options:\n\n");
 		fprintf(stderr, "       -p            first query file consists of interleaved paired-end sequences\n");
 		fprintf(stderr, "       -R STR        read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
 		fprintf(stderr, "\n");
 		fprintf(stderr, "       -v INT        verbose level: 1=error, 2=warning, 3=message, 4+=debugging [%d]\n", bwa_verbose);
 		fprintf(stderr, "       -T INT        minimum score to output [%d]\n", opt->T);
+		fprintf(stderr, "       -h INT        if there are <INT hits with score >80%% of the max score, output all in XA [%d]\n", opt->max_hits);
 		fprintf(stderr, "       -a            output all alignments for SE or unpaired PE\n");
 		fprintf(stderr, "       -C            append FASTA/FASTQ comment to SAM output\n");
+		fprintf(stderr, "       -Y            use soft clipping for supplementary alignments\n");
 		fprintf(stderr, "       -M            mark shorter split hits as secondary\n\n");
 		fprintf(stderr, "       -I FLOAT[,FLOAT[,INT[,INT]]]\n");
 		fprintf(stderr, "                     specify the mean, standard deviation (10%% of the mean if absent), max\n");
 		fprintf(stderr, "                     (4 sigma from the mean if absent) and min of the insert size distribution.\n");
 		fprintf(stderr, "                     FR orientation only. [inferred]\n");
-		fprintf(stderr, "\nNote: Please read the man page for detailed description of the command line and options.\n");
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n");
 		fprintf(stderr, "\n");
 		free(opt);
 		return 1;
 	}
 
-	if (opt0.a == 1) { // matching score is changed
-		if (opt0.b != 1) opt->b *= opt->a;
-		if (opt0.T != 1) opt->T *= opt->a;
-		if (opt0.o_del != 1) opt->o_del *= opt->a;
-		if (opt0.e_del != 1) opt->e_del *= opt->a;
-		if (opt0.o_ins != 1) opt->o_ins *= opt->a;
-		if (opt0.e_ins != 1) opt->e_ins *= opt->a;
-		if (opt0.zdrop != 1) opt->zdrop *= opt->a;
-		if (opt0.pen_clip5 != 1) opt->pen_clip5 *= opt->a;
-		if (opt0.pen_clip3 != 1) opt->pen_clip3 *= opt->a;
-		if (opt0.pen_unpaired != 1) opt->pen_unpaired *= opt->a;
-	}
+	if (mode) {
+		if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) {
+			if (!opt0.a) opt->a = 2, opt0.a = 1;
+			update_a(opt, &opt0);
+			if (!opt0.o_del) opt->o_del = 2;
+			if (!opt0.e_del) opt->e_del = 1;
+			if (!opt0.o_ins) opt->o_ins = 2;
+			if (!opt0.e_ins) opt->e_ins = 1;
+			if (!opt0.b) opt->b = 5;
+			if (opt0.split_factor == 0.) opt->split_factor = 10.;
+			if (!opt0.min_chain_weight) opt->min_chain_weight = 40;
+			if (strcmp(mode, "pbread1") == 0 || strcmp(mode, "pbread") == 0) {
+				opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG;
+				if (!opt0.max_occ) opt->max_occ = 1000;
+				if (!opt0.min_seed_len) opt->min_seed_len = 13;
+				if (!opt0.max_chain_extend) opt->max_chain_extend = 25;
+				if (opt0.drop_ratio == 0.) opt->drop_ratio = .001;
+			} else {
+				if (!opt0.min_seed_len) opt->min_seed_len = 17;
+				if (!opt0.pen_clip5) opt->pen_clip5 = 0;
+				if (!opt0.pen_clip3) opt->pen_clip3 = 0;
+			}
+		} else {
+			fprintf(stderr, "[E::%s] unknown read type '%s'\n", __func__, mode);
+			return 1; // FIXME memory leak
+		}
+	} else update_a(opt, &opt0);
+//	if (opt->T < opt->min_HSP_score) opt->T = opt->min_HSP_score; // TODO: tie ->T to MEM_HSP_COEF
 	bwa_fill_scmat(opt->a, opt->b, opt->mat);
+
 	if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
 
 	ko = kopen(argv[optind + 1], &fd);
@@ -178,7 +226,8 @@ int main_mem(int argc, char *argv[])
 			opt->flag |= MEM_F_PE;
 		}
 	}
-	bwa_print_sam_hdr(idx->bns, rg_line);
+	if (!(opt->flag & MEM_F_ALN_REG))
+		bwa_print_sam_hdr(idx->bns, rg_line);
 	while ((seqs = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
 		int64_t size = 0;
 		if ((opt->flag & MEM_F_PE) && (n&1) == 1) {
@@ -196,7 +245,7 @@ int main_mem(int argc, char *argv[])
 		mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n_processed, n, seqs, pes0);
 		n_processed += n;
 		for (i = 0; i < n; ++i) {
-			err_fputs(seqs[i].sam, stdout);
+			if (seqs[i].sam) err_fputs(seqs[i].sam, stdout);
 			free(seqs[i].name); free(seqs[i].comment); free(seqs[i].seq); free(seqs[i].qual); free(seqs[i].sam);
 		}
 		free(seqs);
@@ -215,7 +264,7 @@ int main_mem(int argc, char *argv[])
 
 int main_fastmap(int argc, char *argv[])
 {
-	int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, split_width = 0;
+	int c, i, min_iwidth = 20, min_len = 17, print_seq = 0;
 	kseq_t *seq;
 	bwtint_t k;
 	gzFile fp;
@@ -223,9 +272,8 @@ int main_fastmap(int argc, char *argv[])
 	const bwtintv_v *a;
 	bwaidx_t *idx;
 
-	while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) {
+	while ((c = getopt(argc, argv, "w:l:p")) >= 0) {
 		switch (c) {
-			case 's': split_width = atoi(optarg); break;
 			case 'p': print_seq = 1; break;
 			case 'w': min_iwidth = atoi(optarg); break;
 			case 'l': min_len = atoi(optarg); break;
@@ -233,7 +281,7 @@ int main_fastmap(int argc, char *argv[])
 		}
 	}
 	if (optind + 1 >= argc) {
-		fprintf(stderr, "Usage: bwa fastmap [-p] [-s splitWidth=%d] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", split_width, min_len, min_iwidth);
+		fprintf(stderr, "Usage: bwa fastmap [-p] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", min_len, min_iwidth);
 		return 1;
 	}
 
@@ -250,7 +298,7 @@ int main_fastmap(int argc, char *argv[])
 		for (i = 0; i < seq->seq.l; ++i)
 			seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
 		smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
-		while ((a = smem_next(itr, min_len<<1, split_width)) != 0) {
+		while ((a = smem_next(itr)) != 0) {
 			for (i = 0; i < a->n; ++i) {
 				bwtintv_t *p = &a->a[i];
 				if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
diff --git a/kbtree.h b/kbtree.h
index 2b76953..0da101d 100644
--- a/kbtree.h
+++ b/kbtree.h
@@ -77,7 +77,7 @@ typedef struct {
 			*top++ = (b)->root;											\
 			while (top != stack) {										\
 				x = *--top;												\
-				if (x->is_internal == 0) { free(x); continue; }			\
+				if (x == 0 || x->is_internal == 0) { free(x); continue; } \
 				for (i = 0; i <= x->n; ++i)								\
 					if (__KB_PTR(b, x)[i]) {							\
 						if (top - stack == max) {						\
diff --git a/ksw.c b/ksw.c
index bb055bb..9793e5e 100644
--- a/ksw.c
+++ b/ksw.c
@@ -25,6 +25,7 @@
 
 #include <stdlib.h>
 #include <stdint.h>
+#include <assert.h>
 #include <emmintrin.h>
 #include "ksw.h"
 
@@ -381,7 +382,7 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 	eh_t *eh; // score array
 	int8_t *qp; // query profile
 	int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
-	if (h0 < 0) h0 = 0;
+	assert(h0 > 0);
 	// allocate memory
 	qp = malloc(qlen * m);
 	eh = calloc(qlen + 1, 8);
@@ -411,13 +412,15 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 	for (i = 0; LIKELY(i < tlen); ++i) {
 		int t, f = 0, h1, m = 0, mj = -1;
 		int8_t *q = &qp[target[i] * qlen];
-		// compute the first column
-		h1 = h0 - (o_del + e_del * (i + 1));
-		if (h1 < 0) h1 = 0;
 		// apply the band and the constraint (if provided)
 		if (beg < i - w) beg = i - w;
 		if (end > i + w + 1) end = i + w + 1;
 		if (end > qlen) end = qlen;
+		// compute the first column
+		if (beg == 0) {
+			h1 = h0 - (o_del + e_del * (i + 1));
+			if (h1 < 0) h1 = 0;
+		} else h1 = 0;
 		for (j = beg; LIKELY(j < end); ++j) {
 			// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
 			// Similar to SSE2-SW, cells are computed in the following order:
@@ -425,20 +428,20 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 			//   E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape
 			//   F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape
 			eh_t *p = &eh[j];
-			int h = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
+			int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
 			p->h = h1;          // set H(i,j-1) for the next row
-			h += q[j];
-			h = h > e? h : e;
+			M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M"
+			h = M > e? M : e;   // e and f are guaranteed to be non-negative, so h>=0 even if M<0
 			h = h > f? h : f;
 			h1 = h;             // save H(i,j) to h1 for the next column
 			mj = m > h? mj : j; // record the position where max score is achieved
 			m = m > h? m : h;   // m is stored at eh[mj+1]
-			t = h - oe_del;
+			t = M - oe_del;
 			t = t > 0? t : 0;
 			e -= e_del;
 			e = e > t? e : t;   // computed E(i+1,j)
 			p->e = e;           // save E(i+1,j) for the next row
-			t = h - oe_ins;
+			t = M - oe_ins;
 			t = t > 0? t : 0;
 			f -= e_ins;
 			f = f > t? f : t;   // computed F(i,j+1)
@@ -460,10 +463,10 @@ int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 			}
 		}
 		// update beg and end for the next round
-		for (j = mj; j >= beg && eh[j].h; --j);
-		beg = j + 1;
-		for (j = mj + 2; j <= end && eh[j].h; ++j);
-		end = j;
+		for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j);
+		beg = j;
+		for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j);
+		end = j + 2 < qlen? j + 2 : qlen;
 		//beg = 0; end = qlen; // uncomment this line for debugging
 	}
 	free(eh); free(qp);
@@ -507,7 +510,7 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 	if (n_cigar_) *n_cigar_ = 0;
 	// allocate memory
 	n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix
-	z = malloc(n_col * tlen);
+	z = n_cigar_ && cigar_? malloc((long)n_col * tlen) : 0;
 	qp = malloc(qlen * m);
 	eh = calloc(qlen + 1, 8);
 	// generate the query profile
@@ -524,41 +527,60 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 	for (i = 0; LIKELY(i < tlen); ++i) { // target sequence is in the outer loop
 		int32_t f = MINUS_INF, h1, beg, end, t;
 		int8_t *q = &qp[target[i] * qlen];
-		uint8_t *zi = &z[i * n_col];
 		beg = i > w? i - w : 0;
 		end = i + w + 1 < qlen? i + w + 1 : qlen; // only loop through [beg,end) of the query sequence
 		h1 = beg == 0? -(o_del + e_del * (i + 1)) : MINUS_INF;
-		for (j = beg; LIKELY(j < end); ++j) {
-			// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
-			// Cells are computed in the following order:
-			//   M(i,j)   = H(i-1,j-1) + S(i,j)
-			//   H(i,j)   = max{M(i,j), E(i,j), F(i,j)}
-			//   E(i+1,j) = max{M(i,j)-gapo, E(i,j)} - gape
-			//   F(i,j+1) = max{M(i,j)-gapo, F(i,j)} - gape
-			// We have to separate M(i,j); otherwise the direction may not be recorded correctly.
-			// However, a CIGAR like "10M3I3D10M" allowed by local() and extend() is disallowed by global().
-			// Such a CIGAR may occur, in theory, if mismatch_penalty > 2*gap_ext_penalty + 2*gap_open_penalty/k.
-			// In practice, this should happen very rarely given a reasonable scoring system.
-			eh_t *p = &eh[j];
-			int32_t h, m = p->h, e = p->e;
-			uint8_t d; // direction
-			p->h = h1;
-			m += q[j];
-			d = m >= e? 0 : 1;
-			h = m >= e? m : e;
-			d = h >= f? d : 2;
-			h = h >= f? h : f;
-			h1 = h;
-			t = m - oe_del;
-			e -= e_del;
-			d |= e > t? 1<<2 : 0;
-			e  = e > t? e    : t;
-			p->e = e;
-			t = m - oe_ins;
-			f -= e_ins;
-			d |= f > t? 2<<4 : 0; // if we want to halve the memory, use one bit only, instead of two
-			f  = f > t? f    : t;
-			zi[j - beg] = d; // z[i,j] keeps h for the current cell and e/f for the next cell
+		if (n_cigar_ && cigar_) {
+			uint8_t *zi = &z[(long)i * n_col];
+			for (j = beg; LIKELY(j < end); ++j) {
+				// At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
+				// Cells are computed in the following order:
+				//   M(i,j)   = H(i-1,j-1) + S(i,j)
+				//   H(i,j)   = max{M(i,j), E(i,j), F(i,j)}
+				//   E(i+1,j) = max{M(i,j)-gapo, E(i,j)} - gape
+				//   F(i,j+1) = max{M(i,j)-gapo, F(i,j)} - gape
+				// We have to separate M(i,j); otherwise the direction may not be recorded correctly.
+				// However, a CIGAR like "10M3I3D10M" allowed by local() is disallowed by global().
+				// Such a CIGAR may occur, in theory, if mismatch_penalty > 2*gap_ext_penalty + 2*gap_open_penalty/k.
+				// In practice, this should happen very rarely given a reasonable scoring system.
+				eh_t *p = &eh[j];
+				int32_t h, m = p->h, e = p->e;
+				uint8_t d; // direction
+				p->h = h1;
+				m += q[j];
+				d = m >= e? 0 : 1;
+				h = m >= e? m : e;
+				d = h >= f? d : 2;
+				h = h >= f? h : f;
+				h1 = h;
+				t = m - oe_del;
+				e -= e_del;
+				d |= e > t? 1<<2 : 0;
+				e  = e > t? e    : t;
+				p->e = e;
+				t = m - oe_ins;
+				f -= e_ins;
+				d |= f > t? 2<<4 : 0; // if we want to halve the memory, use one bit only, instead of two
+				f  = f > t? f    : t;
+				zi[j - beg] = d; // z[i,j] keeps h for the current cell and e/f for the next cell
+			}
+		} else {
+			for (j = beg; LIKELY(j < end); ++j) {
+				eh_t *p = &eh[j];
+				int32_t h, m = p->h, e = p->e;
+				p->h = h1;
+				m += q[j];
+				h = m >= e? m : e;
+				h = h >= f? h : f;
+				h1 = h;
+				t = m - oe_del;
+				e -= e_del;
+				e  = e > t? e : t;
+				p->e = e;
+				t = m - oe_ins;
+				f -= e_ins;
+				f  = f > t? f : t;
+			}
 		}
 		eh[end].h = h1; eh[end].e = MINUS_INF;
 	}
@@ -568,7 +590,7 @@ int ksw_global2(int qlen, const uint8_t *query, int tlen, const uint8_t *target,
 		uint32_t *cigar = 0, tmp;
 		i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
 		while (i >= 0 && k >= 0) {
-			which = z[i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3;
+			which = z[(long)i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3;
 			if (which == 0)      cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k;
 			else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i;
 			else                 cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k;
diff --git a/main.c b/main.c
index 0ae7978..b3ce197 100644
--- a/main.c
+++ b/main.c
@@ -4,7 +4,7 @@
 #include "utils.h"
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.7.8-r455"
+#define PACKAGE_VERSION "0.7.9a-r786"
 #endif
 
 int bwa_fa2pac(int argc, char *argv[]);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bwa.git