[med-svn] [bwa] 01/05: Imported Upstream version 0.7.13

Andreas Tille tille at debian.org
Mon Mar 14 12:47:46 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository bwa.

commit 11ce18b50c81cfdf58973d93cc1931d1c4daa659
Author: Andreas Tille <tille at debian.org>
Date:   Mon Mar 14 13:42:48 2016 +0100

    Imported Upstream version 0.7.13
---
 .gitignore            |   7 ++
 .travis.yml           |   5 +
 Makefile              |  15 ++-
 NEWS.md               |  29 +++++
 README-alt.md         |   2 +-
 bwa.1                 |   7 +-
 bwa.h                 |   7 ++
 bwakit/README.md      |   2 +-
 bwakit/bwa-postalt.js |   2 +-
 bwakit/run-gen-ref    |   2 +-
 bwamem.c              |  19 +--
 bwamem.h              |   2 -
 bwamem_extra.c        |  25 ----
 bwape.c               |   2 +-
 bwase.c               |   1 +
 bwtindex.c            |  91 ++++++++++-----
 fastmap.c             |  22 +---
 kthread.c             |  19 +--
 main.c                |   4 +-
 maxk.c                |  67 +++++++++++
 qualfa2fq.pl          |   2 +-
 rle.c                 | 191 ++++++++++++++++++++++++++++++
 rle.h                 |  77 ++++++++++++
 rope.c                | 318 ++++++++++++++++++++++++++++++++++++++++++++++++++
 rope.h                |  58 +++++++++
 xa2multi.pl           |   2 +-
 26 files changed, 864 insertions(+), 114 deletions(-)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fba548e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+*.[oa]
+bwa
+test
+test64
+.*.swp
+Makefile.bak
+bwamem-lite
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..e386b27
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,5 @@
+language: c
+compiler:
+  - gcc
+  - clang
+script: make
diff --git a/Makefile b/Makefile
index 1d6ae44..7151435 100644
--- a/Makefile
+++ b/Makefile
@@ -4,9 +4,10 @@ CFLAGS=		-g -Wall -Wno-unused-function -O2
 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
 AR=			ar
 DFLAGS=		-DHAVE_PTHREAD $(WRAP_MALLOC)
-LOBJS=		utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
-AOBJS=		QSufSort.o bwt_gen.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
-			is.o bwtindex.o bwape.o kopen.o pemerge.o \
+LOBJS=		utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
+			QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
+AOBJS=		bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
+			bwape.o kopen.o pemerge.o maxk.o \
 			bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
 			bwtsw2_chain.o fastmap.o bwtsw2_pair.o
 PROG=		bwa
@@ -45,7 +46,8 @@ depend:
 QSufSort.o: QSufSort.h
 bamlite.o: bamlite.h malloc_wrap.h
 bntseq.o: bntseq.h utils.h kseq.h malloc_wrap.h khash.h
-bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kseq.h
+bwa.o: bntseq.h bwa.h bwt.h ksw.h utils.h kstring.h malloc_wrap.h kvec.h
+bwa.o: kseq.h
 bwamem.o: kstring.h malloc_wrap.h bwamem.h bwt.h bntseq.h bwa.h ksw.h kvec.h
 bwamem.o: ksort.h utils.h kbtree.h
 bwamem_extra.o: bwa.h bntseq.h bwt.h bwamem.h kstring.h malloc_wrap.h
@@ -62,7 +64,7 @@ bwt_gen.o: QSufSort.h malloc_wrap.h
 bwt_lite.o: bwt_lite.h malloc_wrap.h
 bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
 bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
-bwtindex.o: bntseq.h bwt.h utils.h malloc_wrap.h
+bwtindex.o: bntseq.h bwa.h bwt.h utils.h rle.h rope.h malloc_wrap.h
 bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
 bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
 bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
@@ -79,5 +81,8 @@ kstring.o: kstring.h malloc_wrap.h
 ksw.o: ksw.h malloc_wrap.h
 main.o: kstring.h malloc_wrap.h utils.h
 malloc_wrap.o: malloc_wrap.h
+maxk.o: bwa.h bntseq.h bwt.h bwamem.h kseq.h malloc_wrap.h
 pemerge.o: ksw.h kseq.h malloc_wrap.h kstring.h bwa.h bntseq.h bwt.h utils.h
+rle.o: rle.h
+rope.o: rle.h rope.h
 utils.o: utils.h ksort.h malloc_wrap.h kseq.h
diff --git a/NEWS.md b/NEWS.md
index 4692889..15c2762 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,32 @@
+Release 0.7.13 (23 Feburary 2016)
+---------------------------------
+
+This release fixes a few minor bugs in the previous version and adds a few
+minor features. All BWA algorithms should produce identical output to 0.7.12
+when there are no ALT contigs.
+
+Detailed changes:
+
+ * Fixed a bug in "bwa-postalt.js". The old version may produce 0.5% of wrong
+   bases for reads mapped to the ALT contigs.
+
+ * Fixed a potential bug in the multithreading mode. It may occur when mapping
+   is much faster than file reading, which should almost never happen in
+   practice.
+
+ * Changed the download URL of GRCh38.
+
+ * Removed the read overlap mode. It is not working well.
+
+ * Added the ropebwt2 algorithm as an alternative to index large genomes.
+   Ropebwt2 is slower than the "bwtsw" algorithm, but it has a permissive
+   license. This allows us to create an Apache2-licensed BWA (in the "Apache2"
+   branch) for commercial users who are concerned with GPL.
+
+(0.7.13: 23 Feburary 2016, r1126)
+
+
+
 Release 0.7.12 (28 December 2014)
 ---------------------------------
 
diff --git a/README-alt.md b/README-alt.md
index 4f72042..058ab7a 100644
--- a/README-alt.md
+++ b/README-alt.md
@@ -2,7 +2,7 @@
 
 ```sh
 # Download bwakit (or from <http://sourceforge.net/projects/bio-bwa/files/bwakit/> manually)
-wget -O- http://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.11_x64-linux.tar.bz2/download \
+wget -O- http://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.12_x64-linux.tar.bz2/download \
   | gzip -dc | tar xf -
 # Generate the GRCh38+ALT+decoy+HLA and create the BWA index
 bwa.kit/run-gen-ref hs38DH   # download GRCh38 and write hs38DH.fa
diff --git a/bwa.1 b/bwa.1
index e30f382..994f96a 100644
--- a/bwa.1
+++ b/bwa.1
@@ -60,11 +60,12 @@ Index database sequences in the FASTA format.
 Prefix of the output database [same as db filename]
 .TP
 .BI -a \ STR
-Algorithm for constructing BWT index. BWA implements two algorithms for BWT
+Algorithm for constructing BWT index. BWA implements three algorithms for BWT
 construction:
-.B is
+.BR is ,
+.B bwtsw
 and
-.BR bwtsw .
+.BR rb2 .
 The first algorithm is a little faster for small database but requires large
 RAM and does not work for databases with total length longer than 2GB. The
 second algorithm is adapted from the BWT-SW source code. It in theory works
diff --git a/bwa.h b/bwa.h
index 1541c1c..aa21725 100644
--- a/bwa.h
+++ b/bwa.h
@@ -12,6 +12,11 @@
 
 #define BWA_CTL_SIZE 0x10000
 
+#define BWTALGO_AUTO  0
+#define BWTALGO_RB2   1
+#define BWTALGO_BWTSW 2
+#define BWTALGO_IS    3
+
 typedef struct {
 	bwt_t    *bwt; // FM-index
 	bntseq_t *bns; // information on the reference sequences
@@ -41,6 +46,8 @@ extern "C" {
 	uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
 	uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
 
+	int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size);
+
 	char *bwa_idx_infer_prefix(const char *hint);
 	bwt_t *bwa_idx_load_bwt(const char *hint);
 
diff --git a/bwakit/README.md b/bwakit/README.md
index b3a4033..b7a67ea 100644
--- a/bwakit/README.md
+++ b/bwakit/README.md
@@ -18,7 +18,7 @@ how to use bwakit:
 
 ```sh
 # Download the bwa-0.7.11 binary package (download link may change)
-wget -O- http://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.11_x64-linux.tar.bz2/download \
+wget -O- http://sourceforge.net/projects/bio-bwa/files/bwakit/bwakit-0.7.12_x64-linux.tar.bz2/download \
   | gzip -dc | tar xf -
 # Generate the GRCh38+ALT+decoy+HLA and create the BWA index
 bwa.kit/run-gen-ref hs38DH   # download GRCh38 and write hs38DH.fa
diff --git a/bwakit/bwa-postalt.js b/bwakit/bwa-postalt.js
index 5717045..bfc4190 100644
--- a/bwakit/bwa-postalt.js
+++ b/bwakit/bwa-postalt.js
@@ -78,7 +78,7 @@ Bytes.prototype.revcomp = function()
 		this[this.length - i - 1] = Bytes.rctab[this[i]];
 		this[i] = Bytes.rctab[tmp];
 	}
-	if (this.length>>1)
+	if (this.length&1)
 		this[this.length>>1] = Bytes.rctab[this[this.length>>1]];
 }
 
diff --git a/bwakit/run-gen-ref b/bwakit/run-gen-ref
index e129b62..3ed63b2 100755
--- a/bwakit/run-gen-ref
+++ b/bwakit/run-gen-ref
@@ -2,7 +2,7 @@
 
 root=`dirname $0`
 
-url38="ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/vertebrates_mammals/Homo_sapiens/GRCh38/seqs_for_alignment_pipelines/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz"
+url38="ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_full_analysis_set.fna.gz"
 url37d5="ftp://ftp.ncbi.nlm.nih.gov/1000genomes/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz"
 
 if [ $# -eq 0 ]; then
diff --git a/bwamem.c b/bwamem.c
index b32c166..99a6d88 100644
--- a/bwamem.c
+++ b/bwamem.c
@@ -114,7 +114,7 @@ static void smem_aux_destroy(smem_aux_t *a)
 static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq, smem_aux_t *a)
 {
 	int i, k, x = 0, old_n;
-	int start_width = (opt->flag & MEM_F_SELF_OVLP)? 2 : 1;
+	int start_width = 1;
 	int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
 	a->mem.n = 0;
 	// first pass: find all SMEMs
@@ -488,13 +488,6 @@ int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_
 	return m;
 }
 
-int mem_test_and_remove_exact(const mem_opt_t *opt, int n, mem_alnreg_t *a, int qlen)
-{
-	if (!(opt->flag & MEM_F_SELF_OVLP) || n == 0 || a->truesc != qlen * opt->a) return n;
-	memmove(a, a + 1, (n - 1) * sizeof(mem_alnreg_t));
-	return n - 1;
-}
-
 typedef kvec_t(int) int_v;
 
 static void mem_mark_primary_se_core(const mem_opt_t *opt, int n, mem_alnreg_t *a, int_v *z)
@@ -1046,8 +1039,6 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
 	}
 	free(chn.a);
 	regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regs.n, regs.a);
-	if (opt->flag & MEM_F_SELF_OVLP)
-		regs.n = mem_test_and_remove_exact(opt, regs.n, regs.a, l_seq);
 	if (bwa_verbose >= 4) {
 		err_printf("* %ld chains remain after removing duplicated chains\n", regs.n);
 		for (i = 0; i < regs.n; ++i) {
@@ -1168,12 +1159,8 @@ static void worker2(void *data, int i, int tid)
 	worker_t *w = (worker_t*)data;
 	if (!(w->opt->flag&MEM_F_PE)) {
 		if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
-		if (w->opt->flag & MEM_F_ALN_REG) {
-			mem_reg2ovlp(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i]);
-		} else {
-			mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
-			mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
-		}
+		mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
+		mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
 		free(w->regs[i].a);
 	} else {
 		if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name);
diff --git a/bwamem.h b/bwamem.h
index 8ffe729..730b603 100644
--- a/bwamem.h
+++ b/bwamem.h
@@ -16,8 +16,6 @@ typedef struct __smem_i smem_i;
 #define MEM_F_ALL       0x8
 #define MEM_F_NO_MULTI  0x10
 #define MEM_F_NO_RESCUE 0x20
-#define MEM_F_SELF_OVLP 0x40
-#define MEM_F_ALN_REG   0x80
 #define MEM_F_REF_HDR	0x100
 #define MEM_F_SOFTCLIP  0x200
 #define MEM_F_SMARTPE   0x400
diff --git a/bwamem_extra.c b/bwamem_extra.c
index 02e817a..d2e37f4 100644
--- a/bwamem_extra.c
+++ b/bwamem_extra.c
@@ -87,31 +87,6 @@ mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *
 	return ar;
 }
 
-void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a)
-{
-	int i;
-	kstring_t str = {0,0,0};
-	for (i = 0; i < a->n; ++i) {
-		const mem_alnreg_t *p = &a->a[i];
-		int is_rev, rid, qb = p->qb, qe = p->qe;
-		int64_t pos, rb = p->rb, re = p->re;
-		pos = bns_depos(bns, rb < bns->l_pac? rb : re - 1, &is_rev);
-		rid = bns_pos2rid(bns, pos);
-		assert(rid == p->rid);
-		pos -= bns->anns[rid].offset;
-		kputs(s->name, &str); kputc('\t', &str);
-		kputw(s->l_seq, &str); kputc('\t', &str);
-		if (is_rev) qb ^= qe, qe ^= qb, qb ^= qe; // swap
-		kputw(qb, &str); kputc('\t', &str); kputw(qe, &str); kputc('\t', &str);
-		kputs(bns->anns[rid].name, &str); kputc('\t', &str);
-		kputw(bns->anns[rid].len, &str); kputc('\t', &str);
-		kputw(pos, &str); kputc('\t', &str); kputw(pos + (re - rb), &str); kputc('\t', &str);
-		ksprintf(&str, "%.3f", (double)p->truesc / opt->a / (qe - qb > re - rb? qe - qb : re - rb));
-		kputc('\n', &str);
-	}
-	s->sam = str.s;
-}
-
 static inline int get_pri_idx(double XA_drop_ratio, const mem_alnreg_t *a, int i)
 {
 	int k = a[i].secondary_all;
diff --git a/bwape.c b/bwape.c
index a5dc3ad..f2d3d8e 100644
--- a/bwape.c
+++ b/bwape.c
@@ -379,7 +379,7 @@ int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bw
 						bwt_multi1_t *q = p[j]->multi + k;
 						q->pos = bwa_sa2pos(bns, bwt, q->pos, p[j]->len + q->ref_shift, &strand);
 						q->strand = strand;
-						if (q->pos != p[j]->pos)
+						if (q->pos != p[j]->pos && q->pos != (bwtint_t)-1)
 							p[j]->multi[n_multi++] = *q;
 					}
 					p[j]->n_multi = n_multi;
diff --git a/bwase.c b/bwase.c
index cb912ec..77c50db 100644
--- a/bwase.c
+++ b/bwase.c
@@ -113,6 +113,7 @@ bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int r
 {
 	bwtint_t pos_f;
 	int is_rev;
+	*strand = 0; // initialise strand to 0 otherwise we could return without setting it
 	pos_f = bwt_sa(bwt, sapos); // position on the forward-reverse coordinate
 	if (pos_f < bns->l_pac && bns->l_pac < pos_f + ref_len) return (bwtint_t)-1;
 	pos_f = bns_depos(bns, pos_f, &is_rev); // position on the forward strand; this may be the first base or the last base
diff --git a/bwtindex.c b/bwtindex.c
index 2bfd667..fb32231 100644
--- a/bwtindex.c
+++ b/bwtindex.c
@@ -32,8 +32,11 @@
 #include <time.h>
 #include <zlib.h>
 #include "bntseq.h"
+#include "bwa.h"
 #include "bwt.h"
 #include "utils.h"
+#include "rle.h"
+#include "rope.h"
 
 #ifdef _DIVBWT
 #include "divsufsort.h"
@@ -63,7 +66,7 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
 {
 	bwt_t *bwt;
 	ubyte_t *buf, *buf2;
-	int i, pac_size;
+	int64_t i, pac_size;
 	FILE *fp;
 
 	// initialization
@@ -90,11 +93,31 @@ bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
 	if (use_is) {
 		bwt->primary = is_bwt(buf, bwt->seq_len);
 	} else {
-#ifdef _DIVBWT
-		bwt->primary = divbwt(buf, buf, 0, bwt->seq_len);
-#else
-		err_fatal_simple("libdivsufsort is not compiled in.");
-#endif
+		rope_t *r;
+		int64_t x;
+		rpitr_t itr;
+		const uint8_t *blk;
+
+		r = rope_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN);
+		for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) {
+			int c = buf[i] + 1;
+			x = rope_insert_run(r, x, c, 1, 0) + 1;
+			while (--c >= 0) x += r->c[c];
+		}
+		bwt->primary = x;
+		rope_itr_first(r, &itr);
+		x = 0;
+		while ((blk = rope_itr_next_block(&itr)) != 0) {
+			const uint8_t *q = blk + 2, *end = blk + 2 + *rle_nptr(blk);
+			while (q < end) {
+				int c = 0;
+				int64_t l;
+				rle_dec1(q, c, l);
+				for (i = 0; i < l; ++i)
+					buf[x++] = c - 1;
+			}
+		}
+		rope_destroy(r);
 	}
 	bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);
 	for (i = 0; i < bwt->seq_len; ++i)
@@ -186,19 +209,14 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
 
 int bwa_index(int argc, char *argv[]) // the "index" command
 {
-	extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
-
-	char *prefix = 0, *str, *str2, *str3;
-	int c, algo_type = 0, is_64 = 0, block_size = 10000000;
-	clock_t t;
-	int64_t l_pac;
-
+	int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000;
+	char *prefix = 0, *str;
 	while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
 		switch (c) {
 		case 'a': // if -a is not set, algo_type will be determined later
-			if (strcmp(optarg, "div") == 0) algo_type = 1;
-			else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
-			else if (strcmp(optarg, "is") == 0) algo_type = 3;
+			if (strcmp(optarg, "rb2") == 0) algo_type = BWTALGO_RB2;
+			else if (strcmp(optarg, "bwtsw") == 0) algo_type = BWTALGO_BWTSW;
+			else if (strcmp(optarg, "is") == 0) algo_type = BWTALGO_IS;
 			else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
 			break;
 		case 'p': prefix = strdup(optarg); break;
@@ -216,7 +234,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
 	if (optind + 1 > argc) {
 		fprintf(stderr, "\n");
 		fprintf(stderr, "Usage:   bwa index [options] <in.fasta>\n\n");
-		fprintf(stderr, "Options: -a STR    BWT construction algorithm: bwtsw or is [auto]\n");
+		fprintf(stderr, "Options: -a STR    BWT construction algorithm: bwtsw, is or rb2 [auto]\n");
 		fprintf(stderr, "         -p STR    prefix of the index [same as fasta name]\n");
 		fprintf(stderr, "         -b INT    block size for the bwtsw algorithm (effective with -a bwtsw) [%d]\n", block_size);
 		fprintf(stderr, "         -6        index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
@@ -230,16 +248,29 @@ int bwa_index(int argc, char *argv[]) // the "index" command
 		strcpy(prefix, argv[optind]);
 		if (is_64) strcat(prefix, ".64");
 	}
+	bwa_idx_build(argv[optind], prefix, algo_type, block_size);
+	free(prefix);
+	return 0;
+}
+
+int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size)
+{
+	extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
+
+	char *str, *str2, *str3;
+	clock_t t;
+	int64_t l_pac;
+
 	str  = (char*)calloc(strlen(prefix) + 10, 1);
 	str2 = (char*)calloc(strlen(prefix) + 10, 1);
 	str3 = (char*)calloc(strlen(prefix) + 10, 1);
 
 	{ // nucleotide indexing
-		gzFile fp = xzopen(argv[optind], "r");
+		gzFile fp = xzopen(fa, "r");
 		t = clock();
-		fprintf(stderr, "[bwa_index] Pack FASTA... ");
+		if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... ");
 		l_pac = bns_fasta2bntseq(fp, prefix, 0);
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 		err_gzclose(fp);
 	}
 	if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
@@ -247,7 +278,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
 		strcpy(str, prefix); strcat(str, ".pac");
 		strcpy(str2, prefix); strcat(str2, ".bwt");
 		t = clock();
-		fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
+		if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
 		if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
 		else if (algo_type == 1 || algo_type == 3) {
 			bwt_t *bwt;
@@ -255,25 +286,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command
 			bwt_dump_bwt(str2, bwt);
 			bwt_destroy(bwt);
 		}
-		fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 	}
 	{
 		bwt_t *bwt;
 		strcpy(str, prefix); strcat(str, ".bwt");
 		t = clock();
-		fprintf(stderr, "[bwa_index] Update BWT... ");
+		if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Update BWT... ");
 		bwt = bwt_restore_bwt(str);
 		bwt_bwtupdate_core(bwt);
 		bwt_dump_bwt(str, bwt);
 		bwt_destroy(bwt);
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 	}
 	{
-		gzFile fp = xzopen(argv[optind], "r");
+		gzFile fp = xzopen(fa, "r");
 		t = clock();
-		fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
+		if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
 		l_pac = bns_fasta2bntseq(fp, prefix, 1);
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 		err_gzclose(fp);
 	}
 	{
@@ -281,13 +312,13 @@ int bwa_index(int argc, char *argv[]) // the "index" command
 		strcpy(str, prefix); strcat(str, ".bwt");
 		strcpy(str3, prefix); strcat(str3, ".sa");
 		t = clock();
-		fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
+		if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
 		bwt = bwt_restore_bwt(str);
 		bwt_cal_sa(bwt, 32);
 		bwt_dump_sa(str3, bwt);
 		bwt_destroy(bwt);
-		fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
+		if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
 	}
-	free(str3); free(str2); free(str); free(prefix);
+	free(str3); free(str2); free(str);
 	return 0;
 }
diff --git a/fastmap.c b/fastmap.c
index 3cca4de..1e49ccb 100644
--- a/fastmap.c
+++ b/fastmap.c
@@ -130,7 +130,7 @@ int main_mem(int argc, char *argv[])
 
 	aux.opt = opt = mem_opt_init();
 	memset(&opt0, 0, sizeof(mem_opt_t));
-	while ((c = getopt(argc, argv, "1epaFMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) {
+	while ((c = getopt(argc, argv, "1paMCSPVYjk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:G:h:y:K:X:H:")) >= 0) {
 		if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
 		else if (c == '1') no_mt_io = 1;
 		else if (c == 'x') mode = optarg;
@@ -145,8 +145,6 @@ int main_mem(int argc, char *argv[])
 		else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE;
 		else if (c == 'M') opt->flag |= MEM_F_NO_MULTI;
 		else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE;
-		else if (c == 'e') opt->flag |= MEM_F_SELF_OVLP;
-		else if (c == 'F') opt->flag |= MEM_F_ALN_REG;
 		else if (c == 'Y') opt->flag |= MEM_F_SOFTCLIP;
 		else if (c == 'V') opt->flag |= MEM_F_REF_HDR;
 		else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1;
@@ -251,7 +249,6 @@ int main_mem(int argc, char *argv[])
 		fprintf(stderr, "       -m INT        perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw);
 		fprintf(stderr, "       -S            skip mate rescue\n");
 		fprintf(stderr, "       -P            skip pairing; mate rescue performed unless -S also in use\n");
-		fprintf(stderr, "       -e            discard full-length exact matches\n");
 		fprintf(stderr, "\nScoring options:\n\n");
 		fprintf(stderr, "       -A INT        score for a sequence match, which scales options -TdBOELU unless overridden [%d]\n", opt->a);
 		fprintf(stderr, "       -B INT        penalty for a mismatch [%d]\n", opt->b);
@@ -263,7 +260,6 @@ int main_mem(int argc, char *argv[])
 		fprintf(stderr, "                     pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0  (PacBio reads to ref)\n");
 		fprintf(stderr, "                     ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0  (Oxford Nanopore 2D-reads to ref)\n");
 		fprintf(stderr, "                     intractg: -B9 -O16 -L5  (intra-species contigs to ref)\n");
-//		fprintf(stderr, "                     pbread: -k13 -W40 -c1000 -r10 -A1 -B1 -O1 -E1 -N25 -FeaD.001\n");
 		fprintf(stderr, "\nInput/output options:\n\n");
 		fprintf(stderr, "       -p            smart pairing (ignoring in2.fq)\n");
 		fprintf(stderr, "       -R STR        read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n");
@@ -296,21 +292,14 @@ int main_mem(int argc, char *argv[])
 			if (!opt0.b) opt->b = 9;
 			if (!opt0.pen_clip5) opt->pen_clip5 = 5;
 			if (!opt0.pen_clip3) opt->pen_clip3 = 5;
-		} else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "pbread") == 0 || strcmp(mode, "ont2d") == 0) {
+		} else if (strcmp(mode, "pacbio") == 0 || strcmp(mode, "pbref") == 0 || strcmp(mode, "ont2d") == 0) {
 			if (!opt0.o_del) opt->o_del = 1;
 			if (!opt0.e_del) opt->e_del = 1;
 			if (!opt0.o_ins) opt->o_ins = 1;
 			if (!opt0.e_ins) opt->e_ins = 1;
 			if (!opt0.b) opt->b = 1;
 			if (opt0.split_factor == 0.) opt->split_factor = 10.;
-			if (strcmp(mode, "pbread") == 0) { // pacbio read-to-read setting; NOT working well!
-				opt->flag |= MEM_F_ALL | MEM_F_SELF_OVLP | MEM_F_ALN_REG;
-				if (!opt0.min_chain_weight) opt->min_chain_weight = 40;
-				if (!opt0.max_occ) opt->max_occ = 1000;
-				if (!opt0.min_seed_len) opt->min_seed_len = 13;
-				if (!opt0.max_chain_extend) opt->max_chain_extend = 25;
-				if (opt0.drop_ratio == 0.) opt->drop_ratio = .001;
-			} else if (strcmp(mode, "ont2d") == 0) {
+			if (strcmp(mode, "ont2d") == 0) {
 				if (!opt0.min_chain_weight) opt->min_chain_weight = 20;
 				if (!opt0.min_seed_len) opt->min_seed_len = 14;
 				if (!opt0.pen_clip5) opt->pen_clip5 = 0;
@@ -359,8 +348,7 @@ int main_mem(int argc, char *argv[])
 			opt->flag |= MEM_F_PE;
 		}
 	}
-	if (!(opt->flag & MEM_F_ALN_REG))
-		bwa_print_sam_hdr(aux.idx->bns, hdr_line);
+	bwa_print_sam_hdr(aux.idx->bns, hdr_line);
 	aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
 	kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
 	free(hdr_line);
@@ -403,7 +391,7 @@ int main_fastmap(int argc, char *argv[])
 		fprintf(stderr, "Options: -l INT    min SMEM length to output [%d]\n", min_len);
 		fprintf(stderr, "         -w INT    max interval size to find coordiantes [%d]\n", min_iwidth);
 		fprintf(stderr, "         -i INT    min SMEM interval size [%d]\n", min_intv);
-		fprintf(stderr, "         -l INT    max MEM length [%d]\n", max_len);
+		fprintf(stderr, "         -L INT    max MEM length [%d]\n", max_len);
 		fprintf(stderr, "         -I INT    stop if MEM is longer than -l with a size less than INT [%ld]\n", (long)max_intv);
 		fprintf(stderr, "\n");
 		return 1;
diff --git a/kthread.c b/kthread.c
index 80f84cb..1b13494 100644
--- a/kthread.c
+++ b/kthread.c
@@ -67,13 +67,15 @@ struct ktp_t;
 
 typedef struct {
 	struct ktp_t *pl;
-	int step, running;
+	int64_t index;
+	int step;
 	void *data;
 } ktp_worker_t;
 
 typedef struct ktp_t {
 	void *shared;
 	void *(*func)(void*, int, void*);
+	int64_t index;
 	int n_workers, n_steps;
 	ktp_worker_t *workers;
 	pthread_mutex_t mutex;
@@ -92,13 +94,12 @@ static void *ktp_worker(void *data)
 			// test whether another worker is doing the same step
 			for (i = 0; i < p->n_workers; ++i) {
 				if (w == &p->workers[i]) continue; // ignore itself
-				if (p->workers[i].running && p->workers[i].step == w->step)
+				if (p->workers[i].step <= w->step && p->workers[i].index < w->index)
 					break;
 			}
-			if (i == p->n_workers) break; // no other workers doing w->step; then this worker will
+			if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps
 			pthread_cond_wait(&p->cv, &p->mutex);
 		}
-		w->running = 1;
 		pthread_mutex_unlock(&p->mutex);
 
 		// working on w->step
@@ -107,7 +108,7 @@ static void *ktp_worker(void *data)
 		// update step and let other workers know
 		pthread_mutex_lock(&p->mutex);
 		w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps;
-		w->running = 0;
+		if (w->step == 0) w->index = p->index++;
 		pthread_cond_broadcast(&p->cv);
 		pthread_mutex_unlock(&p->mutex);
 	}
@@ -125,16 +126,18 @@ void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_d
 	aux.n_steps = n_steps;
 	aux.func = func;
 	aux.shared = shared_data;
+	aux.index = 0;
 	pthread_mutex_init(&aux.mutex, 0);
 	pthread_cond_init(&aux.cv, 0);
 
-	aux.workers = alloca(n_threads * sizeof(ktp_worker_t));
+	aux.workers = (ktp_worker_t*)alloca(n_threads * sizeof(ktp_worker_t));
 	for (i = 0; i < n_threads; ++i) {
 		ktp_worker_t *w = &aux.workers[i];
-		w->step = w->running = 0; w->pl = &aux; w->data = 0;
+		w->step = 0; w->pl = &aux; w->data = 0;
+		w->index = aux.index++;
 	}
 
-	tid = alloca(n_threads * sizeof(pthread_t));
+	tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t));
 	for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]);
 	for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
 
diff --git a/main.c b/main.c
index 9f776d4..a5ae5fa 100644
--- a/main.c
+++ b/main.c
@@ -4,7 +4,7 @@
 #include "utils.h"
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.7.12-r1039"
+#define PACKAGE_VERSION "0.7.13-r1126"
 #endif
 
 int bwa_fa2pac(int argc, char *argv[]);
@@ -25,6 +25,7 @@ int main_mem(int argc, char *argv[]);
 int main_shm(int argc, char *argv[]);
 
 int main_pemerge(int argc, char *argv[]);
+int main_maxk(int argc, char *argv[]);
 	
 static int usage()
 {
@@ -84,6 +85,7 @@ int main(int argc, char *argv[])
 	else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1);
 	else if (strcmp(argv[1], "shm") == 0) ret = main_shm(argc-1, argv+1);
 	else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1);
+	else if (strcmp(argv[1], "maxk") == 0) ret = main_maxk(argc-1, argv+1);
 	else {
 		fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
 		return 1;
diff --git a/maxk.c b/maxk.c
new file mode 100644
index 0000000..fee5765
--- /dev/null
+++ b/maxk.c
@@ -0,0 +1,67 @@
+#include <zlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+#include "bwa.h"
+#include "bwamem.h"
+#include "kseq.h"
+KSEQ_DECLARE(gzFile)
+
+int main_maxk(int argc, char *argv[])
+{
+	int i, c, self = 0, max_len = 0;
+	uint8_t *cnt = 0;
+	uint64_t hist[256];
+	bwt_t *bwt;
+	kseq_t *ks;
+	smem_i *itr;
+	gzFile fp;
+
+	while ((c = getopt(argc, argv, "s")) >= 0) {
+		if (c == 's') self = 1;
+	}
+	if (optind + 2 > argc) {
+		fprintf(stderr, "Usage: bwa maxk [-s] <index.prefix> <seq.fa>\n");
+		return 1;
+	}
+	fp = strcmp(argv[optind+1], "-")? gzopen(argv[optind+1], "rb") : gzdopen(fileno(stdin), "rb");
+	ks = kseq_init(fp);
+	bwt = bwt_restore_bwt(argv[optind]);
+	itr = smem_itr_init(bwt);
+	if (self) smem_config(itr, 2, INT_MAX, 0);
+	memset(hist, 0, 8 * 256);
+
+	while (kseq_read(ks) >= 0) {
+		const bwtintv_v *a;
+		if (ks->seq.l > max_len) {
+			max_len = ks->seq.l;
+			kroundup32(max_len);
+			cnt = realloc(cnt, max_len);
+		}
+		memset(cnt, 0, ks->seq.l);
+		for (i = 0; i < ks->seq.l; ++i)
+			ks->seq.s[i] = nst_nt4_table[(int)ks->seq.s[i]];
+		smem_set_query(itr, ks->seq.l, (uint8_t*)ks->seq.s);
+		while ((a = smem_next(itr)) != 0) {
+			for (i = 0; i < a->n; ++i) {
+				bwtintv_t *p = &a->a[i];
+				int j, l, start = p->info>>32, end = (uint32_t)p->info;
+				l = end - start < 255? end - start : 255;
+				for (j = start; j < end; ++j)
+					cnt[j] = cnt[j] > l? cnt[j] : l;
+			}
+		}
+		for (i = 0; i < ks->seq.l; ++i) ++hist[cnt[i]];
+	}
+	for (i = 0; i < 256; ++i)
+		printf("%d\t%lld\n", i, (long long)hist[i]);
+	free(cnt);
+
+	smem_itr_destroy(itr);
+	bwt_destroy(bwt);
+	kseq_destroy(ks);
+	gzclose(fp);
+	return 0;
+}
diff --git a/qualfa2fq.pl b/qualfa2fq.pl
index 31e1974..2fcb280 100755
--- a/qualfa2fq.pl
+++ b/qualfa2fq.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 
 use strict;
 use warnings;
diff --git a/rle.c b/rle.c
new file mode 100644
index 0000000..221e1cd
--- /dev/null
+++ b/rle.c
@@ -0,0 +1,191 @@
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "rle.h"
+
+const uint8_t rle_auxtab[8] = { 0x01, 0x11, 0x21, 0x31, 0x03, 0x13, 0x07, 0x17 };
+
+// insert symbol $a after $x symbols in $str; marginal counts added to $cnt; returns the size increase
+int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6])
+{
+	uint16_t *nptr = (uint16_t*)block;
+	int diff;
+
+	block += 2; // skip the first 2 counting bytes
+	if (*nptr == 0) {
+		memset(cnt, 0, 48);
+		diff = rle_enc1(block, a, rl);
+	} else {
+		uint8_t *p, *end = block + *nptr, *q;
+		int64_t pre, z, l = 0, tot, beg_l;
+		int c = -1, n_bytes = 0, n_bytes2, t = 0;
+		uint8_t tmp[24];
+		beg_l = bc[0] + bc[1] + bc[2] + bc[3] + bc[4] + bc[5];
+		tot   = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5];
+		if (x < beg_l) {
+			beg_l = 0, *beg = 0;
+			memset(bc, 0, 48);
+		}
+		if (x == beg_l) {
+			p = q = block + (*beg); z = beg_l;
+			memcpy(cnt, bc, 48);
+		} else if (x - beg_l <= ((tot-beg_l)>>1) + ((tot-beg_l)>>3)) { // forward
+			z = beg_l; p = block + (*beg);
+			memcpy(cnt, bc, 48);
+			while (z < x) {
+				rle_dec1(p, c, l);
+				z += l; cnt[c] += l;
+			}
+			for (q = p - 1; *q>>6 == 2; --q);
+		} else { // backward
+			memcpy(cnt, ec, 48);
+			z = tot; p = end;
+			while (z >= x) {
+				--p;
+				if (*p>>6 != 2) {
+					l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3;
+					z -= l; cnt[*p&7] -= l;
+					l = 0; t = 0;
+				} else {
+					l |= (*p&0x3fL) << t;
+					t += 6;
+				}
+			}
+			q = p;
+			rle_dec1(p, c, l);
+			z += l; cnt[c] += l;
+		}
+		*beg = q - block;
+		memcpy(bc, cnt, 48);
+		bc[c] -= l;
+		n_bytes = p - q;
+		if (x == z && a != c && p < end) { // then try the next run
+			int tc;
+			int64_t tl;
+			q = p;
+			rle_dec1(q, tc, tl);
+			if (a == tc)
+				c = tc, n_bytes = q - p, l = tl, z += l, p = q, cnt[tc] += tl;
+		}
+		if (z != x) cnt[c] -= z - x;
+		pre = x - (z - l); p -= n_bytes;
+		if (a == c) { // insert to the same run
+			n_bytes2 = rle_enc1(tmp, c, l + rl);
+		} else if (x == z) { // at the end; append to the existing run
+			p += n_bytes; n_bytes = 0;
+			n_bytes2 = rle_enc1(tmp, a, rl);
+		} else { // break the current run
+			n_bytes2 = rle_enc1(tmp, c, pre);
+			n_bytes2 += rle_enc1(tmp + n_bytes2, a, rl);
+			n_bytes2 += rle_enc1(tmp + n_bytes2, c, l - pre);
+		}
+		if (n_bytes != n_bytes2 && end != p + n_bytes) // size changed
+			memmove(p + n_bytes2, p + n_bytes, end - p - n_bytes);
+		memcpy(p, tmp, n_bytes2);
+		diff = n_bytes2 - n_bytes;
+	}
+	return (*nptr += diff);
+}
+
+int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6])
+{
+	int beg = 0;
+	int64_t bc[6];
+	memset(bc, 0, 48);
+	return rle_insert_cached(block, x, a, rl, cnt, ec, &beg, bc);
+}
+
+void rle_split(uint8_t *block, uint8_t *new_block)
+{
+	int n = *(uint16_t*)block;
+	uint8_t *end = block + 2 + n, *q = block + 2 + (n>>1);
+	while (*q>>6 == 2) --q;
+	memcpy(new_block + 2, q, end - q);
+	*(uint16_t*)new_block = end - q;
+	*(uint16_t*)block = q - block - 2;
+}
+
+void rle_count(const uint8_t *block, int64_t cnt[6])
+{
+	const uint8_t *q = block + 2, *end = q + *(uint16_t*)block;
+	while (q < end) {
+		int c;
+		int64_t l;
+		rle_dec1(q, c, l);
+		cnt[c] += l;
+	}
+}
+
+void rle_print(const uint8_t *block, int expand)
+{
+	const uint16_t *p = (const uint16_t*)block;
+	const uint8_t *q = block + 2, *end = block + 2 + *p;
+	while (q < end) {
+		int c;
+		int64_t l, x;
+		rle_dec1(q, c, l);
+		if (expand) for (x = 0; x < l; ++x) putchar("$ACGTN"[c]);
+		else printf("%c%ld", "$ACGTN"[c], (long)l);
+	}
+	putchar('\n');
+}
+
+void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6])
+{
+	int a;
+	int64_t tot, cnt[6];
+	const uint8_t *p;
+
+	y = y >= x? y : x;
+	tot = ec[0] + ec[1] + ec[2] + ec[3] + ec[4] + ec[5];
+	if (tot == 0) return;
+	if (x <= (tot - y) + (tot>>3)) {
+		int c = 0;
+		int64_t l, z = 0;
+		memset(cnt, 0, 48);
+		p = block + 2;
+		while (z < x) {
+			rle_dec1(p, c, l);
+			z += l; cnt[c] += l;
+		}
+		for (a = 0; a != 6; ++a) cx[a] += cnt[a];
+		cx[c] -= z - x;
+		if (cy) {
+			while (z < y) {
+				rle_dec1(p, c, l);
+				z += l; cnt[c] += l;
+			}
+			for (a = 0; a != 6; ++a) cy[a] += cnt[a];
+			cy[c] -= z - y;
+		}
+	} else {
+#define move_backward(_x) \
+		while (z >= (_x)) { \
+			--p; \
+			if (*p>>6 != 2) { \
+				l |= *p>>7? (int64_t)rle_auxtab[*p>>3&7]>>4 << t : *p>>3; \
+				z -= l; cnt[*p&7] -= l; \
+				l = 0; t = 0; \
+			} else { \
+				l |= (*p&0x3fL) << t; \
+				t += 6; \
+			} \
+		} \
+
+		int t = 0;
+		int64_t l = 0, z = tot;
+		memcpy(cnt, ec, 48);
+		p = block + 2 + *(const uint16_t*)block;
+		if (cy) {
+			move_backward(y)
+			for (a = 0; a != 6; ++a) cy[a] += cnt[a];
+			cy[*p&7] += y - z;
+		}
+		move_backward(x)
+		for (a = 0; a != 6; ++a) cx[a] += cnt[a];
+		cx[*p&7] += x - z;
+
+#undef move_backward
+	}
+}
diff --git a/rle.h b/rle.h
new file mode 100644
index 0000000..0d59484
--- /dev/null
+++ b/rle.h
@@ -0,0 +1,77 @@
+#ifndef RLE6_H_
+#define RLE6_H_
+
+#include <stdint.h>
+
+#ifdef __GNUC__
+#define LIKELY(x) __builtin_expect((x),1)
+#else
+#define LIKELY(x) (x)
+#endif
+#ifdef __cplusplus
+
+extern "C" {
+#endif
+
+	int rle_insert_cached(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t ec[6], int *beg, int64_t bc[6]);
+	int rle_insert(uint8_t *block, int64_t x, int a, int64_t rl, int64_t cnt[6], const int64_t end_cnt[6]);
+	void rle_split(uint8_t *block, uint8_t *new_block);
+	void rle_count(const uint8_t *block, int64_t cnt[6]);
+	void rle_rank2a(const uint8_t *block, int64_t x, int64_t y, int64_t *cx, int64_t *cy, const int64_t ec[6]);
+	#define rle_rank1a(block, x, cx, ec) rle_rank2a(block, x, -1, cx, 0, ec)
+
+	void rle_print(const uint8_t *block, int expand);
+
+#ifdef __cplusplus
+}
+#endif
+
+/******************
+ *** 43+3 codec ***
+ ******************/
+
+const uint8_t rle_auxtab[8];
+
+#define RLE_MIN_SPACE 18
+#define rle_nptr(block) ((uint16_t*)(block))
+
+// decode one run (c,l) and move the pointer p
+#define rle_dec1(p, c, l) do { \
+		(c) = *(p) & 7; \
+		if (LIKELY((*(p)&0x80) == 0)) { \
+			(l) = *(p)++ >> 3; \
+		} else if (LIKELY(*(p)>>5 == 6)) { \
+			(l) = (*(p)&0x18L)<<3L | ((p)[1]&0x3fL); \
+			(p) += 2; \
+		} else { \
+			int n = ((*(p)&0x10) >> 2) + 4; \
+			(l) = *(p)++ >> 3 & 1; \
+			while (--n) (l) = ((l)<<6) | (*(p)++&0x3fL); \
+		} \
+	} while (0)
+
+static inline int rle_enc1(uint8_t *p, int c, int64_t l)
+{
+	if (l < 1LL<<4) {
+		*p = l << 3 | c;
+		return 1;
+	} else if (l < 1LL<<8) {
+		*p = 0xC0 | l >> 6 << 3 | c;
+		p[1] = 0x80 | (l & 0x3f);
+		return 2;
+	} else if (l < 1LL<<19) {
+		*p = 0xE0 | l >> 18 << 3 | c;
+		p[1] = 0x80 | (l >> 12 & 0x3f);
+		p[2] = 0x80 | (l >> 6 & 0x3f);
+		p[3] = 0x80 | (l & 0x3f);
+		return 4;
+	} else {
+		int i, shift = 36;
+		*p = 0xF0 | l >> 42 << 3 | c;
+		for (i = 1; i < 8; ++i, shift -= 6)
+			p[i] = 0x80 | (l>>shift & 0x3f);
+		return 8;
+	}
+}
+
+#endif
diff --git a/rope.c b/rope.c
new file mode 100644
index 0000000..3d85981
--- /dev/null
+++ b/rope.c
@@ -0,0 +1,318 @@
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include <zlib.h>
+#include "rle.h"
+#include "rope.h"
+
+/*******************
+ *** Memory Pool ***
+ *******************/
+
+#define MP_CHUNK_SIZE 0x100000 // 1MB per chunk
+
+typedef struct { // memory pool for fast and compact memory allocation (no free)
+	int size, i, n_elems;
+	int64_t top, max;
+	uint8_t **mem;
+} mempool_t;
+
+static mempool_t *mp_init(int size)
+{
+	mempool_t *mp;
+	mp = calloc(1, sizeof(mempool_t));
+	mp->size = size;
+	mp->i = mp->n_elems = MP_CHUNK_SIZE / size;
+	mp->top = -1;
+	return mp;
+}
+
+static void mp_destroy(mempool_t *mp)
+{
+	int64_t i;
+	for (i = 0; i <= mp->top; ++i) free(mp->mem[i]);
+	free(mp->mem); free(mp);
+}
+
+static inline void *mp_alloc(mempool_t *mp)
+{
+	if (mp->i == mp->n_elems) {
+		if (++mp->top == mp->max) {
+			mp->max = mp->max? mp->max<<1 : 1;
+			mp->mem = realloc(mp->mem, sizeof(void*) * mp->max);
+		}
+		mp->mem[mp->top] = calloc(mp->n_elems, mp->size);
+		mp->i = 0;
+	}
+	return mp->mem[mp->top] + (mp->i++) * mp->size;
+}
+
+/***************
+ *** B+ rope ***
+ ***************/
+
+rope_t *rope_init(int max_nodes, int block_len)
+{
+	rope_t *rope;
+	rope = calloc(1, sizeof(rope_t));
+	if (block_len < 32) block_len = 32;
+	rope->max_nodes = (max_nodes+ 1)>>1<<1;
+	rope->block_len = (block_len + 7) >> 3 << 3;
+	rope->node = mp_init(sizeof(rpnode_t) * rope->max_nodes);
+	rope->leaf = mp_init(rope->block_len);
+	rope->root = mp_alloc(rope->node);
+	rope->root->n = 1;
+	rope->root->is_bottom = 1;
+	rope->root->p = mp_alloc(rope->leaf);
+	return rope;
+}
+
+void rope_destroy(rope_t *rope)
+{
+	mp_destroy(rope->node);
+	mp_destroy(rope->leaf);
+	free(rope);
+}
+
+static inline rpnode_t *split_node(rope_t *rope, rpnode_t *u, rpnode_t *v)
+{ // split $v's child. $u is the first node in the bucket. $v and $u are in the same bucket. IMPORTANT: there is always enough room in $u
+	int j, i = v - u;
+	rpnode_t *w; // $w is the sibling of $v
+	if (u == 0) { // only happens at the root; add a new root
+		u = v = mp_alloc(rope->node);
+		v->n = 1; v->p = rope->root; // the new root has the old root as the only child
+		memcpy(v->c, rope->c, 48);
+		for (j = 0; j < 6; ++j) v->l += v->c[j];
+		rope->root = v;
+	}
+	if (i != u->n - 1) // then make room for a new node
+		memmove(v + 2, v + 1, sizeof(rpnode_t) * (u->n - i - 1));
+	++u->n; w = v + 1;
+	memset(w, 0, sizeof(rpnode_t));
+	w->p = mp_alloc(u->is_bottom? rope->leaf : rope->node);
+	if (u->is_bottom) { // we are at the bottom level; $v->p is a string instead of a node
+		uint8_t *p = (uint8_t*)v->p, *q = (uint8_t*)w->p;
+		rle_split(p, q);
+		rle_count(q, w->c);
+	} else { // $v->p is a node, not a string
+		rpnode_t *p = v->p, *q = w->p; // $v and $w are siblings and thus $p and $q are cousins
+		p->n -= rope->max_nodes>>1;
+		memcpy(q, p + p->n, sizeof(rpnode_t) * (rope->max_nodes>>1));
+		q->n = rope->max_nodes>>1; // NB: this line must below memcpy() as $q->n and $q->is_bottom are modified by memcpy()
+		q->is_bottom = p->is_bottom;
+		for (i = 0; i < q->n; ++i)
+			for (j = 0; j < 6; ++j)
+				w->c[j] += q[i].c[j];
+	}
+	for (j = 0; j < 6; ++j) // compute $w->l and update $v->c
+		w->l += w->c[j], v->c[j] -= w->c[j];
+	v->l -= w->l; // update $v->c
+	return v;
+}
+
+int64_t rope_insert_run(rope_t *rope, int64_t x, int a, int64_t rl, rpcache_t *cache)
+{ // insert $a after $x symbols in $rope and the returns rank(a, x)
+	rpnode_t *u = 0, *v = 0, *p = rope->root; // $v is the parent of $p; $u and $v are at the same level and $u is the first node in the bucket
+	int64_t y = 0, z = 0, cnt[6];
+	int n_runs;
+	do { // top-down update. Searching and node splitting are done together in one pass.
+		if (p->n == rope->max_nodes) { // node is full; split
+			v = split_node(rope, u, v); // $v points to the parent of $p; when a new root is added, $v points to the root
+			if (y + v->l < x) // if $v is not long enough after the split, we need to move both $p and its parent $v
+				y += v->l, z += v->c[a], ++v, p = v->p;
+		}
+		u = p;
+		if (v && x - y > v->l>>1) { // then search backwardly for the right node to descend
+			p += p->n - 1; y += v->l; z += v->c[a];
+			for (; y >= x; --p) y -= p->l, z -= p->c[a];
+			++p;
+		} else for (; y + p->l < x; ++p) y += p->l, z += p->c[a]; // then search forwardly
+		assert(p - u < u->n);
+		if (v) v->c[a] += rl, v->l += rl; // we should not change p->c[a] because this may cause troubles when p's child is split
+		v = p; p = p->p; // descend
+	} while (!u->is_bottom);
+	rope->c[a] += rl; // $rope->c should be updated after the loop as adding a new root needs the old $rope->c counts
+	if (cache) {
+		if (cache->p != (uint8_t*)p) memset(cache, 0, sizeof(rpcache_t));
+		n_runs = rle_insert_cached((uint8_t*)p, x - y, a, rl, cnt, v->c, &cache->beg, cache->bc);
+		cache->p = (uint8_t*)p;
+	} else n_runs = rle_insert((uint8_t*)p, x - y, a, rl, cnt, v->c);
+	z += cnt[a];
+	v->c[a] += rl; v->l += rl; // this should be after rle_insert(); otherwise rle_insert() won't work
+	if (n_runs + RLE_MIN_SPACE > rope->block_len) {
+		split_node(rope, u, v);
+		if (cache) memset(cache, 0, sizeof(rpcache_t));
+	}
+	return z;
+}
+
+static rpnode_t *rope_count_to_leaf(const rope_t *rope, int64_t x, int64_t cx[6], int64_t *rest)
+{
+	rpnode_t *u, *v = 0, *p = rope->root;
+	int64_t y = 0;
+	int a;
+
+	memset(cx, 0, 48);
+	do {
+		u = p;
+		if (v && x - y > v->l>>1) {
+			p += p->n - 1; y += v->l;
+			for (a = 0; a != 6; ++a) cx[a] += v->c[a];
+			for (; y >= x; --p) {
+				y -= p->l;
+				for (a = 0; a != 6; ++a) cx[a] -= p->c[a];
+			}
+			++p;
+		} else {
+			for (; y + p->l < x; ++p) {
+				y += p->l;
+				for (a = 0; a != 6; ++a) cx[a] += p->c[a];
+			}
+		}
+		v = p; p = p->p;
+	} while (!u->is_bottom);
+	*rest = x - y;
+	return v;
+}
+
+void rope_rank2a(const rope_t *rope, int64_t x, int64_t y, int64_t *cx, int64_t *cy)
+{
+	rpnode_t *v;
+	int64_t rest;
+	v = rope_count_to_leaf(rope, x, cx, &rest);
+	if (y < x || cy == 0) {
+		rle_rank1a((const uint8_t*)v->p, rest, cx, v->c);
+	} else if (rest + (y - x) <= v->l) {
+		memcpy(cy, cx, 48);
+		rle_rank2a((const uint8_t*)v->p, rest, rest + (y - x), cx, cy, v->c);
+	} else {
+		rle_rank1a((const uint8_t*)v->p, rest, cx, v->c);
+		v = rope_count_to_leaf(rope, y, cy, &rest);
+		rle_rank1a((const uint8_t*)v->p, rest, cy, v->c);
+	}
+}
+
+/*********************
+ *** Rope iterator ***
+ *********************/
+
+void rope_itr_first(const rope_t *rope, rpitr_t *i)
+{
+	memset(i, 0, sizeof(rpitr_t));
+	i->rope = rope;
+	for (i->pa[i->d] = rope->root; !i->pa[i->d]->is_bottom;) // descend to the leftmost leaf
+		++i->d, i->pa[i->d] = i->pa[i->d - 1]->p;
+}
+
+const uint8_t *rope_itr_next_block(rpitr_t *i)
+{
+	const uint8_t *ret;
+	assert(i->d < ROPE_MAX_DEPTH); // a B+ tree should not be that tall
+	if (i->d < 0) return 0;
+	ret = (uint8_t*)i->pa[i->d][i->ia[i->d]].p;
+	while (i->d >= 0 && ++i->ia[i->d] == i->pa[i->d]->n) i->ia[i->d--] = 0; // backtracking
+	if (i->d >= 0)
+		while (!i->pa[i->d]->is_bottom) // descend to the leftmost leaf
+			++i->d, i->pa[i->d] = i->pa[i->d - 1][i->ia[i->d - 1]].p;
+	return ret;
+}
+
+/***********
+ *** I/O ***
+ ***********/
+
+void rope_print_node(const rpnode_t *p)
+{
+	if (p->is_bottom) {
+		int i;
+		putchar('(');
+		for (i = 0; i < p->n; ++i) {
+			uint8_t *block = (uint8_t*)p[i].p;
+			const uint8_t *q = block + 2, *end = block + 2 + *rle_nptr(block);
+			if (i) putchar(',');
+			while (q < end) {
+				int c = 0;
+				int64_t j, l;
+				rle_dec1(q, c, l);
+				for (j = 0; j < l; ++j) putchar("$ACGTN"[c]);
+			}
+		}
+		putchar(')');
+	} else {
+		int i;
+		putchar('(');
+		for (i = 0; i < p->n; ++i) {
+			if (i) putchar(',');
+			rope_print_node(p[i].p);
+		}
+		putchar(')');
+	}
+}
+
+void rope_dump_node(const rpnode_t *p, FILE *fp)
+{
+	int16_t i, n = p->n;
+	uint8_t is_bottom = p->is_bottom;
+	fwrite(&is_bottom, 1, 1, fp);
+	fwrite(&n, 2, 1, fp);
+	if (is_bottom) {
+		for (i = 0; i < n; ++i) {
+			fwrite(p[i].c, 8, 6, fp);
+			fwrite(p[i].p, 1, *rle_nptr(p[i].p) + 2, fp);
+		}
+	} else {
+		for (i = 0; i < p->n; ++i)
+			rope_dump_node(p[i].p, fp);
+	}
+}
+
+void rope_dump(const rope_t *r, FILE *fp)
+{
+	fwrite(&r->max_nodes, 4, 1, fp);
+	fwrite(&r->block_len, 4, 1, fp);
+	rope_dump_node(r->root, fp);
+}
+
+rpnode_t *rope_restore_node(const rope_t *r, FILE *fp, int64_t c[6])
+{
+	uint8_t is_bottom, a;
+	int16_t i, n;
+	rpnode_t *p;
+	fread(&is_bottom, 1, 1, fp);
+	fread(&n, 2, 1, fp);
+	p = mp_alloc(r->node);
+	p->is_bottom = is_bottom, p->n = n;
+	if (is_bottom) {
+		for (i = 0; i < n; ++i) {
+			uint16_t *q;
+			p[i].p = mp_alloc(r->leaf);
+			q = rle_nptr(p[i].p);
+			fread(p[i].c, 8, 6, fp);
+			fread(q, 2, 1, fp);
+			fread(q + 1, 1, *q, fp);
+		}
+	} else {
+		for (i = 0; i < n; ++i)
+			p[i].p = rope_restore_node(r, fp, p[i].c);
+	}
+	memset(c, 0, 48);
+	for (i = 0; i < n; ++i) {
+		p[i].l = 0;
+		for (a = 0; a < 6; ++a)
+			c[a] += p[i].c[a], p[i].l += p[i].c[a];
+	}
+	return p;
+}
+
+rope_t *rope_restore(FILE *fp)
+{
+	rope_t *r;
+	r = calloc(1, sizeof(rope_t));
+	fread(&r->max_nodes, 4, 1, fp);
+	fread(&r->block_len, 4, 1, fp);
+	r->node = mp_init(sizeof(rpnode_t) * r->max_nodes);
+	r->leaf = mp_init(r->block_len);
+	r->root = rope_restore_node(r, fp, r->c);
+	return r;
+}
diff --git a/rope.h b/rope.h
new file mode 100644
index 0000000..843a408
--- /dev/null
+++ b/rope.h
@@ -0,0 +1,58 @@
+#ifndef ROPE_H_
+#define ROPE_H_
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define ROPE_MAX_DEPTH 80
+#define ROPE_DEF_MAX_NODES 64
+#define ROPE_DEF_BLOCK_LEN 512
+
+typedef struct rpnode_s {
+	struct rpnode_s *p; // child; at the bottom level, $p points to a string with the first 2 bytes giving the number of runs (#runs)
+	uint64_t l:54, n:9, is_bottom:1; // $n and $is_bottom are only set for the first node in a bucket
+	int64_t c[6]; // marginal counts
+} rpnode_t;
+
+typedef struct {
+	int32_t max_nodes, block_len; // both MUST BE even numbers
+	int64_t c[6]; // marginal counts
+	rpnode_t *root;
+	void *node, *leaf; // memory pool
+} rope_t;
+
+typedef struct {
+	const rope_t *rope; // the rope
+	const rpnode_t *pa[ROPE_MAX_DEPTH]; // parent nodes
+	int ia[ROPE_MAX_DEPTH]; // index in the parent nodes
+	int d; // the current depth in the B+-tree
+} rpitr_t;
+
+typedef struct {
+	int beg;
+	int64_t bc[6];
+	uint8_t *p;
+} rpcache_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	rope_t *rope_init(int max_nodes, int block_len);
+	void rope_destroy(rope_t *rope);
+	int64_t rope_insert_run(rope_t *rope, int64_t x, int a, int64_t rl, rpcache_t *cache);
+	void rope_rank2a(const rope_t *rope, int64_t x, int64_t y, int64_t *cx, int64_t *cy);
+	#define rope_rank1a(rope, x, cx) rope_rank2a(rope, x, -1, cx, 0)
+
+	void rope_itr_first(const rope_t *rope, rpitr_t *i);
+	const uint8_t *rope_itr_next_block(rpitr_t *i);
+
+	void rope_print_node(const rpnode_t *p);
+	void rope_dump(const rope_t *r, FILE *fp);
+	rope_t *rope_restore(FILE *fp);
+	
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/xa2multi.pl b/xa2multi.pl
index 2409c29..fff50fd 100755
--- a/xa2multi.pl
+++ b/xa2multi.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -w
+#!/usr/bin/env perl
 
 use strict;
 use warnings;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bwa.git



More information about the debian-med-commit mailing list