[med-svn] [minimap] 01/02: Imported Upstream version 0.2

Fri Jan 8 13:52:43 UTC 2016

This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository minimap.

commit e97a5a4a3728ecf82c659bc110d8f090af431f84
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Fri Jan 8 10:56:18 2016 +0000

    Imported Upstream version 0.2
---
 .gitignore  |   3 +
 LICENSE.txt |  23 +++
 Makefile    |  46 +++++
 README.md   |  98 ++++++++++
 bseq.c      |  65 +++++++
 bseq.h      |  19 ++
 example.c   |  52 +++++
 index.c     | 352 ++++++++++++++++++++++++++++++++++
 kdq.h       | 128 +++++++++++++
 khash.h     | 619 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 kseq.h      | 248 ++++++++++++++++++++++++
 ksort.h     | 159 ++++++++++++++++
 kthread.c   | 146 ++++++++++++++
 kvec.h      | 110 +++++++++++
 main.c      | 145 ++++++++++++++
 map.c       | 374 ++++++++++++++++++++++++++++++++++++
 minimap.1   | 222 ++++++++++++++++++++++
 minimap.h   | 104 ++++++++++
 misc.c      |  26 +++
 sdust.c     | 209 ++++++++++++++++++++
 sdust.h     |  23 +++
 sketch.c    | 102 ++++++++++
 22 files changed, 3273 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f98c8b2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.*.swp
+*.o
+*.a
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..beb04ef
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,23 @@
+The MIT License
+
+Copyright (c) 2015 Broad Institute
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..00bdae6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,46 @@
+CC=			gcc
+CFLAGS=		-g -Wall -O2 -Wc++-compat -Wno-unused-function
+CPPFLAGS=
+INCLUDES=	-I.
+OBJS=		kthread.o misc.o bseq.o sketch.o sdust.o index.o map.o
+PROG=		minimap
+PROG_EXTRA=	sdust minimap-lite
+LIBS=		-lm -lz -lpthread
+
+.SUFFIXES:.c .o
+
+.c.o:
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
+
+all:$(PROG)
+
+extra:all $(PROG_EXTRA)
+
+minimap:main.o libminimap.a
+		$(CC) $(CFLAGS) $< -o $@ -L. -lminimap $(LIBS)
+
+minimap-lite:example.o libminimap.a
+		$(CC) $(CFLAGS) $< -o $@ -L. -lminimap $(LIBS)
+
+libminimap.a:$(OBJS)
+		$(AR) -csru $@ $(OBJS)
+
+sdust:sdust.c kdq.h kvec.h kseq.h sdust.h
+		$(CC) -D_SDUST_MAIN $(CFLAGS) $< -o $@ -lz
+
+clean:
+		rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*
+
+depend:
+		(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c)
+
+# DO NOT DELETE
+
+bseq.o: bseq.h kseq.h
+example.o: minimap.h bseq.h kseq.h
+index.o: minimap.h bseq.h kvec.h khash.h
+main.o: minimap.h bseq.h
+map.o: bseq.h kvec.h minimap.h sdust.h ksort.h
+misc.o: minimap.h bseq.h ksort.h
+sdust.o: kdq.h kvec.h sdust.h
+sketch.o: kvec.h minimap.h bseq.h
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a0f2ce7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,98 @@
+## Introduction
+
+Minimap is an *experimental* tool to efficiently find multiple approximate
+mapping positions between two sets of long sequences, such as between reads and
+reference genomes, between genomes and between long noisy reads. By default, it
+is tuned to have high sensitivity to 2kb matches around 20% divergence but with
+low specificity. Minimap does not generate alignments as of now and because of
+this, it is usually tens of times faster than mainstream *aligners*. With four
+CPU cores, minimap can map 1.6Gbp PacBio reads to human in 2.5 minutes, 1Gbp
+PacBio E. coli reads to pre-indexed 9.6Gbp bacterial genomes in 3 minutes, to
+pre-indexed >100Gbp nt database in ~1 hour (of which ~20 minutes are spent on
+loading index from the network filesystem; peak RAM: 10GB), map 2800 bacteria
+to themselves in 1 hour, and map 1Gbp E. coli reads against themselves in a
+couple of minutes.
+
+Minimap does not replace mainstream aligners, but it can be useful when you
+want to quickly identify long approximate matches at moderate divergence among
+a huge collection of sequences. For this task, it is much faster than most
+existing tools.
+
+## Usage
+
+* Map two sets of long sequences:
+  ```sh
+  minimap target.fa.gz query.fa.gz > out.mini
+  ```
+  The output is TAB-delimited with each line consisting of query name, length,
+  0-based start, end, strand, target name, length, start, end, the number of
+  matching bases, the number of co-linear minimizers in the match and the
+  fraction of matching bases.
+
+* All-vs-all PacBio read self-mapping for [miniasm][miniasm]:
+  ```sh
+  minimap -Sw5 -L100 -m0 reads.fa reads.fa | gzip -1 > reads.paf.gz
+  ```
+
+* Prebuild index and then map:
+  ```sh
+  minimap -d target.mmi target.fa.gz
+  minimap -l target.mmi query.fa.gz > out.mini
+  ```
+  Minimap indexing is very fast (1 minute for human genome; 50 minutes for >100Gbp
+  nt database retrieved on 2015-09-30), but for huge
+  repeatedly used databases, prebuilding index is still preferred.
+
+* Map sequences against themselve without diagnal matches:
+  ```sh
+  minimap -S sequences.fa sequences.fa > self-match.mini
+  ```
+  The output may still contain overlapping matches in repetitive regions.
+
+## Algorithm Overview
+
+1. Indexing. Collect all [(*w*,*k*)-minimizers][mini] in a batch (**-I**=4
+   billion bp) of target sequences and store them in a hash table. Mark top
+   **-f**=0.1% of most frequent minimizers as repeats. Minimap
+   uses [invertible hash function][invhash] to avoid taking ploy-A as
+   minimizers.
+
+2. For each query, collect all (*w*,*k*)-minimizers and look up the hash table for
+   matches (*q<sub>i</sub>*,*t<sub>i</sub>*,*s<sub>i</sub>*), where
+   *q<sub>i</sub>* is the query position, *t<sub>i</sub>* the target position
+   and *s<sub>i</sub>* indicates whether the minimizer match is on the same
+   strand.
+
+3. For matches on the same strand, sort by {*q<sub>i</sub>*-*t<sub>i</sub>*}
+   and then cluster matches within a **-r**=500bp window. Minimap merges
+   two windows if **-m**=50% of minimizer matches overlap. For matches on different
+   strands, sort {*q<sub>i</sub>*+*t<sub>i</sub>*} and apply a similar
+   clustering procedure. This is inspired by the [Hough transformation][hough].
+
+4. For each cluster, sort (*q<sub>i</sub>*,*t<sub>i</sub>*) by *q<sub>i</sub>*
+   and solve a [longest increasing sequence problem][lis] for *t<sub>i</sub>*. This
+   finds the longest co-linear matching chain. Break the chain whenever there
+   is a gap longer than **-g**=10000.
+
+5. Output the start and end of the chain if it contains **-c**=4 or more
+   minimizer matches and the matching length is no less than **-L**=40.
+
+6. Go to 1 and rewind to the first record of query if there are more target
+   sequences; otherwise stop.
+
+To increase sensitivity, we may decrease **-w** to index more minimizers;
+we may also decrease **-k**, though this may greatly impact performance for
+mammalian genomes.
+
+Also note that by default, if the total length of target sequences is less than
+4Gbp (1G=1 billion; controlled by **-I**), minimap creates one index and stream
+all the query sequences in one go. The multiple hits of a query sequence is
+adjacent to each other in the output. If the total length is greater than
+4Gbp, minimap needs to read query sequences multiple times. The multiple hits
+of a query may not be adjacent.
+
+[mini]: http://bioinformatics.oxfordjournals.org/content/20/18/3363.abstract
+[lis]: https://en.wikipedia.org/wiki/Longest_increasing_subsequence
+[hough]: https://en.wikipedia.org/wiki/Hough_transform
+[invhash]: https://gist.github.com/lh3/974ced188be2f90422cc
+[miniasm]: https://github.com/lh3/miniasm
diff --git a/bseq.c b/bseq.c
new file mode 100644
index 0000000..3cde6c8
--- /dev/null
+++ b/bseq.c
@@ -0,0 +1,65 @@
+#include <zlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "bseq.h"
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+extern unsigned char seq_nt4_table[256];
+
+struct bseq_file_s {
+	int is_eof;
+	gzFile fp;
+	kseq_t *ks;
+};
+
+bseq_file_t *bseq_open(const char *fn)
+{
+	bseq_file_t *fp;
+	gzFile f;
+	f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+	if (f == 0) return 0;
+	fp = (bseq_file_t*)calloc(1, sizeof(bseq_file_t));
+	fp->fp = f;
+	fp->ks = kseq_init(fp->fp);
+	return fp;
+}
+
+void bseq_close(bseq_file_t *fp)
+{
+	kseq_destroy(fp->ks);
+	gzclose(fp->fp);
+	free(fp);
+}
+
+bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int *n_)
+{
+	int size = 0, m, n;
+	bseq1_t *seqs;
+	kseq_t *ks = fp->ks;
+	m = n = 0; seqs = 0;
+	while (kseq_read(ks) >= 0) {
+		bseq1_t *s;
+		assert(ks->seq.l <= INT32_MAX);
+		if (n >= m) {
+			m = m? m<<1 : 256;
+			seqs = (bseq1_t*)realloc(seqs, m * sizeof(bseq1_t));
+		}
+		s = &seqs[n];
+		s->name = strdup(ks->name.s);
+		s->seq = strdup(ks->seq.s);
+		s->l_seq = ks->seq.l;
+		size += seqs[n++].l_seq;
+		if (size >= chunk_size) break;
+	}
+	if (n == 0) fp->is_eof = 1;
+	*n_ = n;
+	return seqs;
+}
+
+int bseq_eof(bseq_file_t *fp)
+{
+	return fp->is_eof;
+}
diff --git a/bseq.h b/bseq.h
new file mode 100644
index 0000000..daf1fc5
--- /dev/null
+++ b/bseq.h
@@ -0,0 +1,19 @@
+#ifndef MM_BSEQ_H
+#define MM_BSEQ_H
+
+#include <stdint.h>
+
+struct bseq_file_s;
+typedef struct bseq_file_s bseq_file_t;
+
+typedef struct {
+	int l_seq, rid;
+	char *name, *seq;
+} bseq1_t;
+
+bseq_file_t *bseq_open(const char *fn);
+void bseq_close(bseq_file_t *fp);
+bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int *n_);
+int bseq_eof(bseq_file_t *fp);
+
+#endif
diff --git a/example.c b/example.c
new file mode 100644
index 0000000..ecc5463
--- /dev/null
+++ b/example.c
@@ -0,0 +1,52 @@
+// To compile:
+//   gcc -g -O2 example.c libminimap.a -lz
+
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <zlib.h>
+#include "minimap.h"
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+int main(int argc, char *argv[])
+{
+	if (argc < 3) {
+		fprintf(stderr, "Usage: minimap-lite <target.fa> <query.fa>\n");
+		return 1;
+	}
+	
+	// open query file for reading; you may use your favorite FASTA/Q parser
+	gzFile f = gzopen(argv[2], "r");
+	assert(f);
+	kseq_t *ks = kseq_init(f);
+
+	// create index for target; we are creating one index for all target sequence
+	int n_threads = 4, w = 10, k = 15;
+	mm_idx_t *mi = mm_idx_build(argv[1], w, k, n_threads);
+	assert(mi);
+
+	// mapping
+	mm_mapopt_t opt;
+	mm_mapopt_init(&opt); // initialize mapping parameters
+	mm_tbuf_t *tbuf = mm_tbuf_init(); // thread buffer; for multi-threading, allocate one tbuf for each thread
+	while (kseq_read(ks) >= 0) { // each kseq_read() call reads one query sequence
+		const mm_reg1_t *reg;
+		int j, n_reg;
+		// get all hits for the query
+		reg = mm_map(mi, ks->seq.l, ks->seq.s, &n_reg, tbuf, &opt, 0);
+		// traverse hits and print them out
+		for (j = 0; j < n_reg; ++j) {
+			const mm_reg1_t *r = &reg[j];
+			printf("%s\t%d\t%d\t%d\t%c\t", ks->name.s, ks->seq.l, r->qs, r->qe, "+-"[r->rev]);
+			printf("%s\t%d\t%d\t%d\t%d\t%d\n", mi->name[r->rid], mi->len[r->rid], r->rs, r->re, r->len, r->cnt);
+		}
+	}
+	mm_tbuf_destroy(tbuf);
+
+	// deallocate index and close the query file
+	mm_idx_destroy(mi);
+	kseq_destroy(ks);
+	gzclose(f);
+	return 0;
+}
diff --git a/index.c b/index.c
new file mode 100644
index 0000000..4edc433
--- /dev/null
+++ b/index.c
@@ -0,0 +1,352 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include "minimap.h"
+#include "kvec.h"
+#include "khash.h"
+
+#define idx_hash(a) ((a)>>1)
+#define idx_eq(a, b) ((a)>>1 == (b)>>1)
+KHASH_INIT(idx, uint64_t, uint64_t, 1, idx_hash, idx_eq)
+typedef khash_t(idx) idxhash_t;
+
+void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
+
+mm_idx_t *mm_idx_init(int w, int k, int b)
+{
+	mm_idx_t *mi;
+	if (k*2 < b) b = k * 2;
+	if (w < 1) w = 1;
+	mi = (mm_idx_t*)calloc(1, sizeof(mm_idx_t));
+	mi->w = w, mi->k = k, mi->b = b;
+	mi->max_occ = UINT32_MAX;
+	mi->B = (mm_idx_bucket_t*)calloc(1<<b, sizeof(mm_idx_bucket_t));
+	return mi;
+}
+
+void mm_idx_destroy(mm_idx_t *mi)
+{
+	int i;
+	if (mi == 0) return;
+	for (i = 0; i < 1<<mi->b; ++i) {
+		free(mi->B[i].p);
+		free(mi->B[i].a.a);
+		kh_destroy(idx, (idxhash_t*)mi->B[i].h);
+	}
+	free(mi->B);
+	if (mi->name)
+		for (i = 0; i < mi->n; ++i) free(mi->name[i]);
+	free(mi->len); free(mi->name);
+	free(mi);
+}
+
+const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n)
+{
+	int mask = (1<<mi->b) - 1;
+	khint_t k;
+	mm_idx_bucket_t *b = &mi->B[minier&mask];
+	idxhash_t *h = (idxhash_t*)b->h;
+	*n = 0;
+	if (h == 0) return 0;
+	k = kh_get(idx, h, minier>>mi->b<<1);
+	if (k == kh_end(h)) return 0;
+	if (kh_key(h, k)&1) {
+		*n = 1;
+		return &kh_val(h, k);
+	} else {
+		*n = (uint32_t)kh_val(h, k);
+		return &b->p[kh_val(h, k)>>32];
+	}
+}
+
+uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
+{
+	int i;
+	size_t n = 0;
+	uint32_t thres;
+	khint_t *a, k;
+	if (f <= 0.) return UINT32_MAX;
+	for (i = 0; i < 1<<mi->b; ++i)
+		if (mi->B[i].h) n += kh_size((idxhash_t*)mi->B[i].h);
+	a = (uint32_t*)malloc(n * 4);
+	for (i = n = 0; i < 1<<mi->b; ++i) {
+		idxhash_t *h = (idxhash_t*)mi->B[i].h;
+		if (h == 0) continue;
+		for (k = 0; k < kh_end(h); ++k) {
+			if (!kh_exist(h, k)) continue;
+			a[n++] = kh_key(h, k)&1? 1 : (uint32_t)kh_val(h, k);
+		}
+	}
+	thres = ks_ksmall_uint32_t(n, a, (uint32_t)((1. - f) * n)) + 1;
+	free(a);
+	return thres;
+}
+
+void mm_idx_set_max_occ(mm_idx_t *mi, float f)
+{
+	mi->freq_thres = f;
+	mi->max_occ = mm_idx_cal_max_occ(mi, f);
+}
+
+/*********************************
+ * Sort and generate hash tables *
+ *********************************/
+
+static void worker_post(void *g, long i, int tid)
+{
+	int j, start_a, start_p, n, n_keys;
+	idxhash_t *h;
+	mm_idx_t *mi = (mm_idx_t*)g;
+	mm_idx_bucket_t *b = &mi->B[i];
+	if (b->a.n == 0) return;
+
+	// sort by minimizer
+	radix_sort_128x(b->a.a, b->a.a + b->a.n);
+
+	// count and preallocate
+	for (j = 1, n = 1, n_keys = 0, b->n = 0; j <= b->a.n; ++j) {
+		if (j == b->a.n || b->a.a[j].x != b->a.a[j-1].x) {
+			++n_keys;
+			if (n > 1) b->n += n;
+			n = 1;
+		} else ++n;
+	}
+	h = kh_init(idx);
+	kh_resize(idx, h, n_keys);
+	b->p = (uint64_t*)calloc(b->n, 8);
+
+	// create the hash table
+	for (j = 1, n = 1, start_a = start_p = 0; j <= b->a.n; ++j) {
+		if (j == b->a.n || b->a.a[j].x != b->a.a[j-1].x) {
+			khint_t itr;
+			int absent;
+			mm128_t *p = &b->a.a[j-1];
+			itr = kh_put(idx, h, p->x>>mi->b<<1, &absent);
+			assert(absent && j - start_a == n);
+			if (n == 1) {
+				kh_key(h, itr) |= 1;
+				kh_val(h, itr) = p->y;
+			} else {
+				int k;
+				for (k = 0; k < n; ++k)
+					b->p[start_p + k] = b->a.a[start_a + k].y;
+				kh_val(h, itr) = (uint64_t)start_p<<32 | n;
+				start_p += n;
+			}
+			start_a = j, n = 1;
+		} else ++n;
+	}
+	b->h = h;
+	assert(b->n == start_p);
+
+	// deallocate and clear b->a
+	free(b->a.a);
+	b->a.n = b->a.m = 0, b->a.a = 0;
+}
+ 
+static void mm_idx_post(mm_idx_t *mi, int n_threads)
+{
+	kt_for(n_threads, worker_post, mi, 1<<mi->b);
+}
+
+/******************
+ * Generate index *
+ ******************/
+
+#include <string.h>
+#include <zlib.h>
+#include "bseq.h"
+
+void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
+
+typedef struct {
+	int tbatch_size, n_processed, keep_name;
+	bseq_file_t *fp;
+	uint64_t ibatch_size, n_read;
+	mm_idx_t *mi;
+} pipeline_t;
+
+typedef struct {
+    int n_seq;
+	bseq1_t *seq;
+	mm128_v a;
+} step_t;
+
+static void mm_idx_add(mm_idx_t *mi, int n, const mm128_t *a)
+{
+	int i, mask = (1<<mi->b) - 1;
+	for (i = 0; i < n; ++i) {
+		mm128_v *p = &mi->B[a[i].x&mask].a;
+		kv_push(mm128_t, *p, a[i]);
+	}
+}
+
+static void *worker_pipeline(void *shared, int step, void *in)
+{
+	int i;
+    pipeline_t *p = (pipeline_t*)shared;
+    if (step == 0) { // step 0: read sequences
+        step_t *s;
+		if (p->n_read > p->ibatch_size) return 0;
+        s = (step_t*)calloc(1, sizeof(step_t));
+		s->seq = bseq_read(p->fp, p->tbatch_size, &s->n_seq);
+		if (s->seq) {
+			uint32_t old_m = p->mi->n, m, n;
+			assert((uint64_t)p->n_processed + s->n_seq <= INT32_MAX);
+			m = n = p->mi->n + s->n_seq;
+			kroundup32(m); kroundup32(old_m);
+			if (old_m != m) {
+				if (p->keep_name)
+					p->mi->name = (char**)realloc(p->mi->name, m * sizeof(char*));
+				p->mi->len = (int*)realloc(p->mi->len, m * sizeof(int));
+			}
+			for (i = 0; i < s->n_seq; ++i) {
+				if (p->keep_name) {
+					assert(strlen(s->seq[i].name) <= 254);
+					p->mi->name[p->mi->n] = strdup(s->seq[i].name);
+				}
+				p->mi->len[p->mi->n++] = s->seq[i].l_seq;
+				s->seq[i].rid = p->n_processed++;
+				p->n_read += s->seq[i].l_seq;
+			}
+			return s;
+		} else free(s);
+    } else if (step == 1) { // step 1: compute sketch
+        step_t *s = (step_t*)in;
+		for (i = 0; i < s->n_seq; ++i) {
+			bseq1_t *t = &s->seq[i];
+			mm_sketch(t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, &s->a);
+			free(t->seq); free(t->name);
+		}
+		free(s->seq); s->seq = 0;
+		return s;
+    } else if (step == 2) { // dispatch sketch to buckets
+        step_t *s = (step_t*)in;
+		mm_idx_add(p->mi, s->a.n, s->a.a);
+		free(s->a.a); free(s);
+	}
+    return 0;
+}
+
+mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int tbatch_size, int n_threads, uint64_t ibatch_size, int keep_name)
+{
+	pipeline_t pl;
+	memset(&pl, 0, sizeof(pipeline_t));
+	pl.tbatch_size = tbatch_size;
+	pl.keep_name = keep_name;
+	pl.ibatch_size = ibatch_size;
+	pl.fp = fp;
+	if (pl.fp == 0) return 0;
+	pl.mi = mm_idx_init(w, k, b);
+
+	kt_pipeline(n_threads < 3? n_threads : 3, worker_pipeline, &pl, 3);
+	if (mm_verbose >= 3)
+		fprintf(stderr, "[M::%s::%.3f*%.2f] collected minimizers\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0));
+
+	mm_idx_post(pl.mi, n_threads);
+	if (mm_verbose >= 3)
+		fprintf(stderr, "[M::%s::%.3f*%.2f] sorted minimizers\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0));
+
+	return pl.mi;
+}
+
+mm_idx_t *mm_idx_build(const char *fn, int w, int k, int n_threads) // a simpler interface
+{
+	bseq_file_t *fp;
+	mm_idx_t *mi;
+	fp = bseq_open(fn);
+	if (fp == 0) return 0;
+	mi = mm_idx_gen(fp, w, k, MM_IDX_DEF_B, 1<<18, n_threads, UINT64_MAX, 1);
+	mm_idx_set_max_occ(mi, 0.001);
+	bseq_close(fp);
+	return mi;
+}
+
+/*************
+ * index I/O *
+ *************/
+
+#define MM_IDX_MAGIC "MMI\1"
+
+void mm_idx_dump(FILE *fp, const mm_idx_t *mi)
+{
+	uint32_t x[6];
+	int i;
+	x[0] = mi->w, x[1] = mi->k, x[2] = mi->b, x[3] = mi->n, x[4] = mi->name? 1 : 0, x[5] = mi->max_occ;
+	fwrite(MM_IDX_MAGIC, 1, 4, fp);
+	fwrite(x, 4, 6, fp);
+	fwrite(&mi->freq_thres, sizeof(float), 1, fp);
+	fwrite(mi->len, 4, mi->n, fp);
+	if (mi->name) {
+		for (i = 0; i < mi->n; ++i) {
+			uint8_t l;
+			l = strlen(mi->name[i]);
+			fwrite(&l, 1, 1, fp);
+			fwrite(mi->name[i], 1, l, fp);
+		}
+	}
+	for (i = 0; i < 1<<mi->b; ++i) {
+		mm_idx_bucket_t *b = &mi->B[i];
+		khint_t k;
+		idxhash_t *h = (idxhash_t*)b->h;
+		uint32_t size = h? h->size : 0;
+		fwrite(&b->n, 4, 1, fp);
+		fwrite(b->p, 8, b->n, fp);
+		fwrite(&size, 4, 1, fp);
+		if (size == 0) continue;
+		for (k = 0; k < kh_end(h); ++k) {
+			uint64_t x[2];
+			if (!kh_exist(h, k)) continue;
+			x[0] = kh_key(h, k), x[1] = kh_val(h, k);
+			fwrite(x, 8, 2, fp);
+		}
+	}
+}
+
+mm_idx_t *mm_idx_load(FILE *fp)
+{
+	int i;
+	char magic[4];
+	uint32_t x[6];
+	mm_idx_t *mi;
+	if (fread(magic, 1, 4, fp) != 4) return 0;
+	if (strncmp(magic, MM_IDX_MAGIC, 4) != 0) return 0;
+	if (fread(x, 4, 6, fp) != 6) return 0;
+	mi = mm_idx_init(x[0], x[1], x[2]);
+	mi->n = x[3], mi->max_occ = x[5];
+	fread(&mi->freq_thres, sizeof(float), 1, fp);
+	mi->len = (int32_t*)malloc(mi->n * 4);
+	fread(mi->len, 4, mi->n, fp);
+	if (x[4]) { // has names
+		mi->name = (char**)calloc(mi->n, sizeof(char*));
+		for (i = 0; i < mi->n; ++i) {
+			uint8_t l;
+			fread(&l, 1, 1, fp);
+			mi->name[i] = (char*)malloc(l + 1);
+			fread(mi->name[i], 1, l, fp);
+			mi->name[i][l] = 0;
+		}
+	}
+	for (i = 0; i < 1<<mi->b; ++i) {
+		mm_idx_bucket_t *b = &mi->B[i];
+		uint32_t j, size;
+		khint_t k;
+		idxhash_t *h;
+		fread(&b->n, 4, 1, fp);
+		b->p = (uint64_t*)malloc(b->n * 8);
+		fread(b->p, 8, b->n, fp);
+		fread(&size, 4, 1, fp);
+		if (size == 0) continue;
+		b->h = h = kh_init(idx);
+		kh_resize(idx, h, size);
+		for (j = 0; j < size; ++j) {
+			uint64_t x[2];
+			int absent;
+			fread(x, 8, 2, fp);
+			k = kh_put(idx, h, x[0], &absent);
+			assert(absent);
+			kh_val(h, k) = x[1];
+		}
+	}
+	return mi;
+}
diff --git a/kdq.h b/kdq.h
new file mode 100644
index 0000000..edd55b5
--- /dev/null
+++ b/kdq.h
@@ -0,0 +1,128 @@
+#ifndef __AC_KDQ_H
+#define __AC_KDQ_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#define __KDQ_TYPE(type) \
+	typedef struct { \
+		size_t front:58, bits:6, count, mask; \
+		type *a; \
+	} kdq_##type##_t;
+
+#define kdq_t(type) kdq_##type##_t
+#define kdq_size(q) ((q)->count)
+#define kdq_first(q) ((q)->a[(q)->front])
+#define kdq_last(q) ((q)->a[((q)->front + (q)->count - 1) & (q)->mask])
+#define kdq_at(q, i) ((q)->a[((q)->front + (i)) & (q)->mask])
+
+#define __KDQ_IMPL(type, SCOPE) \
+	SCOPE kdq_##type##_t *kdq_init_##type() \
+	{ \
+		kdq_##type##_t *q; \
+		q = (kdq_##type##_t*)calloc(1, sizeof(kdq_##type##_t)); \
+		q->bits = 2, q->mask = (1ULL<<q->bits) - 1; \
+		q->a = (type*)malloc((1<<q->bits) * sizeof(type)); \
+		return q; \
+	} \
+	SCOPE void kdq_destroy_##type(kdq_##type##_t *q) \
+	{ \
+		if (q == 0) return; \
+		free(q->a); free(q); \
+	} \
+	SCOPE int kdq_resize_##type(kdq_##type##_t *q, int new_bits) \
+	{ \
+		size_t new_size = 1ULL<<new_bits, old_size = 1ULL<<q->bits; \
+		if (new_size < q->count) { /* not big enough */ \
+			int i; \
+			for (i = 0; i < 64; ++i) \
+				if (1ULL<<i > q->count) break; \
+			new_bits = i, new_size = 1ULL<<new_bits; \
+		} \
+		if (new_bits == q->bits) return q->bits; /* unchanged */ \
+		if (new_bits > q->bits) q->a = (type*)realloc(q->a, (1ULL<<new_bits) * sizeof(type)); \
+		if (q->front + q->count <= old_size) { /* unwrapped */ \
+			if (q->front + q->count > new_size) /* only happens for shrinking */ \
+				memmove(q->a, q->a + new_size, (q->front + q->count - new_size) * sizeof(type)); \
+		} else { /* wrapped */ \
+			memmove(q->a + (new_size - (old_size - q->front)), q->a + q->front, (old_size - q->front) * sizeof(type)); \
+			q->front = new_size - (old_size - q->front); \
+		} \
+		q->bits = new_bits, q->mask = (1ULL<<q->bits) - 1; \
+		if (new_bits < q->bits) q->a = (type*)realloc(q->a, (1ULL<<new_bits) * sizeof(type)); \
+		return q->bits; \
+	} \
+	SCOPE type *kdq_pushp_##type(kdq_##type##_t *q) \
+	{ \
+		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
+		return &q->a[((q->count++) + q->front) & (q)->mask]; \
+	} \
+	SCOPE void kdq_push_##type(kdq_##type##_t *q, type v) \
+	{ \
+		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
+		q->a[((q->count++) + q->front) & (q)->mask] = v; \
+	} \
+	SCOPE type *kdq_unshiftp_##type(kdq_##type##_t *q) \
+	{ \
+		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
+		++q->count; \
+		q->front = q->front? q->front - 1 : (1ULL<<q->bits) - 1; \
+		return &q->a[q->front]; \
+	} \
+	SCOPE void kdq_unshift_##type(kdq_##type##_t *q, type v) \
+	{ \
+		type *p; \
+		p = kdq_unshiftp_##type(q); \
+		*p = v; \
+	} \
+	SCOPE type *kdq_pop_##type(kdq_##type##_t *q) \
+	{ \
+		return q->count? &q->a[((--q->count) + q->front) & q->mask] : 0; \
+	} \
+	SCOPE type *kdq_shift_##type(kdq_##type##_t *q) \
+	{ \
+		type *d = 0; \
+		if (q->count == 0) return 0; \
+		d = &q->a[q->front++]; \
+		q->front &= q->mask; \
+		--q->count; \
+		return d; \
+	}
+
+#define KDQ_INIT2(type, SCOPE) \
+	__KDQ_TYPE(type) \
+	__KDQ_IMPL(type, SCOPE)
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+#define KDQ_INIT(type) KDQ_INIT2(type, static inline klib_unused)
+
+#define KDQ_DECLARE(type) \
+	__KDQ_TYPE(type) \
+	kdq_##type##_t *kdq_init_##type(); \
+	void kdq_destroy_##type(kdq_##type##_t *q); \
+	int kdq_resize_##type(kdq_##type##_t *q, int new_bits); \
+	type *kdq_pushp_##type(kdq_##type##_t *q); \
+	void kdq_push_##type(kdq_##type##_t *q, type v); \
+	type *kdq_unshiftp_##type(kdq_##type##_t *q); \
+	void kdq_unshift_##type(kdq_##type##_t *q, type v); \
+	type *kdq_pop_##type(kdq_##type##_t *q); \
+	type *kdq_shift_##type(kdq_##type##_t *q);
+
+#define kdq_init(type) kdq_init_##type()
+#define kdq_destroy(type, q) kdq_destroy_##type(q)
+#define kdq_resize(type, q, new_bits) kdq_resize_##type(q, new_bits)
+#define kdq_pushp(type, q) kdq_pushp_##type(q)
+#define kdq_push(type, q, v) kdq_push_##type(q, v)
+#define kdq_pop(type, q) kdq_pop_##type(q)
+#define kdq_unshiftp(type, q) kdq_unshiftp_##type(q)
+#define kdq_unshift(type, q, v) kdq_unshift_##type(q, v)
+#define kdq_shift(type, q) kdq_shift_##type(q)
+
+#endif
diff --git a/khash.h b/khash.h
new file mode 100644
index 0000000..5e55088
--- /dev/null
+++ b/khash.h
@@ -0,0 +1,619 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  An example:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT(32, char)
+int main() {
+	int ret, is_missing;
+	khiter_t k;
+	khash_t(32) *h = kh_init(32);
+	k = kh_put(32, h, 5, &ret);
+	kh_value(h, k) = 10;
+	k = kh_get(32, h, 10);
+	is_missing = (k == kh_end(h));
+	k = kh_get(32, h, 5);
+	kh_del(32, h, k);
+	for (k = kh_begin(h); k != kh_end(h); ++k)
+		if (kh_exist(h, k)) kh_value(h, k) = 1;
+	kh_destroy(32, h);
+	return 0;
+}
+*/
+
+/*
+  2013-05-02 (0.2.8):
+
+	* Use quadratic probing. When the capacity is power of 2, stepping function
+	  i*(i+1)/2 guarantees to traverse each bucket. It is better than double
+	  hashing on cache performance and is more robust than linear probing.
+
+	  In theory, double hashing should be more robust than quadratic probing.
+	  However, my implementation is probably not for large hash tables, because
+	  the second hash function is closely tied to the first hash function,
+	  which reduce the effectiveness of double hashing.
+
+	Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
+
+  2011-12-29 (0.2.7):
+
+    * Minor code clean up; no actual effect.
+
+  2011-09-16 (0.2.6):
+
+	* The capacity is a power of 2. This seems to dramatically improve the
+	  speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
+
+	   - http://code.google.com/p/ulib/
+	   - http://nothings.org/computer/judy/
+
+	* Allow to optionally use linear probing which usually has better
+	  performance for random input. Double hashing is still the default as it
+	  is more robust to certain non-random input.
+
+	* Added Wang's integer hash function (not used by default). This hash
+	  function is more robust to certain non-random input.
+
+  2011-02-14 (0.2.5):
+
+    * Allow to declare global functions.
+
+  2009-09-26 (0.2.4):
+
+    * Improve portability
+
+  2008-09-19 (0.2.3):
+
+	* Corrected the example
+	* Improved interfaces
+
+  2008-09-11 (0.2.2):
+
+	* Improved speed a little in kh_put()
+
+  2008-09-10 (0.2.1):
+
+	* Added kh_clear()
+	* Fixed a compiling error
+
+  2008-09-02 (0.2.0):
+
+	* Changed to token concatenation which increases flexibility.
+
+  2008-08-31 (0.1.2):
+
+	* Fixed a bug in kh_get(), which has not been tested previously.
+
+  2008-08-31 (0.1.1):
+
+	* Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+  @header
+
+  Generic hash table library.
+ */
+
+#define AC_VERSION_KHASH_H "0.2.8"
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+/* compiler specific configuration */
+
+#if UINT_MAX == 0xffffffffu
+typedef unsigned int khint32_t;
+#elif ULONG_MAX == 0xffffffffu
+typedef unsigned long khint32_t;
+#endif
+
+#if ULONG_MAX == ULLONG_MAX
+typedef unsigned long khint64_t;
+#else
+typedef unsigned long long khint64_t;
+#endif
+
+#ifndef kh_inline
+#ifdef _MSC_VER
+#define kh_inline __inline
+#else
+#define kh_inline inline
+#endif
+#endif /* kh_inline */
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+
+#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef kcalloc
+#define kcalloc(N,Z) calloc(N,Z)
+#endif
+#ifndef kmalloc
+#define kmalloc(Z) malloc(Z)
+#endif
+#ifndef krealloc
+#define krealloc(P,Z) realloc(P,Z)
+#endif
+#ifndef kfree
+#define kfree(P) free(P)
+#endif
+
+static const double __ac_HASH_UPPER = 0.77;
+
+#define __KHASH_TYPE(name, khkey_t, khval_t) \
+	typedef struct kh_##name##_s { \
+		khint_t n_buckets, size, n_occupied, upper_bound; \
+		khint32_t *flags; \
+		khkey_t *keys; \
+		khval_t *vals; \
+	} kh_##name##_t;
+
+#define __KHASH_PROTOTYPES(name, khkey_t, khval_t)	 					\
+	extern kh_##name##_t *kh_init_##name(void);							\
+	extern void kh_destroy_##name(kh_##name##_t *h);					\
+	extern void kh_clear_##name(kh_##name##_t *h);						\
+	extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); 	\
+	extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
+	extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
+	extern void kh_del_##name(kh_##name##_t *h, khint_t x);
+
+#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	SCOPE kh_##name##_t *kh_init_##name(void) {							\
+		return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t));		\
+	}																	\
+	SCOPE void kh_destroy_##name(kh_##name##_t *h)						\
+	{																	\
+		if (h) {														\
+			kfree((void *)h->keys); kfree(h->flags);					\
+			kfree((void *)h->vals);										\
+			kfree(h);													\
+		}																\
+	}																	\
+	SCOPE void kh_clear_##name(kh_##name##_t *h)						\
+	{																	\
+		if (h && h->flags) {											\
+			memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
+			h->size = h->n_occupied = 0;								\
+		}																\
+	}																	\
+	SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) 	\
+	{																	\
+		if (h->n_buckets) {												\
+			khint_t k, i, last, mask, step = 0; \
+			mask = h->n_buckets - 1;									\
+			k = __hash_func(key); i = k & mask;							\
+			last = i; \
+			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+				i = (i + (++step)) & mask; \
+				if (i == last) return h->n_buckets;						\
+			}															\
+			return __ac_iseither(h->flags, i)? h->n_buckets : i;		\
+		} else return 0;												\
+	}																	\
+	SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+	{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
+		khint32_t *new_flags = 0;										\
+		khint_t j = 1;													\
+		{																\
+			kroundup32(new_n_buckets); 									\
+			if (new_n_buckets < 4) new_n_buckets = 4;					\
+			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* requested size is too small */ \
+			else { /* hash table size to be changed (shrink or expand); rehash */ \
+				new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t));	\
+				if (!new_flags) return -1;								\
+				memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
+				if (h->n_buckets < new_n_buckets) {	/* expand */		\
+					khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
+					if (!new_keys) { kfree(new_flags); return -1; }		\
+					h->keys = new_keys;									\
+					if (kh_is_map) {									\
+						khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
+						if (!new_vals) { kfree(new_flags); return -1; }	\
+						h->vals = new_vals;								\
+					}													\
+				} /* otherwise shrink */								\
+			}															\
+		}																\
+		if (j) { /* rehashing is needed */								\
+			for (j = 0; j != h->n_buckets; ++j) {						\
+				if (__ac_iseither(h->flags, j) == 0) {					\
+					khkey_t key = h->keys[j];							\
+					khval_t val;										\
+					khint_t new_mask;									\
+					new_mask = new_n_buckets - 1; 						\
+					if (kh_is_map) val = h->vals[j];					\
+					__ac_set_isdel_true(h->flags, j);					\
+					while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+						khint_t k, i, step = 0; \
+						k = __hash_func(key);							\
+						i = k & new_mask;								\
+						while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
+						__ac_set_isempty_false(new_flags, i);			\
+						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
+							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
+							__ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
+						} else { /* write the element and jump out of the loop */ \
+							h->keys[i] = key;							\
+							if (kh_is_map) h->vals[i] = val;			\
+							break;										\
+						}												\
+					}													\
+				}														\
+			}															\
+			if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
+				h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
+				if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
+			}															\
+			kfree(h->flags); /* free the working space */				\
+			h->flags = new_flags;										\
+			h->n_buckets = new_n_buckets;								\
+			h->n_occupied = h->size;									\
+			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+		}																\
+		return 0;														\
+	}																	\
+	SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
+	{																	\
+		khint_t x;														\
+		if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
+			if (h->n_buckets > (h->size<<1)) {							\
+				if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
+					*ret = -1; return h->n_buckets;						\
+				}														\
+			} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
+				*ret = -1; return h->n_buckets;							\
+			}															\
+		} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+		{																\
+			khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
+			x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
+			if (__ac_isempty(h->flags, i)) x = i; /* for speed up */	\
+			else {														\
+				last = i; \
+				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+					if (__ac_isdel(h->flags, i)) site = i;				\
+					i = (i + (++step)) & mask; \
+					if (i == last) { x = site; break; }					\
+				}														\
+				if (x == h->n_buckets) {								\
+					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
+					else x = i;											\
+				}														\
+			}															\
+		}																\
+		if (__ac_isempty(h->flags, x)) { /* not present at all */		\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size; ++h->n_occupied;									\
+			*ret = 1;													\
+		} else if (__ac_isdel(h->flags, x)) { /* deleted */				\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size;													\
+			*ret = 2;													\
+		} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
+		return x;														\
+	}																	\
+	SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x)				\
+	{																	\
+		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {			\
+			__ac_set_isdel_true(h->flags, x);							\
+			--h->size;													\
+		}																\
+	}
+
+#define KHASH_DECLARE(name, khkey_t, khval_t)		 					\
+	__KHASH_TYPE(name, khkey_t, khval_t) 								\
+	__KHASH_PROTOTYPES(name, khkey_t, khval_t)
+
+#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	__KHASH_TYPE(name, khkey_t, khval_t) 								\
+	__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
+
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
+
+/* --- BEGIN OF HASH FUNCTIONS --- */
+
+/*! @function
+  @abstract     Integer hash function
+  @param  key   The integer [khint32_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int_hash_func(key) (khint32_t)(key)
+/*! @function
+  @abstract     Integer comparison function
+ */
+#define kh_int_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     64-bit integer hash function
+  @param  key   The integer [khint64_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
+/*! @function
+  @abstract     64-bit integer comparison function
+ */
+#define kh_int64_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     const char* hash function
+  @param  s     Pointer to a null terminated string
+  @return       The hash value
+ */
+static kh_inline khint_t __ac_X31_hash_string(const char *s)
+{
+	khint_t h = (khint_t)*s;
+	if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
+	return h;
+}
+/*! @function
+  @abstract     Another interface to const char* hash function
+  @param  key   Pointer to a null terminated string [const char*]
+  @return       The hash value [khint_t]
+ */
+#define kh_str_hash_func(key) __ac_X31_hash_string(key)
+/*! @function
+  @abstract     Const char* comparison function
+ */
+#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
+
+static kh_inline khint_t __ac_Wang_hash(khint_t key)
+{
+    key += ~(key << 15);
+    key ^=  (key >> 10);
+    key +=  (key << 3);
+    key ^=  (key >> 6);
+    key += ~(key << 11);
+    key ^=  (key >> 16);
+    return key;
+}
+#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
+
+/* --- END OF HASH FUNCTIONS --- */
+
+/* Other convenient macros... */
+
+/*!
+  @abstract Type of the hash table.
+  @param  name  Name of the hash table [symbol]
+ */
+#define khash_t(name) kh_##name##_t
+
+/*! @function
+  @abstract     Initiate a hash table.
+  @param  name  Name of the hash table [symbol]
+  @return       Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_init(name) kh_init_##name()
+
+/*! @function
+  @abstract     Destroy a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_destroy(name, h) kh_destroy_##name(h)
+
+/*! @function
+  @abstract     Reset a hash table without deallocating memory.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_clear(name, h) kh_clear_##name(h)
+
+/*! @function
+  @abstract     Resize a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  s     New size [khint_t]
+ */
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+
+/*! @function
+  @abstract     Insert a key to the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @param  r     Extra return code: -1 if the operation failed;
+                0 if the key is present in the hash table;
+                1 if the bucket is empty (never used); 2 if the element in
+				the bucket has been deleted [int*]
+  @return       Iterator to the inserted element [khint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+  @abstract     Retrieve a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+  @abstract     Remove a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Iterator to the element to be deleted [khint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+  @abstract     Get the number of buckets in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of buckets in the hash table [khint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Iterate over the entries in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  kvar  Variable to which key will be assigned
+  @param  vvar  Variable to which value will be assigned
+  @param  code  Block of code to execute
+ */
+#define kh_foreach(h, kvar, vvar, code) { khint_t __i;		\
+	for (__i = kh_begin(h); __i != kh_end(h); ++__i) {		\
+		if (!kh_exist(h,__i)) continue;						\
+		(kvar) = kh_key(h,__i);								\
+		(vvar) = kh_val(h,__i);								\
+		code;												\
+	} }
+
+/*! @function
+  @abstract     Iterate over the values in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  vvar  Variable to which value will be assigned
+  @param  code  Block of code to execute
+ */
+#define kh_foreach_value(h, vvar, code) { khint_t __i;		\
+	for (__i = kh_begin(h); __i != kh_end(h); ++__i) {		\
+		if (!kh_exist(h,__i)) continue;						\
+		(vvar) = kh_val(h,__i);								\
+		code;												\
+	} }
+
+/* More conenient interfaces */
+
+/*! @function
+  @abstract     Instantiate a hash set containing integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name)										\
+	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t)								\
+	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT64(name)										\
+	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT64(name, khval_t)								\
+	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
+
+typedef const char *kh_cstr_t;
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_STR(name)										\
+	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t)								\
+	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+#endif /* __AC_KHASH_H */
diff --git a/kseq.h b/kseq.h
new file mode 100644
index 0000000..6b98e51
--- /dev/null
+++ b/kseq.h
@@ -0,0 +1,248 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Last Modified: 05MAR2012 */
+
+#ifndef AC_KSEQ_H
+#define AC_KSEQ_H
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
+#define KS_SEP_TAB   1 // isspace() && !' '
+#define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
+#define KS_SEP_MAX   2
+
+#define __KS_TYPE(type_t) \
+	typedef struct __kstream_t { \
+		int begin, end; \
+		int is_eof:2, bufsize:30; \
+		type_t f; \
+		unsigned char *buf; \
+	} kstream_t;
+
+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
+
+#define __KS_BASIC(SCOPE, type_t, __bufsize) \
+	SCOPE kstream_t *ks_init(type_t f) \
+	{ \
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
+		ks->f = f; ks->bufsize = __bufsize; \
+		ks->buf = (unsigned char*)malloc(__bufsize); \
+		return ks; \
+	} \
+	SCOPE void ks_destroy(kstream_t *ks) \
+	{ \
+		if (!ks) return; \
+		free(ks->buf); \
+		free(ks); \
+	}
+
+#define __KS_INLINED(__read) \
+	static inline int ks_getc(kstream_t *ks) \
+	{ \
+		if (ks->is_eof && ks->begin >= ks->end) return -1; \
+		if (ks->begin >= ks->end) { \
+			ks->begin = 0; \
+			ks->end = __read(ks->f, ks->buf, ks->bufsize); \
+			if (ks->end < ks->bufsize) ks->is_eof = 1; \
+			if (ks->end == 0) return -1; \
+		} \
+		return (int)ks->buf[ks->begin++]; \
+	} \
+	static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
+	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	unsigned l, m;
+	char *s;
+} kstring_t;
+#endif
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#define __KS_GETUNTIL(SCOPE, __read) \
+	SCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append)  \
+	{ \
+		if (dret) *dret = 0; \
+		str->l = append? str->l : 0; \
+		if (ks->begin >= ks->end && ks->is_eof) return -1; \
+		for (;;) { \
+			int i; \
+			if (ks->begin >= ks->end) { \
+				if (!ks->is_eof) { \
+					ks->begin = 0; \
+					ks->end = __read(ks->f, ks->buf, ks->bufsize); \
+					if (ks->end < ks->bufsize) ks->is_eof = 1; \
+					if (ks->end == 0) break; \
+				} else break; \
+			} \
+			if (delimiter == KS_SEP_LINE) {  \
+				for (i = ks->begin; i < ks->end; ++i)  \
+					if (ks->buf[i] == '\n') break; \
+			} else if (delimiter > KS_SEP_MAX) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (ks->buf[i] == delimiter) break; \
+			} else if (delimiter == KS_SEP_SPACE) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i])) break; \
+			} else if (delimiter == KS_SEP_TAB) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break;  \
+			} else i = 0; /* never come to here! */ \
+			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
+				str->m = str->l + (i - ks->begin) + 1; \
+				kroundup32(str->m); \
+				str->s = (char*)realloc(str->s, str->m); \
+			} \
+			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin);  \
+			str->l = str->l + (i - ks->begin); \
+			ks->begin = i + 1; \
+			if (i < ks->end) { \
+				if (dret) *dret = ks->buf[i]; \
+				break; \
+			} \
+		} \
+		if (str->s == 0) { \
+			str->m = 1; \
+			str->s = (char*)calloc(1, 1); \
+		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
+		str->s[str->l] = '\0';											\
+		return str->l; \
+	}
+
+#define KSTREAM_INIT2(SCOPE, type_t, __read, __bufsize) \
+	__KS_TYPE(type_t) \
+	__KS_BASIC(SCOPE, type_t, __bufsize) \
+	__KS_GETUNTIL(SCOPE, __read) \
+	__KS_INLINED(__read)
+
+#define KSTREAM_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize)
+
+#define KSTREAM_DECLARE(type_t, __read) \
+	__KS_TYPE(type_t) \
+	extern int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append); \
+	extern kstream_t *ks_init(type_t f); \
+	extern void ks_destroy(kstream_t *ks); \
+	__KS_INLINED(__read)
+
+/******************
+ * FASTA/Q parser *
+ ******************/
+
+#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
+
+#define __KSEQ_BASIC(SCOPE, type_t)										\
+	SCOPE kseq_t *kseq_init(type_t fd)									\
+	{																	\
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
+		s->f = ks_init(fd);												\
+		return s;														\
+	}																	\
+	SCOPE void kseq_destroy(kseq_t *ks)									\
+	{																	\
+		if (!ks) return;												\
+		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
+		ks_destroy(ks->f);												\
+		free(ks);														\
+	}
+
+/* Return value:
+   >=0  length of the sequence (normal)
+   -1   end-of-file
+   -2   truncated quality string
+ */
+#define __KSEQ_READ(SCOPE) \
+	SCOPE int kseq_read(kseq_t *seq) \
+	{ \
+		int c; \
+		kstream_t *ks = seq->f; \
+		if (seq->last_char == 0) { /* then jump to the next header line */ \
+			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
+			if (c == -1) return -1; /* end of file */ \
+			seq->last_char = c; \
+		} /* else: the first header char has been read in the previous call */ \
+		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
+		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
+		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
+		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
+			seq->seq.m = 256; \
+			seq->seq.s = (char*)malloc(seq->seq.m); \
+		} \
+		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+			if (c == '\n') continue; /* skip empty lines */ \
+			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
+			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
+		} \
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
+		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
+			seq->seq.m = seq->seq.l + 2; \
+			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
+			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
+		} \
+		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
+		if (c != '+') return seq->seq.l; /* FASTA */ \
+		if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
+			seq->qual.m = seq->seq.m; \
+			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
+		} \
+		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+		if (c == -1) return -2; /* error: no quality string */ \
+		while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
+		seq->last_char = 0;	/* we have not come to the next header line */ \
+		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
+		return seq->seq.l; \
+	}
+
+#define __KSEQ_TYPE(type_t)						\
+	typedef struct {							\
+		kstring_t name, comment, seq, qual;		\
+		int last_char;							\
+		kstream_t *f;							\
+	} kseq_t;
+
+#define KSEQ_INIT2(SCOPE, type_t, __read)		\
+	KSTREAM_INIT2(SCOPE, type_t, __read, 16384)	\
+	__KSEQ_TYPE(type_t)							\
+	__KSEQ_BASIC(SCOPE, type_t)					\
+	__KSEQ_READ(SCOPE)
+
+#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
+
+#define KSEQ_DECLARE(type_t) \
+	__KS_TYPE(type_t) \
+	__KSEQ_TYPE(type_t) \
+	extern kseq_t *kseq_init(type_t fd); \
+	void kseq_destroy(kseq_t *ks); \
+	int kseq_read(kseq_t *seq);
+
+#endif
diff --git a/ksort.h b/ksort.h
new file mode 100644
index 0000000..0190f50
--- /dev/null
+++ b/ksort.h
@@ -0,0 +1,159 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2011 Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+// This is a simplified version of ksort.h
+
+#ifndef AC_KSORT_H
+#define AC_KSORT_H
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct {
+	void *left, *right;
+	int depth;
+} ks_isort_stack_t;
+
+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
+
+#define KSORT_INIT(name, type_t, __sort_lt)								\
+	size_t ks_lis_##name(size_t n, const type_t *a, size_t *b, size_t *_p) \
+	{ /* translated from: http://www.algorithmist.com/index.php/Longest_Increasing_Subsequence.cpp */ \
+		size_t i, u, v, *top = b, *p; \
+		if (n == 0) return 0; \
+		p = _p? _p : (size_t*)malloc(n * sizeof(size_t)); \
+		*top++ = 0; \
+		for (i = 1; i < n; i++) { \
+			if (__sort_lt(a[*(top-1)], a[i])) { \
+				p[i] = *(top-1); \
+				*top++ = i; \
+				continue; \
+			} \
+			for (u = 0, v = top - b - 1; u < v;) { \
+				size_t c = (u + v) >> 1; \
+				if (__sort_lt(a[b[c]], a[i])) u = c + 1; \
+				else v = c; \
+			} \
+			if (__sort_lt(a[i], a[b[u]])) { \
+				if (u > 0) p[i] = b[u-1]; \
+				b[u] = i; \
+			} \
+		} \
+		for (u = top - b, v = *(top-1); u--; v = p[v]) b[u] = v; \
+		if (!_p) free(p); \
+		return top - b; \
+	} \
+	type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)			\
+	{																	\
+		type_t *low, *high, *k, *ll, *hh, *mid;							\
+		low = arr; high = arr + n - 1; k = arr + kk;					\
+		for (;;) {														\
+			if (high <= low) return *k;									\
+			if (high == low + 1) {										\
+				if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
+				return *k;												\
+			}															\
+			mid = low + (high - low) / 2;								\
+			if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
+			if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
+			if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);	\
+			KSORT_SWAP(type_t, *mid, *(low+1));							\
+			ll = low + 1; hh = high;									\
+			for (;;) {													\
+				do ++ll; while (__sort_lt(*ll, *low));					\
+				do --hh; while (__sort_lt(*low, *hh));					\
+				if (hh < ll) break;										\
+				KSORT_SWAP(type_t, *ll, *hh);							\
+			}															\
+			KSORT_SWAP(type_t, *low, *hh);								\
+			if (hh <= k) low = ll;										\
+			if (hh >= k) high = hh - 1;									\
+		}																\
+	}																	\
+
+#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
+
+#define ks_lt_generic(a, b) ((a) < (b))
+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
+
+typedef const char *ksstr_t;
+
+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
+
+#define RS_MIN_SIZE 64
+
+#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \
+	typedef struct { \
+		rstype_t *b, *e; \
+	} rsbucket_##name##_t; \
+	void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \
+	{ \
+		rstype_t *i; \
+		for (i = beg + 1; i < end; ++i) \
+			if (rskey(*i) < rskey(*(i - 1))) { \
+				rstype_t *j, tmp = *i; \
+				for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \
+					*j = *(j - 1); \
+				*j = tmp; \
+			} \
+	} \
+	void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \
+	{ \
+		rstype_t *i; \
+		int size = 1<<n_bits, m = size - 1; \
+		rsbucket_##name##_t *k, b[size], *be = b + size; \
+		for (k = b; k != be; ++k) k->b = k->e = beg; \
+		for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \
+		for (k = b + 1; k != be; ++k) \
+			k->e += (k-1)->e - beg, k->b = (k-1)->e; \
+		for (k = b; k != be;) { \
+			if (k->b != k->e) { \
+				rsbucket_##name##_t *l; \
+				if ((l = b + (rskey(*k->b)>>s&m)) != k) { \
+					rstype_t tmp = *k->b, swap; \
+					do { \
+						swap = tmp; tmp = *l->b; *l->b++ = swap; \
+						l = b + (rskey(tmp)>>s&m); \
+					} while (l != k); \
+					*k->b++ = tmp; \
+				} else ++k->b; \
+			} else ++k; \
+		} \
+		for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \
+		if (s) { \
+			s = s > n_bits? s - n_bits : 0; \
+			for (k = b; k != be; ++k) \
+				if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \
+				else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \
+		} \
+	} \
+	void radix_sort_##name(rstype_t *beg, rstype_t *end) \
+	{ \
+		if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \
+		else rs_sort_##name(beg, end, 8, sizeof_key * 8 - 8); \
+	}
+
+#endif
diff --git a/kthread.c b/kthread.c
new file mode 100644
index 0000000..1b13494
--- /dev/null
+++ b/kthread.c
@@ -0,0 +1,146 @@
+#include <pthread.h>
+#include <stdlib.h>
+#include <limits.h>
+
+/************
+ * kt_for() *
+ ************/
+
+struct kt_for_t;
+
+typedef struct {
+	struct kt_for_t *t;
+	long i;
+} ktf_worker_t;
+
+typedef struct kt_for_t {
+	int n_threads;
+	long n;
+	ktf_worker_t *w;
+	void (*func)(void*,long,int);
+	void *data;
+} kt_for_t;
+
+static inline long steal_work(kt_for_t *t)
+{
+	int i, min_i = -1;
+	long k, min = LONG_MAX;
+	for (i = 0; i < t->n_threads; ++i)
+		if (min > t->w[i].i) min = t->w[i].i, min_i = i;
+	k = __sync_fetch_and_add(&t->w[min_i].i, t->n_threads);
+	return k >= t->n? -1 : k;
+}
+
+static void *ktf_worker(void *data)
+{
+	ktf_worker_t *w = (ktf_worker_t*)data;
+	long i;
+	for (;;) {
+		i = __sync_fetch_and_add(&w->i, w->t->n_threads);
+		if (i >= w->t->n) break;
+		w->t->func(w->t->data, i, w - w->t->w);
+	}
+	while ((i = steal_work(w->t)) >= 0)
+		w->t->func(w->t->data, i, w - w->t->w);
+	pthread_exit(0);
+}
+
+void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n)
+{
+	int i;
+	kt_for_t t;
+	pthread_t *tid;
+	t.func = func, t.data = data, t.n_threads = n_threads, t.n = n;
+	t.w = (ktf_worker_t*)alloca(n_threads * sizeof(ktf_worker_t));
+	tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t));
+	for (i = 0; i < n_threads; ++i)
+		t.w[i].t = &t, t.w[i].i = i;
+	for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktf_worker, &t.w[i]);
+	for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
+}
+
+/*****************
+ * kt_pipeline() *
+ *****************/
+
+struct ktp_t;
+
+typedef struct {
+	struct ktp_t *pl;
+	int64_t index;
+	int step;
+	void *data;
+} ktp_worker_t;
+
+typedef struct ktp_t {
+	void *shared;
+	void *(*func)(void*, int, void*);
+	int64_t index;
+	int n_workers, n_steps;
+	ktp_worker_t *workers;
+	pthread_mutex_t mutex;
+	pthread_cond_t cv;
+} ktp_t;
+
+static void *ktp_worker(void *data)
+{
+	ktp_worker_t *w = (ktp_worker_t*)data;
+	ktp_t *p = w->pl;
+	while (w->step < p->n_steps) {
+		// test whether we can kick off the job with this worker
+		pthread_mutex_lock(&p->mutex);
+		for (;;) {
+			int i;
+			// test whether another worker is doing the same step
+			for (i = 0; i < p->n_workers; ++i) {
+				if (w == &p->workers[i]) continue; // ignore itself
+				if (p->workers[i].step <= w->step && p->workers[i].index < w->index)
+					break;
+			}
+			if (i == p->n_workers) break; // no workers with smaller indices are doing w->step or the previous steps
+			pthread_cond_wait(&p->cv, &p->mutex);
+		}
+		pthread_mutex_unlock(&p->mutex);
+
+		// working on w->step
+		w->data = p->func(p->shared, w->step, w->step? w->data : 0); // for the first step, input is NULL
+
+		// update step and let other workers know
+		pthread_mutex_lock(&p->mutex);
+		w->step = w->step == p->n_steps - 1 || w->data? (w->step + 1) % p->n_steps : p->n_steps;
+		if (w->step == 0) w->index = p->index++;
+		pthread_cond_broadcast(&p->cv);
+		pthread_mutex_unlock(&p->mutex);
+	}
+	pthread_exit(0);
+}
+
+void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps)
+{
+	ktp_t aux;
+	pthread_t *tid;
+	int i;
+
+	if (n_threads < 1) n_threads = 1;
+	aux.n_workers = n_threads;
+	aux.n_steps = n_steps;
+	aux.func = func;
+	aux.shared = shared_data;
+	aux.index = 0;
+	pthread_mutex_init(&aux.mutex, 0);
+	pthread_cond_init(&aux.cv, 0);
+
+	aux.workers = (ktp_worker_t*)alloca(n_threads * sizeof(ktp_worker_t));
+	for (i = 0; i < n_threads; ++i) {
+		ktp_worker_t *w = &aux.workers[i];
+		w->step = 0; w->pl = &aux; w->data = 0;
+		w->index = aux.index++;
+	}
+
+	tid = (pthread_t*)alloca(n_threads * sizeof(pthread_t));
+	for (i = 0; i < n_threads; ++i) pthread_create(&tid[i], 0, ktp_worker, &aux.workers[i]);
+	for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
+
+	pthread_mutex_destroy(&aux.mutex);
+	pthread_cond_destroy(&aux.cv);
+}
diff --git a/kvec.h b/kvec.h
new file mode 100644
index 0000000..632fce4
--- /dev/null
+++ b/kvec.h
@@ -0,0 +1,110 @@
+/* The MIT License
+
+   Copyright (c) 2008, by Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  An example:
+
+#include "kvec.h"
+int main() {
+	kvec_t(int) array;
+	kv_init(array);
+	kv_push(int, array, 10); // append
+	kv_a(int, array, 20) = 5; // dynamic
+	kv_A(array, 20) = 4; // static
+	kv_destroy(array);
+	return 0;
+}
+*/
+
+/*
+  2008-09-22 (0.1.0):
+
+	* The initial version.
+
+*/
+
+#ifndef AC_KVEC_H
+#define AC_KVEC_H
+
+#include <stdlib.h>
+
+#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+
+#define kvec_t(type) struct { size_t n, m; type *a; }
+#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
+#define kv_destroy(v) free((v).a)
+#define kv_A(v, i) ((v).a[(i)])
+#define kv_pop(v) ((v).a[--(v).n])
+#define kv_size(v) ((v).n)
+#define kv_max(v) ((v).m)
+
+#define kv_resize(type, v, s) do { \
+		if ((v).m < (s)) { \
+			(v).m = (s); \
+			kv_roundup32((v).m); \
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
+		} \
+	} while (0)
+
+#define kv_copy(type, v1, v0) do {							\
+		if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n);	\
+		(v1).n = (v0).n;									\
+		memcpy((v1).a, (v0).a, sizeof(type) * (v0).n);		\
+	} while (0)												\
+
+#define kv_push(type, v, x) do {									\
+		if ((v).n == (v).m) {										\
+			(v).m = (v).m? (v).m<<1 : 2;							\
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m);	\
+		}															\
+		(v).a[(v).n++] = (x);										\
+	} while (0)
+
+#define kv_pushp(type, v, p) do { \
+		if ((v).n == (v).m) { \
+			(v).m = (v).m? (v).m<<1 : 2; \
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
+		} \
+		*(p) = &(v).a[(v).n++]; \
+	} while (0)
+
+#define kv_a(type, v, i) ((v).m <= (size_t)(i)?						\
+						  ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
+						   (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
+						  : (v).n <= (size_t)(i)? (v).n = (i)			\
+						  : 0), (v).a[(i)]
+
+#define kv_reverse(type, v, start) do { \
+		if ((v).m > 0 && (v).n > (start)) { \
+			size_t __i, __end = (v).n - (start); \
+			type *__a = (v).a + (start); \
+			for (__i = 0; __i < __end>>1; ++__i) { \
+				type __t = __a[__end - 1 - __i]; \
+				__a[__end - 1 - __i] = __a[__i]; __a[__i] = __t; \
+			} \
+		} \
+	} while (0)
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..d2060be
--- /dev/null
+++ b/main.c
@@ -0,0 +1,145 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include "minimap.h"
+
+#define MM_VERSION "0.2-r123"
+
+void liftrlimit()
+{
+#ifdef __linux__
+	struct rlimit r;
+	getrlimit(RLIMIT_AS, &r);
+	r.rlim_cur = r.rlim_max;
+	setrlimit(RLIMIT_AS, &r);
+#endif
+}
+
+int main(int argc, char *argv[])
+{
+	mm_mapopt_t opt;
+	int i, c, k = 15, w = -1, b = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx = 0;
+	int tbatch_size = 100000000;
+	uint64_t ibatch_size = 4000000000ULL;
+	float f = 0.001;
+	bseq_file_t *fp = 0;
+	char *fnw = 0;
+	FILE *fpr = 0, *fpw = 0;
+
+	liftrlimit();
+	mm_realtime0 = realtime();
+	mm_mapopt_init(&opt);
+
+	while ((c = getopt(argc, argv, "w:k:B:b:t:r:c:f:Vv:NOg:I:d:lRPST:m:L:Dx:")) >= 0) {
+		if (c == 'w') w = atoi(optarg);
+		else if (c == 'k') k = atoi(optarg);
+		else if (c == 'b') b = atoi(optarg);
+		else if (c == 'r') opt.radius = atoi(optarg);
+		else if (c == 'c') opt.min_cnt = atoi(optarg);
+		else if (c == 'm') opt.merge_frac = atof(optarg);
+		else if (c == 'f') f = atof(optarg);
+		else if (c == 't') n_threads = atoi(optarg);
+		else if (c == 'v') mm_verbose = atoi(optarg);
+		else if (c == 'g') opt.max_gap = atoi(optarg);
+		else if (c == 'N') keep_name = 0;
+		else if (c == 'd') fnw = optarg;
+		else if (c == 'l') is_idx = 1;
+		else if (c == 'R') opt.flag |= MM_F_WITH_REP;
+		else if (c == 'P') opt.flag &= ~MM_F_WITH_REP;
+		else if (c == 'D') opt.flag |= MM_F_NO_SELF;
+		else if (c == 'O') opt.flag |= MM_F_NO_ISO;
+		else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
+		else if (c == 'T') opt.sdust_thres = atoi(optarg);
+		else if (c == 'L') opt.min_match = atoi(optarg);
+		else if (c == 'V') {
+			puts(MM_VERSION);
+			return 0;
+		} else if (c == 'B' || c == 'I') {
+			double x;
+			char *p;
+			x = strtod(optarg, &p);
+			if (*p == 'G' || *p == 'g') x *= 1e9;
+			else if (*p == 'M' || *p == 'm') x *= 1e6;
+			else if (*p == 'K' || *p == 'k') x *= 1e3;
+			if (c == 'B') tbatch_size = (uint64_t)(x + .499);
+			else ibatch_size = (uint64_t)(x + .499);
+		} else if (c == 'x') {
+			if (strcmp(optarg, "ava10k") == 0) {
+				opt.flag |= MM_F_AVA | MM_F_NO_SELF;
+				opt.min_match = 100;
+				opt.merge_frac = 0.0;
+				w = 5;
+			}
+		}
+	}
+	if (w < 0) w = (int)(.6666667 * k + .499);
+
+	if (argc == optind) {
+		fprintf(stderr, "Usage: minimap [options] <target.fa> [query.fa] [...]\n");
+		fprintf(stderr, "Options:\n");
+		fprintf(stderr, "  Indexing:\n");
+		fprintf(stderr, "    -k INT     k-mer size [%d]\n", k);
+		fprintf(stderr, "    -w INT     minizer window size [{-k}*2/3]\n");
+		fprintf(stderr, "    -I NUM     split index for every ~NUM input bases [4G]\n");
+		fprintf(stderr, "    -d FILE    dump index to FILE []\n");
+		fprintf(stderr, "    -l         the 1st argument is a index file (overriding -k, -w and -I)\n");
+//		fprintf(stderr, "    -b INT     bucket bits [%d]\n", b); // most users would care about this
+		fprintf(stderr, "  Mapping:\n");
+		fprintf(stderr, "    -f FLOAT   filter out top FLOAT fraction of repetitive minimizers [%.3f]\n", f);
+		fprintf(stderr, "    -r INT     bandwidth [%d]\n", opt.radius);
+		fprintf(stderr, "    -m FLOAT   merge two chains if FLOAT fraction of minimizers are shared [%.2f]\n", opt.merge_frac);
+		fprintf(stderr, "    -c INT     retain a mapping if it consists of >=INT minimizers [%d]\n", opt.min_cnt);
+		fprintf(stderr, "    -L INT     min matching length [%d]\n", opt.min_match);
+		fprintf(stderr, "    -g INT     split a mapping if there is a gap longer than INT [%d]\n", opt.max_gap);
+		fprintf(stderr, "    -T INT     SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres);
+//		fprintf(stderr, "    -D         skip self mappings but keep dual mappings\n"); // too confusing to expose to end users
+		fprintf(stderr, "    -S         skip self and dual mappings\n");
+		fprintf(stderr, "    -O         drop isolated hits before chaining (EXPERIMENTAL)\n");
+		fprintf(stderr, "    -P         filtering potential repeats after mapping (EXPERIMENTAL)\n");
+//		fprintf(stderr, "    -R         skip post-mapping repeat filtering\n"); // deprecated option for backward compatibility
+		fprintf(stderr, "    -x STR     preset (recommended to be applied before other options) []\n");
+		fprintf(stderr, "               ava10k: -Sw5 -L100 -m0 (PacBio/ONT all-vs-all read mapping)\n");
+		fprintf(stderr, "  Input/Output:\n");
+		fprintf(stderr, "    -t INT     number of threads [%d]\n", n_threads);
+//		fprintf(stderr, "    -B NUM     process ~NUM bp in each batch [100M]\n");
+//		fprintf(stderr, "    -v INT     verbose level [%d]\n", mm_verbose);
+//		fprintf(stderr, "    -N         use integer as target names\n");
+		fprintf(stderr, "    -V         show version number\n");
+		fprintf(stderr, "\nSee minimap.1 for detailed description of the command-line options.\n");
+		return 1;
+	}
+
+	if (is_idx) fpr = fopen(argv[optind], "rb");
+	else fp = bseq_open(argv[optind]);
+	if (fnw) fpw = fopen(fnw, "wb");
+	for (;;) {
+		mm_idx_t *mi = 0;
+		if (fpr) mi = mm_idx_load(fpr);
+		else if (!bseq_eof(fp))
+			mi = mm_idx_gen(fp, w, k, b, tbatch_size, n_threads, ibatch_size, keep_name);
+		if (mi == 0) break;
+		if (mm_verbose >= 3)
+			fprintf(stderr, "[M::%s::%.3f*%.2f] loaded/built the index for %d target sequence(s)\n",
+					__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n);
+		mm_idx_set_max_occ(mi, f);
+		if (mm_verbose >= 3)
+			fprintf(stderr, "[M::%s] max occurrences of a minimizer to consider: %d\n", __func__, mi->max_occ);
+		if (fpw) mm_idx_dump(fpw, mi);
+		for (i = optind + 1; i < argc; ++i)
+			mm_map_file(mi, argv[i], &opt, n_threads, tbatch_size);
+		mm_idx_destroy(mi);
+	}
+	if (fpw) fclose(fpw);
+	if (fpr) fclose(fpr);
+	if (fp)  bseq_close(fp);
+
+	fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
+	fprintf(stderr, "[M::%s] CMD:", __func__);
+	for (i = 0; i < argc; ++i)
+		fprintf(stderr, " %s", argv[i]);
+	fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, realtime() - mm_realtime0, cputime());
+	return 0;
+}
diff --git a/map.c b/map.c
new file mode 100644
index 0000000..0c68595
--- /dev/null
+++ b/map.c
@@ -0,0 +1,374 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "bseq.h"
+#include "kvec.h"
+#include "minimap.h"
+#include "sdust.h"
+
+void mm_mapopt_init(mm_mapopt_t *opt)
+{
+	opt->radius = 500;
+	opt->max_gap = 10000;
+	opt->min_cnt = 4;
+	opt->min_match = 40;
+	opt->sdust_thres = 0;
+	opt->flag = MM_F_WITH_REP;
+	opt->merge_frac = .5;
+}
+
+/****************************
+ * Find approxiate mappings *
+ ****************************/
+
+struct mm_tbuf_s { // per-thread buffer
+	mm128_v mini; // query minimizers
+	mm128_v coef; // Hough transform coefficient
+	mm128_v intv; // intervals on sorted coef
+	uint32_v reg2mini;
+	uint32_v rep_aux;
+	sdust_buf_t *sdb;
+	// the following are for computing LIS
+	uint32_t n, m;
+	uint64_t *a;
+	size_t *b, *p;
+	// final output
+	kvec_t(mm_reg1_t) reg;
+};
+
+mm_tbuf_t *mm_tbuf_init()
+{
+	mm_tbuf_t *b;
+	b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t));
+	b->sdb = sdust_buf_init();
+	return b;
+}
+
+void mm_tbuf_destroy(mm_tbuf_t *b)
+{
+	if (b == 0) return;
+	free(b->mini.a); free(b->coef.a); free(b->intv.a); free(b->reg.a); free(b->reg2mini.a); free(b->rep_aux.a);
+	free(b->a); free(b->b); free(b->p);
+	sdust_buf_destroy(b->sdb);
+	free(b);
+}
+
+#include "ksort.h"
+#define sort_key_64(a) (a)
+KRADIX_SORT_INIT(64, uint64_t, sort_key_64, 8) 
+#define lt_low32(a, b) ((uint32_t)(a) < (uint32_t)(b))
+KSORT_INIT(low32lt, uint64_t, lt_low32)
+#define gt_low32(a, b) ((uint32_t)(a) > (uint32_t)(b))
+KSORT_INIT(low32gt, uint64_t, gt_low32)
+
+/* TODO: drop_rep() is not robust. For all-vs-all mapping but without the -S
+ * flag, all minimizers have at least one hit. The _thres_ computed below will
+ * be highly skewed. Some improvements need to be made. */
+
+static void drop_rep(mm_tbuf_t *b, int min_cnt)
+{
+	int i, j, n, m;
+	uint32_t thres;
+	b->rep_aux.n = 0;
+	for (i = 0; i < b->mini.n; ++i)
+		if (b->mini.a[i].y>>32)
+			kv_push(uint32_t, b->rep_aux, b->mini.a[i].y>>32);
+	if (b->rep_aux.n < 3) return;
+	thres = (uint32_t)(ks_ksmall_uint32_t(b->rep_aux.n, b->rep_aux.a, b->rep_aux.n>>1) * MM_DEREP_Q50 + .499);
+	for (i = n = m = 0; i < b->reg.n; ++i) {
+		int cnt = 0, all_cnt = b->reg.a[i].cnt;
+		for (j = 0; j < all_cnt; ++j)
+			if (b->mini.a[b->reg2mini.a[m + j]].y>>32 <= thres)
+				++cnt;
+		if (cnt >= min_cnt)
+			b->reg.a[n++] = b->reg.a[i];
+		m += all_cnt;
+	}
+//	printf("%ld=>%d\t%d\n", b->reg.n, n, thres);
+	b->reg.n = n;
+}
+
+static void proc_intv(mm_tbuf_t *b, int which, int k, int min_cnt, int max_gap)
+{
+	int i, j, l_lis, rid = -1, rev = 0, start = b->intv.a[which].y, end = start + b->intv.a[which].x;
+
+	// make room for arrays needed by LIS (longest increasing sequence)
+	if (end - start > b->m) {
+		b->m = end - start;
+		kv_roundup32(b->m);
+		b->a = (uint64_t*)realloc(b->a, b->m * 8);
+		b->b = (size_t*)realloc(b->b, b->m * sizeof(size_t));
+		b->p = (size_t*)realloc(b->p, b->m * sizeof(size_t));
+	}
+
+	// prepare the input array _a_ for LIS
+	b->n = 0;
+	for (i = start; i < end; ++i)
+		if (b->coef.a[i].x != UINT64_MAX)
+			b->a[b->n++] = b->coef.a[i].y, rid = b->coef.a[i].x << 1 >> 33, rev = b->coef.a[i].x >> 63;
+	if (b->n < min_cnt) return;
+	radix_sort_64(b->a, b->a + b->n);
+
+	// find the longest increasing sequence
+	l_lis = rev? ks_lis_low32gt(b->n, b->a, b->b, b->p) : ks_lis_low32lt(b->n, b->a, b->b, b->p); // LIS
+	if (l_lis < min_cnt) return;
+	for (i = 1, j = 1; i < l_lis; ++i) // squeeze out minimizaers reused in the LIS sequence
+		if (b->a[b->b[i]]>>32 != b->a[b->b[i-1]]>>32)
+			b->a[b->b[j++]] = b->a[b->b[i]];
+	l_lis = j;
+	if (l_lis < min_cnt) return;
+
+	// convert LISes to regions; possibly break an LIS at a long gaps
+	for (i = 1, start = 0; i <= l_lis; ++i) {
+		int32_t qgap = i == l_lis? 0 : ((uint32_t)b->mini.a[b->a[b->b[i]]>>32].y>>1) - ((uint32_t)b->mini.a[b->a[b->b[i-1]]>>32].y>>1);
+		if (i == l_lis || (qgap > max_gap && abs((int32_t)b->a[b->b[i]] - (int32_t)b->a[b->b[i-1]]) > max_gap)) {
+			if (i - start >= min_cnt) {
+				uint32_t lq = 0, lr = 0, eq = 0, er = 0, sq = 0, sr = 0;
+				mm_reg1_t *r;
+				kv_pushp(mm_reg1_t, b->reg, &r);
+				r->rid = rid, r->rev = rev, r->cnt = i - start, r->rep = 0;
+				r->qs = ((uint32_t)b->mini.a[b->a[b->b[start]]>>32].y>>1) - (k - 1);
+				r->qe = ((uint32_t)b->mini.a[b->a[b->b[i-1]]>>32].y>>1) + 1;
+				r->rs = rev? (uint32_t)b->a[b->b[i-1]] : (uint32_t)b->a[b->b[start]];
+				r->re = rev? (uint32_t)b->a[b->b[start]] : (uint32_t)b->a[b->b[i-1]];
+				r->rs -= k - 1;
+				r->re += 1;
+				for (j = start; j < i; ++j) { // count the number of times each minimizer is used
+					int jj = b->a[b->b[j]]>>32;
+					b->mini.a[jj].y += 1ULL<<32;
+					kv_push(uint32_t, b->reg2mini, jj); // keep minimizer<=>reg mapping for derep
+				}
+				for (j = start; j < i; ++j) { // compute ->len
+					uint32_t q = ((uint32_t)b->mini.a[b->a[b->b[j]]>>32].y>>1) - (k - 1);
+					uint32_t r = (uint32_t)b->a[b->b[j]];
+					r = !rev? r - (k - 1) : (0x80000000U - r);
+					if (r > er) lr += er - sr, sr = r, er = sr + k;
+					else er = r + k;
+					if (q > eq) lq += eq - sq, sq = q, eq = sq + k;
+					else eq = q + k;
+				}
+				lr += er - sr, lq += eq - sq;
+				r->len = lr < lq? lr : lq;
+			}
+			start = i;
+		}
+	}
+}
+
+// merge or add a Hough interval; only used by get_reg()
+static inline void push_intv(mm128_v *intv, int start, int end, float merge_frac)
+{
+	mm128_t *p;
+	if (intv->n > 0) { // test overlap
+		int last_start, last_end, min;
+		p = &intv->a[intv->n-1];
+		last_start = p->y, last_end = p->x + last_start;
+		min = end - start < last_end - last_start? end - start : last_end - last_start;
+		if (last_end > start && last_end - start > min * merge_frac) { // large overlap; then merge
+			p->x = end - last_start;
+			return;
+		}
+	}
+	kv_pushp(mm128_t, *intv, &p); // a new interval
+	p->x = end - start, p->y = start;
+}
+
+// find mapping regions from a list of minimizer hits
+static void get_reg(mm_tbuf_t *b, int radius, int k, int min_cnt, int max_gap, float merge_frac, int flag)
+{
+	const uint64_t v_kept = ~(1ULL<<31), v_dropped = 1ULL<<31;
+	mm128_v *c = &b->coef;
+	int i, j, start = 0, iso_dist = radius * 2;
+
+	if (c->n < min_cnt) return;
+
+	// drop isolated minimizer hits
+	if (flag&MM_F_NO_ISO) {
+		for (i = 0; i < c->n; ++i) c->a[i].y |= v_dropped;
+		for (i = 1; i < c->n; ++i) {
+			uint64_t x = c->a[i].x;
+			int32_t rpos = (uint32_t)c->a[i].y;
+			for (j = i - 1; j >= 0 && x - c->a[j].x < radius; --j) {
+				int32_t y = c->a[j].y;
+				if (abs(y - rpos) < iso_dist) {
+					c->a[i].y &= v_kept, c->a[j].y &= v_kept;
+					break;
+				}
+			}
+		}
+		for (i = j = 0; i < c->n; ++i) // squeeze out hits still marked as v_dropped
+			if ((c->a[i].y&v_dropped) == 0)
+				c->a[j++] = c->a[i];
+		c->n = j;
+	}
+
+	// identify (possibly overlapping) intervals within _radius_; an interval is a cluster of hits
+	b->intv.n = 0;
+	for (i = 1; i < c->n; ++i) {
+		if (c->a[i].x - c->a[start].x > radius) {
+			if (i - start >= min_cnt) push_intv(&b->intv, start, i, merge_frac);
+			for (++start; start < i && c->a[i].x - c->a[start].x > radius; ++start);
+		}
+	}
+	if (i - start >= min_cnt) push_intv(&b->intv, start, i, merge_frac);
+
+	// sort by the size of the interval
+	radix_sort_128x(b->intv.a, b->intv.a + b->intv.n);
+
+	// generate hits, starting from the largest interval
+	b->reg2mini.n = 0;
+	for (i = b->intv.n - 1; i >= 0; --i) proc_intv(b, i, k, min_cnt, max_gap);
+
+	// post repeat removal
+	if (!(flag&MM_F_WITH_REP)) drop_rep(b, min_cnt);
+}
+
+const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name)
+{
+	int j, n_dreg = 0, u = 0;
+	const uint64_t *dreg = 0;
+
+	b->mini.n = b->coef.n = 0;
+	mm_sketch(seq, l_seq, mi->w, mi->k, 0, &b->mini);
+	if (opt->sdust_thres > 0)
+		dreg = sdust_core((const uint8_t*)seq, l_seq, opt->sdust_thres, 64, &n_dreg, b->sdb);
+	for (j = 0; j < b->mini.n; ++j) {
+		int k, n;
+		const uint64_t *r;
+		int32_t qpos = (uint32_t)b->mini.a[j].y>>1, strand = b->mini.a[j].y&1;
+		b->mini.a[j].y = b->mini.a[j].y<<32>>32; // clear the rid field
+		if (dreg && n_dreg) { // test complexity
+			int s = qpos - (mi->k - 1), e = s + mi->k;
+			while (u < n_dreg && (uint32_t)dreg[u] <= s) ++u;
+			if (u < n_dreg && dreg[u]>>32 < e) {
+				int v, l = 0;
+				for (v = u; v < n_dreg && dreg[v]>>32 < e; ++v) { // iterate over LCRs overlapping this minimizer
+					int ss = s > dreg[v]>>32? s : dreg[v]>>32;
+					int ee = e < (uint32_t)dreg[v]? e : (uint32_t)dreg[v];
+					l += ee - ss;
+				}
+				if (l > mi->k>>1) continue;
+			}
+		}
+		r = mm_idx_get(mi, b->mini.a[j].x, &n);
+		if (n > mi->max_occ) continue;
+		for (k = 0; k < n; ++k) {
+			int32_t rpos = (uint32_t)r[k] >> 1;
+			mm128_t *p;
+			if (name && (opt->flag&MM_F_NO_SELF) && mi->name && strcmp(name, mi->name[r[k]>>32]) == 0 && rpos == qpos)
+				continue;
+			if (name && (opt->flag&MM_F_AVA) && mi->name && strcmp(name, mi->name[r[k]>>32]) > 0)
+				continue;
+			kv_pushp(mm128_t, b->coef, &p);
+			if ((r[k]&1) == strand) { // forward strand
+				p->x = (uint64_t)r[k] >> 32 << 32 | (0x80000000U + rpos - qpos);
+				p->y = (uint64_t)j << 32 | rpos;
+			} else { // reverse strand
+				p->x = (uint64_t)r[k] >> 32 << 32 | (rpos + qpos) | 1ULL<<63;
+				p->y = (uint64_t)j << 32 | rpos;
+			}
+		}
+	}
+	radix_sort_128x(b->coef.a, b->coef.a + b->coef.n);
+	b->reg.n = 0;
+	get_reg(b, opt->radius, mi->k, opt->min_cnt, opt->max_gap, opt->merge_frac, opt->flag);
+	*n_regs = b->reg.n;
+	return b->reg.a;
+}
+
+/**************************
+ * Multi-threaded mapping *
+ **************************/
+
+void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
+void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
+
+typedef struct {
+	int batch_size, n_processed, n_threads;
+	const mm_mapopt_t *opt;
+	bseq_file_t *fp;
+	const mm_idx_t *mi;
+} pipeline_t;
+
+typedef struct {
+	const pipeline_t *p;
+    int n_seq;
+	bseq1_t *seq;
+	int *n_reg;
+	mm_reg1_t **reg;
+	mm_tbuf_t **buf;
+} step_t;
+
+static void worker_for(void *_data, long i, int tid) // kt_for() callback
+{
+    step_t *step = (step_t*)_data;
+	const mm_reg1_t *regs;
+	int n_regs;
+
+	regs = mm_map(step->p->mi, step->seq[i].l_seq, step->seq[i].seq, &n_regs, step->buf[tid], step->p->opt, step->seq[i].name);
+	step->n_reg[i] = n_regs;
+	if (n_regs > 0) {
+		step->reg[i] = (mm_reg1_t*)malloc(n_regs * sizeof(mm_reg1_t));
+		memcpy(step->reg[i], regs, n_regs * sizeof(mm_reg1_t));
+	}
+}
+
+static void *worker_pipeline(void *shared, int step, void *in)
+{
+	int i, j;
+    pipeline_t *p = (pipeline_t*)shared;
+    if (step == 0) { // step 0: read sequences
+        step_t *s;
+        s = (step_t*)calloc(1, sizeof(step_t));
+		s->seq = bseq_read(p->fp, p->batch_size, &s->n_seq);
+		if (s->seq) {
+			s->p = p;
+			for (i = 0; i < s->n_seq; ++i)
+				s->seq[i].rid = p->n_processed++;
+			s->buf = (mm_tbuf_t**)calloc(p->n_threads, sizeof(mm_tbuf_t*));
+			for (i = 0; i < p->n_threads; ++i)
+				s->buf[i] = mm_tbuf_init();
+			s->n_reg = (int*)calloc(s->n_seq, sizeof(int));
+			s->reg = (mm_reg1_t**)calloc(s->n_seq, sizeof(mm_reg1_t**));
+			return s;
+		} else free(s);
+    } else if (step == 1) { // step 1: map
+		kt_for(p->n_threads, worker_for, in, ((step_t*)in)->n_seq);
+		return in;
+    } else if (step == 2) { // step 2: output
+        step_t *s = (step_t*)in;
+		const mm_idx_t *mi = p->mi;
+		for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]);
+		free(s->buf);
+		for (i = 0; i < s->n_seq; ++i) {
+			bseq1_t *t = &s->seq[i];
+			for (j = 0; j < s->n_reg[i]; ++j) {
+				mm_reg1_t *r = &s->reg[i][j];
+				if (r->len < p->opt->min_match) continue;
+				printf("%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]);
+				if (mi->name) fputs(mi->name[r->rid], stdout);
+				else printf("%d", r->rid + 1);
+				printf("\t%d\t%d\t%d\t%d\t%d\t255\tcm:i:%d\n", mi->len[r->rid], r->rs, r->re, r->len,
+						r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs, r->cnt);
+			}
+			free(s->reg[i]);
+			free(s->seq[i].seq); free(s->seq[i].name);
+		}
+		free(s->reg); free(s->n_reg); free(s->seq);
+		free(s);
+	}
+    return 0;
+}
+
+int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads, int tbatch_size)
+{
+	pipeline_t pl;
+	memset(&pl, 0, sizeof(pipeline_t));
+	pl.fp = bseq_open(fn);
+	if (pl.fp == 0) return -1;
+	pl.opt = opt, pl.mi = idx;
+	pl.n_threads = n_threads, pl.batch_size = tbatch_size;
+	kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3);
+	bseq_close(pl.fp);
+	return 0;
+}
diff --git a/minimap.1 b/minimap.1
new file mode 100644
index 0000000..03886fb
--- /dev/null
+++ b/minimap.1
@@ -0,0 +1,222 @@
+.TH minimap 1 "06 December 2015" "minimap-0.2" "Bioinformatics tools"
+
+.SH NAME
+.PP
+minimap - fast mapping between long DNA sequences
+
+.SH SYNOPSIS
+.PP
+minimap
+.RB [ -lSOV ]
+.RB [ -k
+.IR kmer ]
+.RB [ -w
+.IR winSize ]
+.RB [ -I
+.IR batchSize ]
+.RB [ -d
+.IR dumpFile ]
+.RB [ -f
+.IR occThres ]
+.RB [ -r
+.IR bandWidth ]
+.RB [ -m
+.IR minShared ]
+.RB [ -c
+.IR minCount ]
+.RB [ -L
+.IR minMatch ]
+.RB [ -g
+.IR maxGap ]
+.RB [ -T
+.IR dustThres ]
+.RB [ -t
+.IR nThreads ]
+.RB [ -x
+.IR preset ]
+.I target.fa
+.I query.fa
+>
+.I output.paf
+
+.SH DESCRIPTION
+.PP
+Minimap is a tool to efficiently find multiple approximate mapping positions
+between two sets of long sequences, such as between reads and reference
+genomes, between genomes and between long noisy reads. Minimap has an indexing
+and a mapping phase. In the indexing phase, it collects all minimizers of a
+large batch of target sequences in a hash table; in the mapping phase, it
+identifies good clusters of colinear minimizer hits. Minimap does not generate
+detailed alignments between the target and the query sequences. It only outputs
+the approximate start and the end coordinates of these clusters.
+
+.SH OPTIONS
+
+.SS Indexing options
+
+.TP 10
+.BI -k \ INT
+Minimizer k-mer length [15]
+
+.TP
+.BI -w \ INT
+Minimizer window size [2/3 of k-mer length]. A minimizer is the smallest k-mer
+in a window of w consecutive k-mers.
+
+.TP
+.BI -I \ NUM
+Load at most
+.I NUM
+target bases into RAM for indexing [4G]. If there are more than
+.I NUM
+bases in
+.IR target.fa ,
+minimap needs to read
+.I query.fa
+multiple times to map it against each batch of target sequences.
+.I NUM
+may be ending with k/K/m/M/g/G.
+
+.TP
+.BI -d \ FILE
+Dump minimizer index to
+.I FILE
+[no dump]
+
+.TP
+.B -l
+Indicate that
+.I target.fa
+is in fact a minimizer index generated by option
+.BR -d ,
+not a FASTA or FASTQ file.
+
+.SS Mapping options
+
+.TP 10
+.BI -f \ FLOAT
+Ignore top
+.I FLOAT
+fraction of most occurring minimizers [0.001]
+
+.TP
+.BI -r \ INT
+Approximate bandwidth for initial minimizer hits clustering [500]. A
+.I minimizer hit
+is a minimizer present in both the target and query sequences. A
+.I minimizer hit cluster
+is a group of potentially colinear minimizer hits between a target and a query
+sequence.
+
+.TP
+.BI -m \ FLOAT
+Merge initial minimizer hit clusters if
+.I FLOAT
+or higher fraction of minimizers are shared between the clusters [0.5]
+
+.TP
+.BI -c \ INT
+Retain a minimizer hit cluster if it contains
+.I INT
+or more minimizer hits [4]
+
+.TP
+.BI -L \ INT
+Discard a minimizer hit cluster if after colinearization, the number of matching bases is below
+.I INT
+[40]. This option mainly reduces the size of output. It has little effect on
+the speed and peak memory.
+
+.TP
+.BI -g \ INT
+Split a minimizer hit cluster at a gap
+.IR INT -bp
+or longer that does not contain any minimizer hits [10000]
+
+.TP
+.BI -T \ INT
+Mask regions on query sequences with SDUST score threshold
+.IR INT ;
+0 to disable [0]. SDUST is an algorithm
+to identify low-complexity subsequences. It is not enabled by default. If SDUST
+is preferred, a value between 20 and 25 is recommended. A higher threshold masks
+less sequences.
+
+.TP
+.B -S
+Perform all-vs-all mapping. In this mode, if the query sequence name is
+lexicographically larger than the target sequence name, the hits between them
+will be suppressed; if the query sequence name is the same as the target name,
+diagonal minimizer hits will also be suppressed.
+
+.TP
+.B -O
+Drop a minimizer hit if it is far away from other hits (EXPERIMENTAL). This
+option is useful for mapping long chromosomes from two diverged species.
+
+.TP
+.BI -x \ STR
+Changing multiple settings based on
+.I STR
+[not set]. It is recommended to apply this option before other options, such
+that the following options may override the multiple settings modified by this
+option.
+
+.RS
+.TP 8
+.B ava10k
+for PacBio or Oxford Nanopore all-vs-all read mapping (-Sw5 -L100 -m0).
+.RE
+
+.SS Input/output options
+
+.TP 10
+.BI -t \ INT
+Number of threads [3]. Minimap uses at most three threads when collecting
+minimizers on target sequences, and uses up to 
+.IR INT +1
+threads when mapping (the extra thread is for I/O, which is frequently idle and
+takes little CPU time).
+
+.TP
+.B -V
+Print version number to stdout
+
+.SH OUTPUT FORMAT
+
+.PP
+Minimap outputs mapping positions in the Pairwise mApping Format (PAF). PAF is
+a TAB-delimited text format with each line consisting of at least 12 fields as
+are described in the following table:
+
+.TS
+center box;
+cb | cb | cb
+r | c | l .
+Col	Type	Description
+_
+1	string	Query sequence name
+2	int	Query sequence length
+3	int	Query start coordinate (0-based)
+4	int	Query end coordinate (0-based)
+5	char	`+' if query and target on the same strand; `-' if opposite
+6	string	Target sequence name
+7	int	Target sequence length
+8	int	Target start coordinate on the original strand
+9	int	Target end coordinate on the original strand
+10	int	Number of matching bases in the mapping
+11	int	Number bases, including gaps, in the mapping
+12	int	Mapping quality (0-255 with 255 for missing)
+.TE
+
+.PP
+When the alignment is available, column 11 gives the total number of sequence
+matches, mismatches and gaps in the alignment; column 10 divided by column 11
+gives the alignment identity. As minimap does not generate detailed alignment,
+these two columns are approximate. PAF may optionally have additional fields in
+the SAM-like typed key-value format. Minimap writes the number of minimizer
+hits in a cluster to the cm tag.
+
+.SH SEE ALSO
+.PP
+miniasm(1)
diff --git a/minimap.h b/minimap.h
new file mode 100644
index 0000000..84cd159
--- /dev/null
+++ b/minimap.h
@@ -0,0 +1,104 @@
+#ifndef MINIMAP_H
+#define MINIMAP_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include "bseq.h"
+
+#define MM_IDX_DEF_B    14
+#define MM_DEREP_Q50    5.0
+
+#define MM_F_WITH_REP  0x1
+#define MM_F_NO_SELF   0x2
+#define MM_F_NO_ISO    0x4
+#define MM_F_AVA       0x8
+
+typedef struct {
+ 	uint64_t x, y;
+} mm128_t;
+
+typedef struct { size_t n, m; mm128_t *a; } mm128_v;
+typedef struct { size_t n, m; uint64_t *a; } uint64_v;
+typedef struct { size_t n, m; uint32_t *a; } uint32_v;
+
+typedef struct {
+	mm128_v a;   // (minimizer, position) array
+	int32_t n;   // size of the _p_ array
+	uint64_t *p; // position array for minimizers appearing >1 times
+	void *h;     // hash table indexing _p_ and minimizers appearing once
+} mm_idx_bucket_t;
+
+typedef struct {
+	int b, w, k;
+	uint32_t n;  // number of reference sequences
+	mm_idx_bucket_t *B;
+	uint32_t max_occ;
+	float freq_thres;
+	int32_t *len;    // length of each reference sequence
+	char **name; // TODO: if this uses too much RAM, switch one concatenated string
+} mm_idx_t;
+
+typedef struct {
+	uint32_t cnt:31, rev:1;
+	uint32_t rid:31, rep:1;
+	uint32_t len;
+	int32_t qs, qe, rs, re;
+} mm_reg1_t;
+
+typedef struct {
+	int radius;  // bandwidth to cluster hits
+	int max_gap; // break a chain if there are no minimizers in a max_gap window
+	int min_cnt; // minimum number of minimizers to start a chain
+	int min_match;
+	int sdust_thres;  // score threshold for SDUST; 0 to disable
+	int flag;    // see MM_F_* macros
+	float merge_frac; // merge two chains if merge_frac fraction of minimzers are shared between the chains
+} mm_mapopt_t;
+
+extern int mm_verbose;
+extern double mm_realtime0;
+
+struct mm_tbuf_s;
+typedef struct mm_tbuf_s mm_tbuf_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// compute minimizers
+void mm_sketch(const char *str, int len, int w, int k, uint32_t rid, mm128_v *p);
+
+// minimizer indexing
+mm_idx_t *mm_idx_init(int w, int k, int b);
+void mm_idx_destroy(mm_idx_t *mi);
+mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int tbatch_size, int n_threads, uint64_t ibatch_size, int keep_name);
+void mm_idx_set_max_occ(mm_idx_t *mi, float f);
+const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
+
+mm_idx_t *mm_idx_build(const char *fn, int w, int k, int n_threads);
+
+// minimizer index I/O
+void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
+mm_idx_t *mm_idx_load(FILE *fp);
+
+// mapping
+void mm_mapopt_init(mm_mapopt_t *opt);
+mm_tbuf_t *mm_tbuf_init(void);
+void mm_tbuf_destroy(mm_tbuf_t *b);
+const mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
+
+int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads, int tbatch_size);
+
+// private functions (may be moved to a "mmpriv.h" in future)
+double cputime(void);
+double realtime(void);
+void radix_sort_128x(mm128_t *beg, mm128_t *end);
+void radix_sort_64(uint64_t *beg, uint64_t *end);
+uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/misc.c b/misc.c
new file mode 100644
index 0000000..3f8bf52
--- /dev/null
+++ b/misc.c
@@ -0,0 +1,26 @@
+#include <sys/resource.h>
+#include <sys/time.h>
+#include "minimap.h"
+
+int mm_verbose = 3;
+double mm_realtime0;
+
+double cputime()
+{
+	struct rusage r;
+	getrusage(RUSAGE_SELF, &r);
+	return r.ru_utime.tv_sec + r.ru_stime.tv_sec + 1e-6 * (r.ru_utime.tv_usec + r.ru_stime.tv_usec);
+}
+
+double realtime()
+{
+	struct timeval tp;
+	struct timezone tzp;
+	gettimeofday(&tp, &tzp);
+	return tp.tv_sec + tp.tv_usec * 1e-6;
+}
+
+#include "ksort.h"
+#define sort_key_128x(a) ((a).x)
+KRADIX_SORT_INIT(128x, mm128_t, sort_key_128x, 8) 
+KSORT_INIT_GENERIC(uint32_t)
diff --git a/sdust.c b/sdust.c
new file mode 100644
index 0000000..e6032c3
--- /dev/null
+++ b/sdust.c
@@ -0,0 +1,209 @@
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "kdq.h"
+#include "kvec.h"
+#include "sdust.h"
+
+#define SD_WLEN 3
+#define SD_WTOT (1<<(SD_WLEN<<1))
+#define SD_WMSK (SD_WTOT - 1)
+
+typedef struct {
+	int start, finish;
+	int r, l;
+} perf_intv_t;
+
+typedef kvec_t(perf_intv_t) perf_intv_v;
+typedef kvec_t(uint64_t) uint64_v;
+
+KDQ_INIT(int)
+
+#if defined(_NO_NT4_TBL) || defined(_SDUST_MAIN)
+unsigned char seq_nt4_table[256] = {
+	0, 1, 2, 3,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+#else
+extern unsigned char seq_nt4_table[256];
+#endif
+
+struct sdust_buf_s {
+	kdq_t(int) *w;
+	perf_intv_v P; // the list of perfect intervals for the current window, sorted by descending start and then by ascending finish
+	uint64_v res;  // the result
+};
+
+sdust_buf_t *sdust_buf_init(void)
+{
+	sdust_buf_t *buf;
+	buf = (sdust_buf_t*)calloc(1, sizeof(sdust_buf_t));
+	buf->w = kdq_init(int);
+	return buf;
+}
+
+void sdust_buf_destroy(sdust_buf_t *buf)
+{
+	if (buf == 0) return;
+	kdq_destroy(int, buf->w);
+	free(buf->P.a); free(buf->res.a);
+	free(buf);
+}
+
+static inline void shift_window(int t, kdq_t(int) *w, int T, int W, int *L, int *rw, int *rv, int *cw, int *cv)
+{
+	int s;
+	if (kdq_size(w) >= W - SD_WLEN + 1) { // TODO: is this right for SD_WLEN!=3?
+		s = *kdq_shift(int, w);
+		*rw -= --cw[s];
+		if (*L > kdq_size(w))
+			--*L, *rv -= --cv[s];
+	}
+	kdq_push(int, w, t);
+	++*L;
+	*rw += cw[t]++;
+	*rv += cv[t]++;
+	if (cv[t] * 10 > T<<1) {
+		do {
+			s = kdq_at(w, kdq_size(w) - *L);
+			*rv -= --cv[s];
+			--*L;
+		} while (s != t);
+	}
+}
+
+static inline void save_masked_regions(uint64_v *res, perf_intv_v *P, int start)
+{
+	int i, saved = 0;
+	perf_intv_t *p;
+	if (P->n == 0 || P->a[P->n - 1].start >= start) return;
+	p = &P->a[P->n - 1];
+	if (res->n) {
+		int s = res->a[res->n - 1]>>32, f = (uint32_t)res->a[res->n - 1];
+		if (p->start <= f) // if overlapping with or adjacent to the previous interval
+			saved = 1, res->a[res->n - 1] = (uint64_t)s<<32 | (f > p->finish? f : p->finish);
+	}
+	if (!saved) kv_push(uint64_t, *res, (uint64_t)p->start<<32|p->finish);
+	for (i = P->n - 1; i >= 0 && P->a[i].start < start; --i); // remove perfect intervals that have falled out of the window
+	P->n = i + 1;
+}
+
+static void find_perfect(perf_intv_v *P, const kdq_t(int) *w, int T, int start, int L, int rv, const int *cv)
+{
+	int c[SD_WTOT], r = rv, i, max_r = 0, max_l = 0;
+	memcpy(c, cv, SD_WTOT * sizeof(int));
+	for (i = (long)kdq_size(w) - L - 1; i >= 0; --i) {
+		int j, t = kdq_at(w, i), new_r, new_l;
+		r += c[t]++;
+		new_r = r, new_l = kdq_size(w) - i - 1;
+		if (new_r * 10 > T * new_l) {
+			for (j = 0; j < P->n && P->a[j].start >= i + start; ++j) { // find insertion position
+				perf_intv_t *p = &P->a[j];
+				if (max_r == 0 || p->r * max_l > max_r * p->l)
+					max_r = p->r, max_l = p->l;
+			}
+			if (max_r == 0 || new_r * max_l >= max_r * new_l) { // then insert
+				max_r = new_r, max_l = new_l;
+				if (P->n == P->m) kv_resize(perf_intv_t, *P, P->n + 1);
+				memmove(&P->a[j+1], &P->a[j], (P->n - j) * sizeof(perf_intv_t)); // make room
+				++P->n;
+				P->a[j].start = i + start, P->a[j].finish = kdq_size(w) + (SD_WLEN - 1) + start;
+				P->a[j].r = new_r, P->a[j].l = new_l;
+			}
+		}
+	}
+}
+
+const uint64_t *sdust_core(const uint8_t *seq, int l_seq, int T, int W, int *n, sdust_buf_t *buf)
+{
+	int rv = 0, rw = 0, L = 0, cv[SD_WTOT], cw[SD_WTOT];
+	int i, start, l; // _start_: start of the current window; _l_: length of a contiguous A/C/G/T (sub)sequence
+	unsigned t; // current word
+
+	buf->P.n = buf->res.n = 0;
+	buf->w->front = buf->w->count = 0;
+	memset(cv, 0, SD_WTOT * sizeof(int));
+	memset(cw, 0, SD_WTOT * sizeof(int));
+	if (l_seq < 0) l_seq = strlen((const char*)seq);
+	for (i = l = t = 0; i <= l_seq; ++i) {
+		int b = i < l_seq? seq_nt4_table[seq[i]] : 4;
+		if (b < 4) { // an A/C/G/T base
+			++l, t = (t<<2 | b) & SD_WMSK;
+			if (l >= SD_WLEN) { // we have seen a word
+				start = (l - W > 0? l - W : 0) + (i + 1 - l); // set the start of the current window
+				save_masked_regions(&buf->res, &buf->P, start); // save intervals falling out of the current window?
+				shift_window(t, buf->w, T, W, &L, &rw, &rv, cw, cv);
+				if (rw * 10 > L * T)
+					find_perfect(&buf->P, buf->w, T, start, L, rv, cv);
+			}
+		} else { // N or the end of sequence; N effectively breaks input into pieces of independent sequences
+			start = (l - W + 1 > 0? l - W + 1 : 0) + (i + 1 - l);
+			while (buf->P.n) save_masked_regions(&buf->res, &buf->P, start++); // clear up unsaved perfect intervals
+			l = t = 0;
+		}
+	}
+	*n = buf->res.n;
+	return buf->res.a;
+}
+
+uint64_t *sdust(const uint8_t *seq, int l_seq, int T, int W, int *n)
+{
+	uint64_t *ret;
+	sdust_buf_t *buf;
+	buf = sdust_buf_init();
+	ret = (uint64_t*)sdust_core(seq, l_seq, T, W, n, buf);
+	buf->res.a = 0;
+	sdust_buf_destroy(buf);
+	return ret;
+}
+
+#ifdef _SDUST_MAIN
+#include <zlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+int main(int argc, char *argv[])
+{
+	gzFile fp;
+	kseq_t *ks;
+	int W = 64, T = 20, c;
+
+	while ((c = getopt(argc, argv, "w:t:")) >= 0) {
+		if (c == 'w') W = atoi(optarg);
+		else if (c == 't') T = atoi(optarg);
+	}
+	if (optind == argc) {
+		fprintf(stderr, "Usage: sdust [-w %d] [-t %d] <in.fa>\n", W, T);
+		return 1;
+	}
+	fp = strcmp(argv[optind], "-")? gzopen(argv[optind], "r") : gzdopen(fileno(stdin), "r");
+	ks = kseq_init(fp);
+	while (kseq_read(ks) >= 0) {
+		uint64_t *r;
+		int i, n;
+		r = sdust((uint8_t*)ks->seq.s, -1, T, W, &n);
+		for (i = 0; i < n; ++i)
+			printf("%s\t%d\t%d\n", ks->name.s, (int)(r[i]>>32), (int)r[i]);
+		free(r);
+	}
+	kseq_destroy(ks);
+	gzclose(fp);
+	return 0;
+}
+#endif
diff --git a/sdust.h b/sdust.h
new file mode 100644
index 0000000..52a13ff
--- /dev/null
+++ b/sdust.h
@@ -0,0 +1,23 @@
+#ifndef SDUST_H
+#define SDUST_H
+
+struct sdust_buf_s;
+typedef struct sdust_buf_s sdust_buf_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// the simple interface
+uint64_t *sdust(const uint8_t *seq, int l_seq, int T, int W, int *n);
+
+// the following interface dramatically reduce heap allocations when sdust is frequently called.
+sdust_buf_t *sdust_buf_init(void);
+void sdust_buf_destroy(sdust_buf_t *buf);
+const uint64_t *sdust_core(const uint8_t *seq, int l_seq, int T, int W, int *n, sdust_buf_t *buf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/sketch.c b/sketch.c
new file mode 100644
index 0000000..04fbb4c
--- /dev/null
+++ b/sketch.c
@@ -0,0 +1,102 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "kvec.h"
+#include "minimap.h"
+
+unsigned char seq_nt4_table[256] = {
+	0, 1, 2, 3,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+
+static inline uint64_t hash64(uint64_t key, uint64_t mask)
+{
+	key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1;
+	key = key ^ key >> 24;
+	key = ((key + (key << 3)) + (key << 8)) & mask; // key * 265
+	key = key ^ key >> 14;
+	key = ((key + (key << 2)) + (key << 4)) & mask; // key * 21
+	key = key ^ key >> 28;
+	key = (key + (key << 31)) & mask;
+	return key;
+}
+
+/**
+ * Find symmetric (w,k)-minimizers on a DNA sequence
+ *
+ * @param str    DNA sequence
+ * @param len    length of $str
+ * @param w      find a minimizer for every $w consecutive k-mers
+ * @param k      k-mer size
+ * @param rid    reference ID; will be copied to the output $p array
+ * @param p      minimizers; p->a[i].x is the 2k-bit hash value;
+ *               p->a[i].y = rid<<32 | lastPos<<1 | strand
+ *               where lastPos is the position of the last base of the i-th minimizer,
+ *               and strand indicates whether the minimizer comes from the top or the bottom strand.
+ *               Callers may want to set "p->n = 0"; otherwise results are appended to p
+ */
+void mm_sketch(const char *str, int len, int w, int k, uint32_t rid, mm128_v *p)
+{
+	uint64_t shift1 = 2 * (k - 1), mask = (1ULL<<2*k) - 1, kmer[2] = {0,0};
+	int i, j, l, buf_pos, min_pos;
+	mm128_t *buf, min = { UINT64_MAX, UINT64_MAX };
+
+	assert(len > 0 && w > 0 && k > 0);
+	buf = (mm128_t*)alloca(w * 16);
+	memset(buf, 0xff, w * 16);
+
+	for (i = l = buf_pos = min_pos = 0; i < len; ++i) {
+		int c = seq_nt4_table[(uint8_t)str[i]];
+		mm128_t info = { UINT64_MAX, UINT64_MAX };
+		if (c < 4) { // not an ambiguous base
+			int z;
+			kmer[0] = (kmer[0] << 2 | c) & mask;           // forward k-mer
+			kmer[1] = (kmer[1] >> 2) | (3ULL^c) << shift1; // reverse k-mer
+			if (kmer[0] == kmer[1]) continue; // skip "symmetric k-mers" as we don't know it strand
+			z = kmer[0] < kmer[1]? 0 : 1; // strand
+			if (++l >= k)
+				info.x = hash64(kmer[z], mask), info.y = (uint64_t)rid<<32 | (uint32_t)i<<1 | z;
+		} else l = 0;
+		buf[buf_pos] = info; // need to do this here as appropriate buf_pos and buf[buf_pos] are needed below
+		if (l == w + k - 1) { // special case for the first window - because identical k-mers are not stored yet
+			for (j = buf_pos + 1; j < w; ++j)
+				if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mm128_t, *p, buf[j]);
+			for (j = 0; j < buf_pos; ++j)
+				if (min.x == buf[j].x && buf[j].y != min.y) kv_push(mm128_t, *p, buf[j]);
+		}
+		if (info.x <= min.x) { // a new minimum; then write the old min
+			if (l >= w + k) kv_push(mm128_t, *p, min);
+			min = info, min_pos = buf_pos;
+		} else if (buf_pos == min_pos) { // old min has moved outside the window
+			if (l >= w + k - 1) kv_push(mm128_t, *p, min);
+			for (j = buf_pos + 1, min.x = UINT64_MAX; j < w; ++j) // the two loops are necessary when there are identical k-mers
+				if (min.x >= buf[j].x) min = buf[j], min_pos = j; // >= is important s.t. min is always the closest k-mer
+			for (j = 0; j <= buf_pos; ++j)
+				if (min.x >= buf[j].x) min = buf[j], min_pos = j;
+			if (l >= w + k - 1) { // write identical k-mers
+				for (j = buf_pos + 1; j < w; ++j) // these two loops make sure the output is sorted
+					if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mm128_t, *p, buf[j]);
+				for (j = 0; j <= buf_pos; ++j)
+					if (min.x == buf[j].x && min.y != buf[j].y) kv_push(mm128_t, *p, buf[j]);
+			}
+		}
+		if (++buf_pos == w) buf_pos = 0;
+	}
+	if (min.x != UINT64_MAX)
+		kv_push(mm128_t, *p, min);
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/minimap.git