[med-svn] [miniasm] 01/01: Imported Upstream version 0.2

Fri Jan 15 20:01:08 UTC 2016

This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository miniasm.

commit beec1c3d63dde7f6fedbc5c23b471e6094ef31e8
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Thu Jan 14 17:00:00 2016 +0000

    Imported Upstream version 0.2
---
 .gitignore                |    4 +
 LICENSE.txt               |   23 +
 Makefile                  |   37 ++
 PAF.md                    |   28 +
 README.md                 |  107 ++++
 asg.c                     |  433 +++++++++++++++
 asg.h                     |   79 +++
 asm.c                     |  286 ++++++++++
 common.c                  |   24 +
 dotter.c                  |  197 +++++++
 eps.h                     |   49 ++
 hit.c                     |  217 ++++++++
 kdq.h                     |  128 +++++
 khash.h                   |  627 ++++++++++++++++++++++
 kseq.h                    |  256 +++++++++
 ksort.h                   |  185 +++++++
 kvec.h                    |  110 ++++
 main.c                    |  202 +++++++
 miniasm.1                 |  239 +++++++++
 miniasm.h                 |  107 ++++
 misc/da2paf.pl            |   43 ++
 misc/demo-ecoli-pacbio.sh |   26 +
 misc/demo-worm-pacbio.sh  |   56 ++
 misc/mhap2paf.pl          |   23 +
 misc/paf2mhap.pl          |   35 ++
 misc/sam2paf.js           |   51 ++
 paf.c                     |   67 +++
 paf.h                     |   38 ++
 sdict.c                   |   86 +++
 sdict.h                   |   31 ++
 sys.c                     |   46 ++
 sys.h                     |   17 +
 tex/Makefile              |   18 +
 tex/bioinfo.cls           |  927 ++++++++++++++++++++++++++++++++
 tex/ce.pdf                |  Bin 0 -> 95345 bytes
 tex/diagrams.graffle      |  908 ++++++++++++++++++++++++++++++++
 tex/miniasm.bib           |  236 +++++++++
 tex/miniasm.tex           |  807 ++++++++++++++++++++++++++++
 tex/natbib.bst            | 1288 +++++++++++++++++++++++++++++++++++++++++++++
 tex/natbib.sty            |  803 ++++++++++++++++++++++++++++
 tex/overhang.pdf          |  Bin 0 -> 22703 bytes
 41 files changed, 8844 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8698ab5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.*.swp
+*.o
+*.a
+Makefile.bak
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..beb04ef
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,23 @@
+The MIT License
+
+Copyright (c) 2015 Broad Institute
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..d649289
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,37 @@
+CC=			gcc
+CFLAGS=		-g -Wall -O2 -Wc++-compat -Wno-unused-function
+CPPFLAGS=
+INCLUDES=	-I.
+OBJS=		sys.o sdict.o paf.o asg.o common.o hit.o asm.o
+PROG=		miniasm minidot
+LIBS=		-lm -lz -lpthread
+
+.PHONY:all clean depend	# commands, not files: run them even if a same-named file exists
+.SUFFIXES:.c .o
+.c.o:	# compile one C source into its object file
+		$(CC) -c $(CFLAGS) $(CPPFLAGS) $(INCLUDES) $< -o $@
+
+all:$(PROG)	# default goal: build every program listed in PROG
+
+miniasm:$(OBJS) main.o	# link the shared objects with main.o
+		$(CC) $(CFLAGS) $^ -o $@ $(LIBS)
+
+minidot:paf.o sdict.o dotter.o	# needs only the PAF reader, the sequence dictionary and dotter.o
+		$(CC) $(CFLAGS) $^ -o $@ $(LIBS)
+
+clean:	# remove build outputs and scratch files
+		rm -fr gmon.out *.o a.out $(PROG) *~ *.a *.dSYM session*
+
+depend:	# regenerate the header dependency list below with makedepend
+		(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c)
+
+# DO NOT DELETE
+
+asg.o: asg.h kvec.h ksort.h
+asm.o: miniasm.h sdict.h asg.h kvec.h kdq.h kseq.h
+common.o: miniasm.h sdict.h asg.h
+dotter.o: paf.h sdict.h kvec.h eps.h ksort.h
+hit.o: sdict.h paf.h kvec.h sys.h miniasm.h asg.h ksort.h
+main.o: kvec.h sys.h paf.h sdict.h miniasm.h asg.h
+paf.o: paf.h kseq.h
+sdict.o: sdict.h khash.h
diff --git a/PAF.md b/PAF.md
new file mode 100644
index 0000000..cb6ddfa
--- /dev/null
+++ b/PAF.md
@@ -0,0 +1,28 @@
+## PAF: a Pairwise mApping Format
+
+PAF is a text format describing the approximate mapping positions between two
+sets of sequences. PAF is TAB-delimited with each line consisting of the
+following predefined fields:
+
+|Col|Type  |Description                               |
+|--:|:----:|:-----------------------------------------|
+|1  |string|Query sequence name                       |
+|2  |int   |Query sequence length                     |
+|3  |int   |Query start (0-based)                     |
+|4  |int   |Query end (0-based)                       |
+|5  |char  |Relative strand: "+" or "-"               |
+|6  |string|Target sequence name                      |
+|7  |int   |Target sequence length                    |
+|8  |int   |Target start on original strand (0-based) |
+|9  |int   |Target end on original strand (0-based)   |
+|10 |int   |Number of residue matches                 |
+|11 |int   |Alignment block length                    |
+|12 |int   |Mapping quality (0-255; 255 for missing)  |
+
+If PAF is generated from an alignment, column 10 equals the number of sequence
+matches, and column 11 equals the total number of sequence matches, mismatches,
+insertions and deletions in the alignment. If alignment is not available,
+columns 10 and 11 are still required but can be approximate.
+
+A PAF file may optionally contain SAM-like typed key-value pairs at the end of
+each line.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9a1a395
--- /dev/null
+++ b/README.md
@@ -0,0 +1,107 @@
+*Warning: since r104, miniasm only works with minimap-r122 or later*
+
+## Getting Started
+
+```sh
+# Download sample PacBio from the PBcR website
+wget -O- http://www.cbcb.umd.edu/software/PBcR/data/selfSampleData.tar.gz | tar zxf -
+ln -s selfSampleData/pacbio_filtered.fastq reads.fq
+# Install minimap and miniasm (requiring gcc and zlib)
+git clone https://github.com/lh3/minimap && (cd minimap && make)
+git clone https://github.com/lh3/miniasm && (cd miniasm && make)
+# Overlap
+minimap/minimap -Sw5 -L100 -m0 -t8 reads.fq reads.fq | gzip -1 > reads.paf.gz
+# Layout
+miniasm/miniasm -f reads.fq reads.paf.gz > reads.gfa
+```
+
+## Introduction
+
+Miniasm is a very fast OLC-based *de novo* assembler for noisy long reads. It
+takes all-vs-all read self-mappings (typically by [minimap][minimap]) as input
+and outputs an assembly graph in the [GFA][gfa] format. Different from
+mainstream assemblers, miniasm does not have a consensus step. It simply
+concatenates pieces of read sequences to generate the final [unitig][unitig]
+sequences. Thus the per-base error rate is similar to the raw input reads.
+
+So far miniasm is in an early development stage. It has only been tested on
+a dozen PacBio and Oxford Nanopore (ONT) bacterial data sets. Including the
+mapping step, it takes about 3 minutes to assemble a bacterial genome. Under
+the default setting, miniasm assembles 9 out of 12 PacBio datasets and 3 out of
+4 ONT datasets into a single contig. The 12 PacBio data sets are [PacBio E.
+coli sample][PB-151103], [ERS473430][ERS473430], [ERS544009][ERS544009],
+[ERS554120][ERS554120], [ERS605484][ERS605484], [ERS617393][ERS617393],
+[ERS646601][ERS646601], [ERS659581][ERS659581], [ERS670327][ERS670327],
+[ERS685285][ERS685285], [ERS743109][ERS743109] and a [deprecated PacBio E.
+coli data set][PB-deprecated]. ONT data are acquired from the [Loman
+Lab][loman-ont].
+
+For a *C. elegans* [PacBio data set][ce] (only 40X are used, not the whole
+dataset), miniasm finishes the assembly, including read overlapping, in ~10
+minutes with 16 CPUs. The total assembly size is 105Mb; the N50 is 1.94Mb. In
+comparison, the [HGAP3][hgap] produces a 104Mb assembly with N50 1.61Mb. [This
+dotter plot][ce-img] gives a global view of the miniasm assembly (on the X
+axis) and the HGAP3 assembly (on Y). They are broadly comparable. Of course,
+the HGAP3 consensus sequences are much more accurate. In addition, on the whole
+data set (assembled in ~30 min), the miniasm N50 is reduced to 1.79Mb. Miniasm
+still needs improvements.
+
+Miniasm confirms that at least for high-coverage bacterial genomes, it is
+possible to generate long contigs from raw PacBio or ONT reads without error
+correction. It also shows that [minimap][minimap] can be used as a read
+overlapper, even though it is probably not as sensitive as the more
+sophisticated overlappers such as [MHAP][mhap] and [DALIGNER][daligner].
+Coupled with long-read error correctors and consensus tools, miniasm
+may also be useful to produce high-quality assemblies.
+
+## Algorithm Overview
+
+1. Crude read selection. For each read, find the longest contiguous region
+   covered by three good mappings. Get an approximate estimate of read
+   coverage.
+
+2. Fine read selection. Use the coverage information to find the good regions
+   again but with more stringent thresholds. Discard contained reads.
+
+3. Generate a [string graph][sg]. Prune tips, drop weak overlaps and collapse
+   short bubbles. These procedures are similar to those implemented in
+   short-read assemblers.
+
+4. Merge unambiguous overlaps to produce unitig sequences.
+
+## Limitations
+
+1. Consensus base quality is similar to input reads (may be fixed with a
+   consensus tool).
+
+2. Only tested on a dozen high-coverage PacBio/ONT data sets (more testing
+   needed).
+
+3. Prone to collapse repeats or segmental duplications longer than input reads
+   (hard to fix without error correction).
+
+
+
+[unitig]: http://wgs-assembler.sourceforge.net/wiki/index.php/Celera_Assembler_Terminology
+[minimap]: https://github.com/lh3/minimap
+[paf]: https://github.com/lh3/miniasm/blob/master/PAF.md
+[gfa]: https://github.com/pmelsted/GFA-spec/blob/master/GFA-spec.md
+[ERS473430]: http://www.ebi.ac.uk/ena/data/view/ERS473430
+[ERS544009]: http://www.ebi.ac.uk/ena/data/view/ERS544009
+[ERS554120]: http://www.ebi.ac.uk/ena/data/view/ERS554120
+[ERS605484]: http://www.ebi.ac.uk/ena/data/view/ERS605484
+[ERS617393]: http://www.ebi.ac.uk/ena/data/view/ERS617393
+[ERS646601]: http://www.ebi.ac.uk/ena/data/view/ERS646601
+[ERS659581]: http://www.ebi.ac.uk/ena/data/view/ERS659581
+[ERS670327]: http://www.ebi.ac.uk/ena/data/view/ERS670327
+[ERS685285]: http://www.ebi.ac.uk/ena/data/view/ERS685285
+[ERS743109]: http://www.ebi.ac.uk/ena/data/view/ERS743109
+[PB-151103]: https://github.com/PacificBiosciences/DevNet/wiki/E.-coli-Bacterial-Assembly
+[PB-deprecated]: https://github.com/PacificBiosciences/DevNet/wiki/E.-coli-20kb-Size-Selected-Library-with-P6-C4/ce0533c1d2a957488594f0b29da61ffa3e4627e8
+[ce]: https://github.com/PacificBiosciences/DevNet/wiki/C.-elegans-data-set
+[mhap]: https://github.com/marbl/MHAP
+[daligner]: https://github.com/thegenemyers/DALIGNER
+[sg]: http://bioinformatics.oxfordjournals.org/content/21/suppl_2/ii79.abstract
+[loman-ont]: http://lab.loman.net/2015/09/24/first-sqk-map-006-experiment/
+[hgap]: https://github.com/PacificBiosciences/Bioinformatics-Training/wiki/HGAP
+[ce-img]: http://lh3lh3.users.sourceforge.net/download/ce-miniasm.png
diff --git a/asg.c b/asg.c
new file mode 100644
index 0000000..497828a
--- /dev/null
+++ b/asg.c
@@ -0,0 +1,433 @@
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+#include "asg.h"
+#include "kvec.h"
+
+#include "ksort.h"
+#define asg_arc_key(a) ((a).ul)
+KRADIX_SORT_INIT(asg, asg_arc_t, asg_arc_key, 8)
+
+asg_t *asg_init(void)
+{
+	return (asg_t*)calloc(1, sizeof(asg_t));
+}
+
+void asg_destroy(asg_t *g)
+{
+	if (g == 0) return;
+	free(g->seq); free(g->idx); free(g->arc); free(g);
+}
+
+void asg_arc_sort(asg_t *g)
+{
+	radix_sort_asg(g->arc, g->arc + g->n_arc);
+}
+
+uint64_t *asg_arc_index_core(size_t max_seq, size_t n, const asg_arc_t *a) // build a per-vertex lookup table over the n arcs in a[]; arcs sharing a source vertex must be contiguous
+{
+	size_t i, last;
+	uint64_t *idx;
+	idx = (uint64_t*)calloc(max_seq * 2, 8); // one zeroed 64-bit slot per vertex (two vertices per sequence); vertices without arcs keep 0
+	for (i = 1, last = 0; i <= n; ++i) // [last, i) is the current run of arcs with the same source vertex (ul>>32)
+		if (i == n || a[i-1].ul>>32 != a[i].ul>>32) // the run ends at the end of the array (a[i] is not read then) or where the source changes
+			idx[a[i-1].ul>>32] = (uint64_t)last<<32 | (i - last), last = i; // high 32 bits: offset of the run's first arc; low 32 bits: number of arcs in the run
+	return idx; // heap-allocated; released with free() by the owner of the graph
+}
+
+void asg_arc_index(asg_t *g)
+{
+	if (g->idx) free(g->idx);
+	g->idx = asg_arc_index_core(g->n_seq, g->n_arc, g->arc);
+}
+
+void asg_seq_set(asg_t *g, int sid, int len, int del)
+{
+	if (sid >= g->m_seq) {
+		g->m_seq = sid + 1;
+		kv_roundup32(g->m_seq);
+		g->seq = (asg_seq_t*)realloc(g->seq, g->m_seq * sizeof(asg_seq_t));
+	}
+	if (sid >= g->n_seq) g->n_seq = sid + 1;
+	g->seq[sid].len = len;
+	g->seq[sid].del = !!del;
+}
+
+// hard remove arcs marked as "del"
+void asg_arc_rm(asg_t *g)
+{
+	uint32_t e, n;
+	for (e = n = 0; e < g->n_arc; ++e) {
+		uint32_t u = g->arc[e].ul>>32, v = g->arc[e].v;
+		if (!g->arc[e].del && !g->seq[u>>1].del && !g->seq[v>>1].del)
+			g->arc[n++] = g->arc[e];
+	}
+	if (n < g->n_arc) { // arc index is out of sync
+		if (g->idx) free(g->idx);
+		g->idx = 0;
+	}
+	g->n_arc = n;
+}
+
+void asg_cleanup(asg_t *g)
+{
+	asg_arc_rm(g);
+	if (!g->is_srt) {
+		asg_arc_sort(g);
+		g->is_srt = 1;
+	}
+	if (g->idx == 0) asg_arc_index(g);
+}
+
+// delete short arcs
+int asg_arc_del_short(asg_t *g, float drop_ratio)
+{
+	uint32_t v, n_vtx = g->n_seq * 2, n_short = 0;
+	for (v = 0; v < n_vtx; ++v) {
+		asg_arc_t *av = asg_arc_a(g, v);
+		uint32_t i, thres, nv = asg_arc_n(g, v);
+		if (nv < 2) continue;
+		thres = (uint32_t)(av[0].ol * drop_ratio + .499);
+		for (i = nv - 1; i >= 1 && av[i].ol < thres; --i);
+		for (i = i + 1; i < nv; ++i)
+			av[i].del = 1, ++n_short;
+	}
+	if (n_short) {
+		asg_cleanup(g);
+		asg_symm(g);
+	}
+	fprintf(stderr, "[M::%s] removed %d short overlaps\n", __func__, n_short);
+	return n_short;
+}
+
+// delete multi-arcs
+int asg_arc_del_multi(asg_t *g)
+{
+	uint32_t *cnt, n_vtx = g->n_seq * 2, n_multi = 0, v;
+	cnt = (uint32_t*)calloc(n_vtx, 4);
+	for (v = 0; v < n_vtx; ++v) {
+		asg_arc_t *av = asg_arc_a(g, v);
+		int32_t i, nv = asg_arc_n(g, v);
+		if (nv < 2) continue;
+		for (i = nv - 1; i >= 0; --i) ++cnt[av[i].v];
+		for (i = nv - 1; i >= 0; --i)
+			if (--cnt[av[i].v] != 0)
+				av[i].del = 1, ++n_multi;
+	}
+	free(cnt);
+	if (n_multi) asg_cleanup(g);
+	fprintf(stderr, "[M::%s] removed %d multi-arcs\n", __func__, n_multi);
+	return n_multi;
+}
+
+// remove asymmetric arcs: u->v is present, but v'->u' not
+int asg_arc_del_asymm(asg_t *g)
+{
+	uint32_t e, n_asymm = 0;
+	for (e = 0; e < g->n_arc; ++e) {
+		uint32_t v = g->arc[e].v^1, u = g->arc[e].ul>>32^1;
+		uint32_t i, nv = asg_arc_n(g, v);
+		asg_arc_t *av = asg_arc_a(g, v);
+		for (i = 0; i < nv; ++i)
+			if (av[i].v == u) break;
+		if (i == nv) g->arc[e].del = 1, ++n_asymm;
+	}
+	if (n_asymm) asg_cleanup(g);
+	fprintf(stderr, "[M::%s] removed %d asymmetric arcs\n", __func__, n_asymm);
+	return n_asymm;
+}
+
+void asg_symm(asg_t *g)
+{
+	asg_arc_del_multi(g);
+	asg_arc_del_asymm(g);
+	g->is_symm = 1;
+}
+
+// transitive reduction; see Myers, 2005
+int asg_arc_del_trans(asg_t *g, int fuzz)
+{
+	uint8_t *mark;
+	uint32_t v, n_vtx = g->n_seq * 2, n_reduced = 0;
+
+	mark = (uint8_t*)calloc(n_vtx, 1);
+	for (v = 0; v < n_vtx; ++v) {
+		uint32_t L, i, nv = asg_arc_n(g, v);
+		asg_arc_t *av = asg_arc_a(g, v);
+		if (nv == 0) continue; // no hits
+		if (g->seq[v>>1].del) {
+			for (i = 0; i < nv; ++i) av[i].del = 1, ++n_reduced;
+			continue;
+		}
+		for (i = 0; i < nv; ++i) mark[av[i].v] = 1;
+		L = asg_arc_len(av[nv-1]) + fuzz;
+		for (i = 0; i < nv; ++i) {
+			uint32_t w = av[i].v;
+			uint32_t j, nw = asg_arc_n(g, w);
+			asg_arc_t *aw = asg_arc_a(g, w);
+			if (mark[av[i].v] != 1) continue;
+			for (j = 0; j < nw && asg_arc_len(aw[j]) + asg_arc_len(av[i]) <= L; ++j)
+				if (mark[aw[j].v]) mark[aw[j].v] = 2;
+		}
+		#if 0
+		for (i = 0; i < nv; ++i) {
+			uint32_t w = av[i].v;
+			uint32_t j, nw = asg_arc_n(g, w);
+			asg_arc_t *aw = asg_arc_a(g, w);
+			for (j = 0; j < nw && (j == 0 || asg_arc_len(aw[j]) < fuzz); ++j)
+				if (mark[aw[j].v]) mark[aw[j].v] = 2;
+		}
+		#endif
+		for (i = 0; i < nv; ++i) {
+			if (mark[av[i].v] == 2) av[i].del = 1, ++n_reduced;
+			mark[av[i].v] = 0;
+		}
+	}
+	free(mark);
+	fprintf(stderr, "[M::%s] transitively reduced %d arcs\n", __func__, n_reduced);
+	if (n_reduced) {
+		asg_cleanup(g);
+		asg_symm(g);
+	}
+	return n_reduced;
+}
+
+/**********************************
+ * Filter short potential unitigs *
+ **********************************/
+
+#define ASG_ET_MERGEABLE 0
+#define ASG_ET_TIP       1
+#define ASG_ET_MULTI_OUT 2
+#define ASG_ET_MULTI_NEI 3
+
+static inline int asg_is_utg_end(const asg_t *g, uint32_t v, uint64_t *lw)
+{
+	uint32_t w, nv, nw, nw0, nv0 = asg_arc_n(g, v^1);
+	int i, i0 = -1;
+	asg_arc_t *aw, *av = asg_arc_a(g, v^1);
+	for (i = nv = 0; i < nv0; ++i)
+		if (!av[i].del) i0 = i, ++nv;
+	if (nv == 0) return ASG_ET_TIP; // tip
+	if (nv > 1) return ASG_ET_MULTI_OUT; // multiple outgoing arcs
+	if (lw) *lw = av[i0].ul<<32 | av[i0].v;
+	w = av[i0].v ^ 1;
+	nw0 = asg_arc_n(g, w);
+	aw = asg_arc_a(g, w);
+	for (i = nw = 0; i < nw0; ++i)
+		if (!aw[i].del) ++nw;
+	if (nw != 1) return ASG_ET_MULTI_NEI;
+	return ASG_ET_MERGEABLE;
+}
+
+int asg_extend(const asg_t *g, uint32_t v, int max_ext, asg64_v *a)
+{
+	int ret;
+	uint64_t lw;
+	a->n = 0;
+	kv_push(uint64_t, *a, v);
+	do {
+		ret = asg_is_utg_end(g, v^1, &lw);
+		if (ret != 0) break;
+		kv_push(uint64_t, *a, lw);
+		v = (uint32_t)lw;
+	} while (--max_ext > 0);
+	return ret;
+}
+
+int asg_cut_tip(asg_t *g, int max_ext)
+{
+	asg64_v a = {0,0,0};
+	uint32_t n_vtx = g->n_seq * 2, v, i, cnt = 0;
+	for (v = 0; v < n_vtx; ++v) {
+		if (g->seq[v>>1].del) continue;
+		if (asg_is_utg_end(g, v, 0) != ASG_ET_TIP) continue; // not a tip
+		if (asg_extend(g, v, max_ext, &a) == ASG_ET_MERGEABLE) continue; // not a short unitig
+		for (i = 0; i < a.n; ++i)
+			asg_seq_del(g, (uint32_t)a.a[i]>>1);
+		++cnt;
+	}
+	free(a.a);
+	if (cnt > 0) asg_cleanup(g);
+	fprintf(stderr, "[M::%s] cut %d tips\n", __func__, cnt);
+	return cnt;
+}
+
+int asg_cut_internal(asg_t *g, int max_ext)
+{
+	asg64_v a = {0,0,0};
+	uint32_t n_vtx = g->n_seq * 2, v, i, cnt = 0;
+	for (v = 0; v < n_vtx; ++v) {
+		if (g->seq[v>>1].del) continue;
+		if (asg_is_utg_end(g, v, 0) != ASG_ET_MULTI_NEI) continue;
+		if (asg_extend(g, v, max_ext, &a) != ASG_ET_MULTI_NEI) continue;
+		for (i = 0; i < a.n; ++i)
+			asg_seq_del(g, (uint32_t)a.a[i]>>1);
+		++cnt;
+	}
+	free(a.a);
+	if (cnt > 0) asg_cleanup(g);
+	fprintf(stderr, "[M::%s] cut %d internal sequences\n", __func__, cnt);
+	return cnt;
+}
+
+int asg_cut_biloop(asg_t *g, int max_ext)
+{
+	asg64_v a = {0,0,0};
+	uint32_t n_vtx = g->n_seq * 2, v, i, cnt = 0;
+	for (v = 0; v < n_vtx; ++v) {
+		uint32_t nv, nw, w = UINT32_MAX, x, ov = 0, ox = 0;
+		asg_arc_t *av, *aw;
+		if (g->seq[v>>1].del) continue;
+		if (asg_is_utg_end(g, v, 0) != ASG_ET_MULTI_NEI) continue;
+		if (asg_extend(g, v, max_ext, &a) != ASG_ET_MULTI_OUT) continue;
+		x = (uint32_t)a.a[a.n - 1] ^ 1;
+		nv = asg_arc_n(g, v ^ 1), av = asg_arc_a(g, v ^ 1);
+		for (i = 0; i < nv; ++i)
+			if (!av[i].del) w = av[i].v ^ 1;
+		assert(w != UINT32_MAX);
+		nw = asg_arc_n(g, w), aw = asg_arc_a(g, w);
+		for (i = 0; i < nw; ++i) { // we are looking for: v->...->x', w->v and w->x
+			if (aw[i].del) continue;
+			if (aw[i].v == x) ox = aw[i].ol;
+			if (aw[i].v == v) ov = aw[i].ol;
+		}
+		if (ov == 0 && ox == 0) continue;
+		if (ov > ox) {
+			asg_arc_del(g, w, x, 1);
+			asg_arc_del(g, x^1, w^1, 1);
+			++cnt;
+		}
+	}
+	free(a.a);
+	if (cnt > 0) asg_cleanup(g);
+	fprintf(stderr, "[M::%s] cut %d small bi-loops\n", __func__, cnt);
+	return cnt;
+}
+
+/******************
+ * Bubble popping *
+ ******************/
+
+typedef struct {
+	uint32_t p; // the optimal parent vertex
+	uint32_t d; // the shortest distance from the initial vertex
+	uint32_t c; // max count of reads
+	uint32_t r:31, s:1; // r: the number of remaining incoming arc; s: state
+} binfo_t;
+
+typedef struct {
+	binfo_t *a;
+	kvec_t(uint32_t) S; // set of vertices without parents
+	kvec_t(uint32_t) T; // set of tips
+	kvec_t(uint32_t) b; // visited vertices
+	kvec_t(uint32_t) e; // visited edges/arcs
+} buf_t;
+
+// count the number of outgoing arcs, excluding reduced arcs
+static inline int count_out(const asg_t *g, uint32_t v)
+{
+	uint32_t i, n, nv = asg_arc_n(g, v);
+	const asg_arc_t *av = asg_arc_a(g, v);
+	for (i = n = 0; i < nv; ++i)
+		if (!av[i].del) ++n;
+	return n;
+}
+
+// in a resolved bubble, mark unused vertices and arcs as "reduced"
+static void asg_bub_backtrack(asg_t *g, uint32_t v0, buf_t *b)
+{
+	uint32_t i, v;
+	assert(b->S.n == 1);
+	for (i = 0; i < b->b.n; ++i)
+		g->seq[b->b.a[i]>>1].del = 1;
+	for (i = 0; i < b->e.n; ++i) {
+		asg_arc_t *a = &g->arc[b->e.a[i]];
+		a->del = 1;
+		asg_arc_del(g, a->v^1, a->ul>>32^1, 1);
+	}
+	v = b->S.a[0];
+	do {
+		uint32_t u = b->a[v].p; // u->v
+		g->seq[v>>1].del = 0;
+		asg_arc_del(g, u, v, 0);
+		asg_arc_del(g, v^1, u^1, 0);
+		v = u;
+	} while (v != v0);
+}
+
+// pop bubbles from vertex v0; the graph MUST BE symmetric: if u->v present, v'->u' must be present as well
+static uint64_t asg_bub_pop1(asg_t *g, uint32_t v0, int max_dist, buf_t *b)
+{
+	uint32_t i, n_pending = 0;
+	uint64_t n_pop = 0;
+	if (g->seq[v0>>1].del) return 0; // already deleted
+	if ((uint32_t)g->idx[v0] < 2) return 0; // no bubbles
+	b->S.n = b->T.n = b->b.n = b->e.n = 0;
+	b->a[v0].c = b->a[v0].d = 0;
+	kv_push(uint32_t, b->S, v0);
+	do {
+		uint32_t v = kv_pop(b->S), d = b->a[v].d, c = b->a[v].c;
+		uint32_t nv = asg_arc_n(g, v);
+		asg_arc_t *av = asg_arc_a(g, v);
+		assert(nv > 0);
+		for (i = 0; i < nv; ++i) { // loop through v's neighbors
+			uint32_t w = av[i].v, l = (uint32_t)av[i].ul; // u->w with length l
+			binfo_t *t = &b->a[w];
+			if (w == v0) goto pop_reset;
+			if (av[i].del) continue;
+			kv_push(uint32_t, b->e, (g->idx[v]>>32) + i);
+			if (d + l > max_dist) break; // too far
+			if (t->s == 0) { // this vertex has never been visited
+				kv_push(uint32_t, b->b, w); // save it for revert
+				t->p = v, t->s = 1, t->d = d + l;
+				t->r = count_out(g, w^1);
+				++n_pending;
+			} else { // visited before
+				if (c + 1 > t->c || (c + 1 == t->c && d + l > t->d)) t->p = v;
+				if (c + 1 > t->c) t->c = c + 1;
+				if (d + l < t->d) t->d = d + l; // update dist
+			}
+			assert(t->r > 0);
+			if (--(t->r) == 0) {
+				uint32_t x = asg_arc_n(g, w);
+				if (x) kv_push(uint32_t, b->S, w);
+				else kv_push(uint32_t, b->T, w); // a tip
+				--n_pending;
+			}
+		}
+		if (i < nv || b->S.n == 0) goto pop_reset;
+	} while (b->S.n > 1 || n_pending);
+	asg_bub_backtrack(g, v0, b);
+	n_pop = 1 | (uint64_t)b->T.n<<32;
+pop_reset:
+	for (i = 0; i < b->b.n; ++i) { // clear the states of visited vertices
+		binfo_t *t = &b->a[b->b.a[i]];
+		t->s = t->c = t->d = 0;
+	}
+	return n_pop;
+}
+
+// pop bubbles
+int asg_pop_bubble(asg_t *g, int max_dist)
+{
+	uint32_t v, n_vtx = g->n_seq * 2;
+	uint64_t n_pop = 0;
+	buf_t b;
+	if (!g->is_symm) asg_symm(g);
+	memset(&b, 0, sizeof(buf_t));
+	b.a = (binfo_t*)calloc(n_vtx, sizeof(binfo_t));
+	for (v = 0; v < n_vtx; ++v) {
+		uint32_t i, n_arc = 0, nv = asg_arc_n(g, v);
+		asg_arc_t *av = asg_arc_a(g, v);
+		if (nv < 2 || g->seq[v>>1].del) continue;
+		for (i = 0; i < nv; ++i) // asg_bub_pop1() may delete some edges/arcs
+			if (!av[i].del) ++n_arc;
+		if (n_arc > 1)
+			n_pop += asg_bub_pop1(g, v, max_dist, &b);
+	}
+	free(b.a); free(b.S.a); free(b.T.a); free(b.b.a); free(b.e.a);
+	if (n_pop) asg_cleanup(g);
+	fprintf(stderr, "[M::%s] popped %d bubbles and trimmed %d tips\n", __func__, (uint32_t)n_pop, (uint32_t)(n_pop>>32));
+	return n_pop;
+}
diff --git a/asg.h b/asg.h
new file mode 100644
index 0000000..942cea0
--- /dev/null
+++ b/asg.h
@@ -0,0 +1,79 @@
+#ifndef ASG_H
+#define ASG_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+typedef struct { // one directed arc of the graph; a vertex id is (sequence id)<<1 | orientation bit
+	uint64_t ul; // high 32 bits: source vertex; low 32 bits: arc length as read by asg_arc_len()
+	uint32_t v; // destination vertex
+	uint32_t ol:31, del:1; // ol: overlap length; del: 1 marks the arc as deleted until asg_arc_rm() drops it
+} asg_arc_t;
+
+typedef struct {
+	uint32_t len:31, del:1;
+} asg_seq_t;
+
+typedef struct {
+	uint32_t m_arc, n_arc:31, is_srt:1;
+	asg_arc_t *arc;
+	uint32_t m_seq, n_seq:31, is_symm:1;
+	asg_seq_t *seq;
+	uint64_t *idx;
+} asg_t;
+
+typedef struct { size_t n, m; uint64_t *a; } asg64_v;
+
+#define asg_arc_len(arc) ((uint32_t)(arc).ul)
+#define asg_arc_n(g, v) ((uint32_t)(g)->idx[(v)])
+#define asg_arc_a(g, v) (&(g)->arc[(g)->idx[(v)]>>32])
+
+asg_t *asg_init(void);
+void asg_destroy(asg_t *g);
+void asg_seq_set(asg_t *g, int sid, int len, int del);
+void asg_symm(asg_t *g);
+void asg_cleanup(asg_t *g);
+
+int asg_arc_del_short(asg_t *g, float drop_ratio);
+int asg_arc_del_trans(asg_t *g, int fuzz);
+int asg_cut_tip(asg_t *g, int max_ext);
+int asg_cut_internal(asg_t *g, int max_ext);
+int asg_cut_biloop(asg_t *g, int max_ext);
+int asg_pop_bubble(asg_t *g, int max_dist);
+
+// append an arc
+static inline asg_arc_t *asg_arc_pushp(asg_t *g)
+{
+	if (g->n_arc == g->m_arc) {
+		g->m_arc = g->m_arc? g->m_arc<<1 : 16;
+		g->arc = (asg_arc_t*)realloc(g->arc, g->m_arc * sizeof(asg_arc_t));
+	}
+	return &g->arc[g->n_arc++];
+}
+
+// set asg_arc_t::del for v->w
+static inline void asg_arc_del(asg_t *g, uint32_t v, uint32_t w, int del)
+{
+	uint32_t i, nv = asg_arc_n(g, v);
+	asg_arc_t *av = asg_arc_a(g, v);
+	for (i = 0; i < nv; ++i)
+		if (av[i].v == w) av[i].del = !!del;
+}
+
+// set asg_arc_t::del and asg_seq_t::del to 1 for sequence s and all its associated arcs
+static inline void asg_seq_del(asg_t *g, uint32_t s)
+{
+	uint32_t k;
+	g->seq[s].del = 1;
+	for (k = 0; k < 2; ++k) {
+		uint32_t i, v = s<<1 | k;
+		uint32_t nv = asg_arc_n(g, v);
+		asg_arc_t *av = asg_arc_a(g, v);
+		for (i = 0; i < nv; ++i) {
+			av[i].del = 1;
+			asg_arc_del(g, av[i].v^1, v^1, 1);
+		}
+	}
+}
+
+#endif
diff --git a/asm.c b/asm.c
new file mode 100644
index 0000000..0282fae
--- /dev/null
+++ b/asm.c
@@ -0,0 +1,286 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+#include "miniasm.h"
+#include "kvec.h"
+
+asg_t *ma_sg_gen(const ma_opt_t *opt, const sdict_t *d, const ma_sub_t *sub, size_t n_hits, const ma_hit_t *hit)
+{
+	size_t i;
+	asg_t *g;
+	g = asg_init();
+	for (i = 0; i < d->n_seq; ++i) {
+		if (sub) asg_seq_set(g, i, sub[i].e - sub[i].s, (sub[i].del || d->seq[i].del));
+		else asg_seq_set(g, i, d->seq[i].len, d->seq[i].del);
+	}
+	for (i = 0; i < n_hits; ++i) {
+		int r;
+		asg_arc_t t, *p;
+		const ma_hit_t *h = &hit[i];
+		uint32_t qn = h->qns>>32;
+		int ql = sub? sub[qn].e - sub[qn].s : d->seq[qn].len;
+		int tl = sub? sub[h->tn].e - sub[h->tn].s : d->seq[h->tn].len;
+		r = ma_hit2arc(h, ql, tl, opt->max_hang, opt->int_frac, opt->min_ovlp, &t);
+		if (r >= 0) {
+			if (qn == h->tn) { // self match
+				if ((uint32_t)h->qns == h->ts && h->qe == h->te && h->rev) // PacBio-specific artifact (TODO: is this right when we skip target containment above?)
+					g->seq[qn].del = 1;
+				continue;
+			}
+			p = asg_arc_pushp(g);
+			*p = t;
+		} else if (r == MA_HT_QCONT) g->seq[qn].del = 1;
+	}
+	asg_cleanup(g);
+	fprintf(stderr, "[M::%s] read %d arcs\n", __func__, g->n_arc);
+	return g;
+}
+
+void ma_sg_print(const asg_t *g, const sdict_t *d, const ma_sub_t *sub, FILE *fp)
+{
+	uint32_t i;
+	for (i = 0; i < g->n_arc; ++i) {
+		const asg_arc_t *p = &g->arc[i];
+		if (sub) {
+			const ma_sub_t *sq = &sub[p->ul>>33], *st = &sub[p->v>>1];
+			fprintf(fp, "L\t%s:%d-%d\t%c\t%s:%d-%d\t%c\t%dM\tSD:i:%d\n", d->seq[p->ul>>33].name, sq->s + 1, sq->e, "+-"[p->ul>>32&1],
+					d->seq[p->v>>1].name, st->s + 1, st->e, "+-"[p->v&1], p->ol, (uint32_t)p->ul);
+		} else {
+			fprintf(fp, "L\t%s\t%c\t%s\t%c\t%dM\tSD:i:%d\n", d->seq[p->ul>>33].name, "+-"[p->ul>>32&1],
+					d->seq[p->v>>1].name, "+-"[p->v&1], p->ol, (uint32_t)p->ul);
+		}
+	}
+}
+
+/*********************
+ * Unitig generation *
+ *********************/
+
+#include "kdq.h"
+KDQ_INIT(uint64_t)
+
+void ma_ug_destroy(ma_ug_t *ug)
+{
+	uint32_t i;
+	if (ug == 0) return;
+	for (i = 0; i < ug->u.n; ++i) {
+		free(ug->u.a[i].a);
+		free(ug->u.a[i].s);
+	}
+	free(ug->u.a);
+	asg_destroy(ug->g);
+	free(ug);
+}
+
+void ma_ug_print(const ma_ug_t *ug, const sdict_t *d, const ma_sub_t *sub, FILE *fp)
+{
+	uint32_t i, j, l;
+	char name[32];
+	for (i = 0; i < ug->u.n; ++i) { // the Segment lines in GFA
+		ma_utg_t *p = &ug->u.a[i];
+		sprintf(name, "utg%.6d%c", i + 1, "lc"[p->circ]);
+		fprintf(fp, "S\t%s\t%s\tLN:i:%d\n", name, p->s? p->s : "*", p->len);
+		for (j = l = 0; j < p->n; l += (uint32_t)p->a[j++]) {
+			uint32_t x = p->a[j]>>33;
+			if (sub) fprintf(fp, "a\t%s\t%d\t%s:%d-%d\t%c\t%d\n", name, l, d->seq[x].name, sub[x].s + 1, sub[x].e, "+-"[p->a[j]>>32&1], (uint32_t)p->a[j]);
+			else fprintf(fp, "a\t%s\t%d\t%s\t%c\t%d\n", name, l, d->seq[x].name, "+-"[p->a[j]>>32&1], (uint32_t)p->a[j]);
+		}
+	}
+	for (i = 0; i < ug->g->n_arc; ++i) { // the Link lines in GFA
+		uint32_t u = ug->g->arc[i].ul>>32, v = ug->g->arc[i].v;
+		fprintf(fp, "L\tutg%.6d%c\t%c\tutg%.6d%c\t%c\t%dM\tSD:i:%d\n", (u>>1)+1, "lc"[ug->u.a[u>>1].circ], "+-"[u&1],
+				(v>>1)+1, "lc"[ug->u.a[v>>1].circ], "+-"[v&1], ug->g->arc[i].ol, asg_arc_len(ug->g->arc[i]));
+	}
+	for (i = 0; i < ug->u.n; ++i) { // summary of unitigs
+		uint32_t cnt[2];
+		ma_utg_t *u = &ug->u.a[i];
+		if (u->start == UINT32_MAX) {
+			fprintf(fp, "x\tutg%.6dc\t%d\t%d\n", i + 1, u->len, u->n);
+		} else {
+			for (j = 0; j < 2; ++j) cnt[j] = asg_arc_n(ug->g, i<<1|j);
+			if (sub)
+				fprintf(fp, "x\tutg%.6dl\t%d\t%d\t%d\t%d\t%s:%d-%d\t%c\t%s:%d-%d\t%c\n", i + 1, u->len, u->n, cnt[1], cnt[0],
+						d->seq[u->start>>1].name, sub[u->start>>1].s + 1, sub[u->start>>1].e, "+-"[u->start&1],
+						d->seq[u->end>>1].name, sub[u->end>>1].s + 1, sub[u->end>>1].e, "+-"[u->end&1]);
+			else
+				fprintf(fp, "x\tutg%.6dl\t%d\t%d\t%d\t%d\t%s\t%c\t%s\t%c\n", i + 1, u->len, u->n, cnt[1], cnt[0],
+						d->seq[u->start>>1].name, "+-"[u->start&1], d->seq[u->end>>1].name, "+-"[u->end&1]);
+		}
+	}
+}
+
+#define arc_cnt(g, v) ((uint32_t)(g)->idx[(v)])
+#define arc_first(g, v) ((g)->arc[(g)->idx[(v)]>>32])
+
+ma_ug_t *ma_ug_gen(asg_t *g)
+{
+	int32_t *mark;
+	uint32_t i, v, n_vtx = g->n_seq * 2;
+	kdq_t(uint64_t) *q;
+	ma_ug_t *ug;
+
+	ug = (ma_ug_t*)calloc(1, sizeof(ma_ug_t));
+	ug->g = asg_init();
+	mark = (int32_t*)calloc(n_vtx, 4);
+
+	q = kdq_init(uint64_t);
+	for (v = 0; v < n_vtx; ++v) {
+		uint32_t w, x, l, start, end, len;
+		ma_utg_t *p;
+		if (g->seq[v>>1].del || arc_cnt(g, v) == 0 || mark[v]) continue;
+		mark[v] = 1;
+		q->count = 0, start = v, end = v^1, len = 0;
+		// forward
+		w = v;
+		while (1) {
+			if (arc_cnt(g, w) != 1) break;
+			x = arc_first(g, w).v; // w->x
+			if (arc_cnt(g, x^1) != 1) break;
+			mark[x] = mark[w^1] = 1;
+			l = asg_arc_len(arc_first(g, w));
+			kdq_push(uint64_t, q, (uint64_t)w<<32 | l);
+			end = x^1, len += l;
+			w = x;
+			if (x == v) break;
+		}
+		if (start != (end^1) || kdq_size(q) == 0) { // linear unitig
+			l = g->seq[end>>1].len;
+			kdq_push(uint64_t, q, (uint64_t)(end^1)<<32 | l);
+			len += l;
+		} else { // circular unitig
+			start = end = UINT32_MAX;
+			goto add_unitig; // then it is not necessary to do the backward
+		}
+		// backward
+		x = v;
+		while (1) { // similar to forward but not the same
+			if (arc_cnt(g, x^1) != 1) break;
+			w = arc_first(g, x^1).v ^ 1; // w->x
+			if (arc_cnt(g, w) != 1) break;
+			mark[x] = mark[w^1] = 1;
+			l = asg_arc_len(arc_first(g, w));
+			kdq_unshift(uint64_t, q, (uint64_t)w<<32 | l);
+			start = w, len += l;
+			x = w;
+		}
+add_unitig:
+		if (start != UINT32_MAX) mark[start] = mark[end] = 1;
+		kv_pushp(ma_utg_t, ug->u, &p);
+		p->s = 0, p->start = start, p->end = end, p->len = len, p->n = kdq_size(q), p->circ = (start == UINT32_MAX);
+		p->m = p->n;
+		kv_roundup32(p->m);
+		p->a = (uint64_t*)malloc(8 * p->m);
+		for (i = 0; i < kdq_size(q); ++i)
+			p->a[i] = kdq_at(q, i);
+	}
+	kdq_destroy(uint64_t, q);
+
+	// add arcs between unitigs; reusing mark for a different purpose
+	for (v = 0; v < n_vtx; ++v) mark[v] = -1;
+	for (i = 0; i < ug->u.n; ++i) {
+		if (ug->u.a[i].circ) continue;
+		mark[ug->u.a[i].start] = i<<1 | 0;
+		mark[ug->u.a[i].end] = i<<1 | 1;
+	}
+	for (i = 0; i < g->n_arc; ++i) {
+		asg_arc_t *p = &g->arc[i];
+		if (p->del) continue;
+		if (mark[p->ul>>32^1] >= 0 && mark[p->v] >= 0) {
+			asg_arc_t *q;
+			uint32_t u = mark[p->ul>>32^1]^1;
+			int l = ug->u.a[u>>1].len - p->ol;
+			if (l < 0) l = 1;
+			q = asg_arc_pushp(ug->g);
+			q->ol = p->ol, q->del = 0;
+			q->ul = (uint64_t)u<<32 | l;
+			q->v = mark[p->v];
+		}
+	}
+	for (i = 0; i < ug->u.n; ++i)
+		asg_seq_set(ug->g, i, ug->u.a[i].len, 0);
+	asg_cleanup(ug->g);
+	free(mark);
+	return ug;
+}
+
+/*******************
+ * Unitig sequence *
+ *******************/
+
+#include <zlib.h>
+#include "kseq.h"
+KSEQ_INIT(gzFile, gzread)
+
+typedef struct {
+	uint32_t utg:31, ori:1, start, len;
+} utg_intv_t;
+
+static char comp_tab[] = { // complement of each 7-bit ASCII code: nucleotide letters map to their complement (case kept), every other code to itself
+	  0,   1,	2,	 3,	  4,   5,	6,	 7,	  8,   9,  10,	11,	 12,  13,  14,	15,
+	 16,  17,  18,	19,	 20,  21,  22,	23,	 24,  25,  26,	27,	 28,  29,  30,	31,
+	 32,  33,  34,	35,	 36,  37,  38,	39,	 40,  41,  42,	43,	 44,  45,  46,	47,
+	 48,  49,  50,	51,	 52,  53,  54,	55,	 56,  57,  58,	59,	 60,  61,  62,	63,
+	 64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O',
+	'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z',	91,	 92,  93,  94,	95,
+	 96, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o',
+	'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127
+};
+
+// generate unitig sequences
+int ma_ug_seq(ma_ug_t *g, const sdict_t *d, const ma_sub_t *sub, const char *fn)
+{
+	gzFile fp;
+	kseq_t *ks;
+	utg_intv_t *tmp;
+	uint32_t i, j;
+
+	fp = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) return -1;
+	ks = kseq_init(fp);
+
+	tmp = (utg_intv_t*)calloc(d->n_seq, sizeof(utg_intv_t));
+	for (i = 0; i < g->u.n; ++i) {
+		ma_utg_t *u = &g->u.a[i];
+		uint32_t l = 0;
+		u->s = (char*)calloc(1, u->len + 1);
+		memset(u->s, 'N', u->len);
+		for (j = 0; j < u->n; ++j) {
+			utg_intv_t *t = &tmp[u->a[j]>>33];
+			assert(t->len == 0);
+			t->utg = i, t->ori = u->a[j]>>32&1;
+			t->start = l, t->len = (uint32_t)u->a[j];
+			l += t->len;
+		}
+	}
+
+	while (kseq_read(ks) >= 0) {
+		int32_t id;
+		utg_intv_t *t;
+		ma_utg_t *u;
+		id = sd_get(d, ks->name.s);
+		if (id < 0 || tmp[id].len == 0) continue;
+		t = &tmp[id];
+		u = &g->u.a[t->utg];
+		if (sub) {
+			assert(sub[id].e - sub[id].s <= ks->seq.l);
+			memmove(ks->seq.s, ks->seq.s + sub[id].s, sub[id].e - sub[id].s);
+			ks->seq.l = sub[id].e - sub[id].s;
+		}
+		if (!t->ori) { // forward strand
+			for (i = 0; i < t->len; ++i)
+				u->s[t->start + i] = ks->seq.s[i];
+		} else {
+			for (i = 0; i < t->len; ++i) {
+				int c = (uint8_t)ks->seq.s[ks->seq.l - 1 - i];
+				u->s[t->start + i] = c >= 128? 'N' : comp_tab[c];
+			}
+		}
+	}
+	free(tmp);
+
+	kseq_destroy(ks);
+	gzclose(fp);
+	return 0;
+}
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..d52224e
--- /dev/null
+++ b/common.c
@@ -0,0 +1,24 @@
+#include "miniasm.h"
+
+int ma_verbose = 3;
+
+void ma_opt_init(ma_opt_t *opt)
+{
+	// Load the built-in defaults into *opt. Only the fields named here are written.
+	opt->min_match = 100;
+	opt->min_dp = 3;
+	opt->min_iden = 0.05, opt->cov_ratio = 0.0;
+	opt->min_span = 2000;
+	opt->min_ovlp = opt->min_span; // the default overlap threshold tracks the min_span default
+
+	opt->max_hang = 1000;
+	opt->int_frac = 0.8;
+
+	opt->gap_fuzz = 1000, opt->bub_dist = 50000;
+	opt->n_rounds = 2, opt->max_ext = 4;
+
+	// the three overlap drop ratios, in increasing order
+	opt->min_ovlp_drop_ratio = 0.5;
+	opt->max_ovlp_drop_ratio = 0.7;
+	opt->final_ovlp_drop_ratio = 0.8;
+}
diff --git a/dotter.c b/dotter.c
new file mode 100644
index 0000000..1dc10e2
--- /dev/null
+++ b/dotter.c
@@ -0,0 +1,197 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <math.h>
+#include "paf.h"
+#include "sdict.h"
+#include "kvec.h"
+#include "eps.h"
+
+typedef struct {
+	uint32_t qn, qs, qe; // query: id returned by sd_put() and the hit's start/end on it
+	uint32_t tn, ts, te; // target: likewise, except that ts/te are stored swapped (ts > te) for reverse-strand hits
+	uint32_t ml; // copied from the ml field of the PAF record
+} dt_hit_t;
+
+typedef struct {
+	const char *name; // sequence name; points into the sdict entry and is not owned by this record
+	double tot; // weighted sum of hit coordinates, later divided by w to give a mean
+	uint64_t w; // total weight accumulated into tot
+	uint32_t i; // index of the sequence in its sdict, kept so the original id survives sorting
+} srtaux_t;
+
+static inline int mixed_numcompare(const char *_a, const char *_b)
+{ // three-way comparison of two names in which embedded runs of digits are ordered by numeric value
+	const unsigned char *s0 = (const unsigned char*)_a, *t0 = (const unsigned char*)_b;
+	const unsigned char *s = s0, *t = t0; // read cursors into _a and _b
+	for (; *s && *t; ) {
+		if (!isdigit(*s) || !isdigit(*t)) { // not a digit on both sides: plain byte comparison
+			if (*s != *t) return (int)*s - (int)*t;
+			++s, ++t;
+			continue;
+		}
+		for (; *s == '0'; ++s) {} // leading zeros carry no value
+		for (; *t == '0'; ++t) {}
+		for (; isdigit(*s) && isdigit(*t) && *s == *t; ++s, ++t) {} // skip the common digit prefix
+		if (isdigit(*s) && isdigit(*t)) { // digits differ: more remaining digits wins, otherwise this digit decides
+			int k = 0;
+			while (isdigit(s[k]) && isdigit(t[k])) ++k;
+			return isdigit(s[k])? 1 : isdigit(t[k])? -1 : (int)*s - (int)*t;
+		}
+		if (isdigit(*s) || isdigit(*t)) return isdigit(*s)? 1 : -1; // only one side still has digits left
+		if (s - s0 != t - t0) return s - s0 < t - t0? 1 : -1; // same digits at different offsets: the side that consumed fewer characters is greater
+	}
+	return *s? 1 : *t? -1 : 0; // a strict prefix sorts first
+}
+
+#include "ksort.h"
+#define srtx_lt(a, b) (mixed_numcompare((a).name, (b).name) < 0)
+KSORT_INIT(dtx, srtaux_t, srtx_lt)
+#define srty_lt(a, b) ((a).tot < (b).tot)
+KSORT_INIT(dty, srtaux_t, srty_lt)
+
+// minidot: read a PAF file and write an EPS dot plot of its hits to stdout
+int main(int argc, char *argv[])
+{
+	int min_span = 1000, min_match = 100, width = 600, height, diagonal = 1;
+	int color[2] = { 0xFF0000, 0x0080FF }, font_size = 11, no_label = 0; // color[0]: forward-strand hits, color[1]: reverse-strand hits
+	float min_iden = .1;
+	paf_file_t *f;
+	sdict_t *d[2]; // d[0]: query sequences (x axis), d[1]: target sequences (y axis)
+	paf_rec_t r;
+	int32_t c, i, j;
+	uint64_t *acclen[2], totlen[2]; // acclen[k][id]: summed length of the sequences placed before id on axis k; totlen[k]: total length
+	srtaux_t *a[2]; // a[k]: sequences of axis k in plotting order
+	kvec_t(dt_hit_t) h = {0,0,0};
+	double sx, sy; // plot units per base along x and y
+
+	while ((c = getopt(argc, argv, "m:i:s:w:f:Ld")) >= 0) {
+		if (c == 'm') min_match = atoi(optarg);
+		else if (c == 'i') min_iden = atof(optarg);
+		else if (c == 's') min_span = atoi(optarg);
+		else if (c == 'w') width = atoi(optarg);
+		else if (c == 'f') font_size = atoi(optarg);
+		else if (c == 'L') no_label = 1;
+		else if (c == 'd') diagonal = 0;
+	}
+	if (argc == optind) {
+		fprintf(stderr, "Usage: minidot [options] <in.paf>\n");
+		fprintf(stderr, "Options:\n");
+		fprintf(stderr, "  -m INT      min match length [%d]\n", min_match);
+		fprintf(stderr, "  -i FLOAT    min identity [%.2f]\n", min_iden);
+		fprintf(stderr, "  -s INT      min span [%d]\n", min_span);
+		fprintf(stderr, "  -w INT      image width [%d]\n", width);
+		fprintf(stderr, "  -f INT      font size [%d]\n", font_size);
+		fprintf(stderr, "  -L          don't print labels\n");
+		fprintf(stderr, "  -d          don't try to put hits onto the diagonal\n"); // the option parsed above is lowercase -d
+		return 1;
+	}
+
+	d[0] = sd_init();
+	d[1] = sd_init();
+
+	f = paf_open(argv[optind]);
+	if (f == 0) { // paf_open() is defined elsewhere; presumably a null handle means the input could not be opened
+		fprintf(stderr, "ERROR: failed to open \"%s\"\n", argv[optind]);
+		return 1;
+	}
+	while (paf_read(f, &r) >= 0) { // keep the hits that pass the span, match-length and identity filters
+		dt_hit_t *s;
+		if (r.qe - r.qs < min_span || r.te - r.ts < min_span || r.ml < min_match) continue;
+		if (r.ml < r.bl * min_iden) continue;
+		kv_pushp(dt_hit_t, h, &s);
+		s->qn = sd_put(d[0], r.qn, r.ql), s->qs = r.qs, s->qe = r.qe;
+		s->tn = sd_put(d[1], r.tn, r.tl);
+		s->ts = r.rev? r.te : r.ts, s->te = r.rev? r.ts : r.te; // reverse hits are stored with ts > te
+		s->ml = r.ml;
+	}
+	paf_close(f);
+
+	for (i = 0; i < 2; ++i) { // work out the plotting order and the offsets of the sequences on each axis
+		uint32_t n = d[i]->n_seq;
+		uint64_t l = 0;
+		a[i] = (srtaux_t*)calloc(n + 1, sizeof(srtaux_t));
+		if (i == 0 || !diagonal) { // order by name
+			for (j = 0; j < n; ++j)
+				a[i][j].name = d[i]->seq[j].name, a[i][j].i = j;
+			ks_introsort_dtx(n, a[i]);
+		} else { // order targets by the weighted mean x coordinate of their hits so that hits tend to follow the diagonal
+			srtaux_t *b = a[i];
+			for (j = 0; j < n; ++j)
+				b[j].name = d[i]->seq[j].name, b[j].tot = b[j].w = 0, b[j].i = j;
+			for (j = 0; j < h.n; ++j) {
+				uint64_t w, coor;
+				dt_hit_t *p = &h.a[j];
+				srtaux_t *q = &b[p->tn];
+				coor = acclen[0][p->qn] + (p->qs + p->qe) / 2; // acclen is indexed by sequence id (see below), not by sorted rank
+				w = (uint64_t)(.01 * p->ml * p->ml + .499);
+				q->tot += (double)coor * w;
+				q->w += w;
+			}
+			for (j = 0; j < n; ++j) if (b[j].w) b[j].tot /= b[j].w; // skip 0/0, which would give NaN sort keys
+			ks_introsort_dty(n, b);
+		}
+		acclen[i] = (uint64_t*)calloc(n, 8);
+		for (j = 0; j < n; ++j)
+			acclen[i][a[i][j].i] = l, l += d[i]->seq[a[i][j].i].len;
+		totlen[i] = l;
+	}
+	height = (int)((double)width / totlen[0] * totlen[1] + .499); // same scale on both axes, rounded to whole units
+	sx = (double)width / totlen[0];
+	sy = (double)height / totlen[1];
+
+	eps_header(stdout, width, height, .2);
+	eps_font(stdout, "Helvetica-Narrow", font_size);
+	eps_gray(stdout, .8);
+
+	if (!no_label) {
+		// write x labels
+		for (i = 0; i < d[0]->n_seq; ++i)
+			eps_Mstr(stdout, (acclen[0][a[0][i].i] + .5 * d[0]->seq[a[0][i].i].len) * sx, font_size*.5, a[0][i].name);
+		eps_stroke(stdout);
+		fprintf(stdout, "gsave %g 0 translate 90 rotate\n", font_size*1.25);
+		// write y labels; after the 90-degree rotation the first coordinate runs along the page's y axis, hence sy
+		for (i = 0; i < d[1]->n_seq; ++i)
+			eps_Mstr(stdout, (acclen[1][a[1][i].i] + .5 * d[1]->seq[a[1][i].i].len) * sy, 0, a[1][i].name);
+		fprintf(stdout, "grestore\n");
+		eps_stroke(stdout);
+	}
+
+	// write grid lines
+	eps_linewidth(stdout, .1);
+	for (i = 0; i < d[1]->n_seq; ++i)
+		eps_linex(stdout, 1, width, i == 0? 1 : acclen[1][a[1][i].i] * sy);
+	eps_linex(stdout, 1, width, totlen[1] * sy);
+	for (i = 0; i < d[0]->n_seq; ++i)
+		eps_liney(stdout, 1, height, i == 0? 1 : acclen[0][a[0][i].i] * sx);
+	eps_liney(stdout, 1, height, totlen[0] * sx);
+	eps_stroke(stdout);
+
+	// write hits
+	eps_linewidth(stdout, .1);
+	for (j = 0; j < 2; ++j) { // one pass per color: j == 0 draws forward hits, j == 1 reverse hits
+		eps_color(stdout, color[j]);
+		for (i = 0; i < h.n; ++i) {
+			dt_hit_t *p = &h.a[i];
+			double x0, y0, x1, y1;
+			uint64_t xo = acclen[0][p->qn], yo = acclen[1][p->tn];
+			if (j == 0 && p->ts > p->te) continue;
+			if (j == 1 && p->ts < p->te) continue;
+			x0 = (p->qs + xo) * sx, y0 = (p->ts + yo) * sy;
+			x1 = (p->qe + xo) * sx, y1 = (p->te + yo) * sy;
+			eps_line(stdout, x0, y0, x1, y1);
+		}
+		eps_stroke(stdout);
+	}
+	eps_bottom(stdout);
+
+	for (i = 0; i < 2; ++i) {
+		free(acclen[i]);
+		free(a[i]);
+		sd_destroy(d[i]);
+	}
+
+	free(h.a);
+	return 0;
+}
diff --git a/eps.h b/eps.h
new file mode 100644
index 0000000..e3af633
--- /dev/null
+++ b/eps.h
@@ -0,0 +1,49 @@
+#ifndef EPS_H_
+#define EPS_H_
+
+#include <stdio.h>
+
+#define EPS FILE
+#define EPSPTR  FILE *
+#define eps_open(s) fopen((s),"w+")
+#define eps_close(fp) fclose(fp)
+
+#define eps_header(fp,x,y,linewidth) { \
+	fprintf(fp,"%%!PS-Adobe-3.0 EPSF-3.0\n"); \
+	fprintf(fp,"%%%%BoundingBox:"); \
+	fprintf(fp," 1 1 %g %g\n\n",(float)(x),(float)(y)); \
+	fprintf(fp,"/C { dup 255 and 255 div exch dup -8 bitshift 255 and 255 div 3 1 roll -16 bitshift 255 and 255 div 3 1 roll setrgbcolor } bind def\n"); \
+	fprintf(fp,"/L { 4 2 roll moveto lineto } bind def\n"); \
+	fprintf(fp,"/LX { dup 4 -1 roll exch moveto lineto } bind def\n"); \
+	fprintf(fp,"/LY { dup 4 -1 roll moveto exch lineto } bind def\n"); \
+	fprintf(fp,"/LS { 3 1 roll moveto show } bind def\n"); \
+	fprintf(fp,"/MS { dup stringwidth pop 2 div 4 -1 roll exch sub 3 -1 roll moveto show } bind def\n"); \
+	fprintf(fp,"/RS { dup stringwidth pop 4 -1 roll exch sub 3 -1 roll moveto show } bind def\n"); \
+	fprintf(fp,"/B { 4 copy 3 1 roll exch 6 2 roll 8 -2 roll moveto lineto lineto lineto closepath } bind def\n");\
+	fprintf(fp,"%g setlinewidth\n\n",linewidth);\
+}
+#define eps_font(fp,f,s) do { \
+	fprintf(fp,"/FS %d def\n",s); \
+	fprintf(fp,"/FS4 FS 4 div def\n"); \
+	fprintf(fp,"/%s findfont FS scalefont setfont\n\n",f); \
+  } while (0)
+
+#define eps_bottom(fp) fprintf(fp,"stroke showpage\n")
+#define eps_color(fp,col) fprintf(fp,"stroke %d C\n",col)
+#define eps_gray(fp,gray) fprintf(fp, "%g setgray\n",(float)(gray)) /* argument parenthesised so the cast covers a whole expression */
+#define eps_linewidth(fp, lw) fprintf(fp, "%g setlinewidth\n", (float)(lw))
+#define eps_line(fp,x1,y1,x2,y2) fprintf(fp,"%g %g %g %g L\n",(float)(x1),(float)(y1),(float)(x2),(float)(y2))
+#define eps_linex(fp,x1,x2,y) fprintf(fp,"%g %g %g LX\n",(float)(x1),(float)(x2),(float)(y))
+#define eps_liney(fp,y1,y2,x) fprintf(fp,"%g %g %g LY\n",(float)(y1),(float)(y2),(float)(x))
+#define eps_Lstr(fp,x,y,s) fprintf(fp,"%g %g (%s) LS\n",(float)(x),(float)(y),s)
+#define eps_Mstr(fp,x,y,s) fprintf(fp,"%g %g (%s) MS\n",(float)(x),(float)(y),s)
+#define eps_Rstr(fp,x,y,s) fprintf(fp,"%g %g (%s) RS\n",(float)(x),(float)(y),s)
+#define eps_Lstr4(fp,x,y,s) fprintf(fp,"%g %g FS4 add (%s) LS\n",(float)(x),(float)(y),s)
+#define eps_Rstr4(fp,x,y,s) fprintf(fp,"%g %g FS4 add (%s) RS\n",(float)(x),(float)(y),s)
+#define eps_Lstr4s(fp,x,y,s) fprintf(fp,"%g %g FS4 sub (%s) LS\n",(float)(x),(float)(y),s)
+#define eps_Rstr4s(fp,x,y,s) fprintf(fp,"%g %g FS4 sub (%s) RS\n",(float)(x),(float)(y),s)
+#define eps_box(fp,x1,y1,x2,y2) fprintf(fp,"%g %g %g %g B\n",(float)(x1),(float)(y1),(float)(x2),(float)(y2))
+#define eps_fill(fp) fprintf(fp,"fill\n")
+#define eps_stroke(fp) fprintf(fp,"stroke\n")
+
+#endif
diff --git a/hit.c b/hit.c
new file mode 100644
index 0000000..38195a2
--- /dev/null
+++ b/hit.c
@@ -0,0 +1,217 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "sdict.h"
+#include "paf.h"
+#include "kvec.h"
+#include "sys.h"
+#include "miniasm.h"
+
+#include "ksort.h"
+#define ma_hit_key(a) ((a).qns)
+KRADIX_SORT_INIT(hit, ma_hit_t, ma_hit_key, 8)
+
+KSORT_INIT_GENERIC(uint32_t)
+
+void ma_hit_sort(size_t n, ma_hit_t *a) // sort the n hits of a[] in place with the radix sort instantiated above, keyed on ma_hit_key (the qns field)
+{
+	radix_sort_hit(a, a + n);
+}
+
+void ma_hit_mark_unused(sdict_t *d, int n, const ma_hit_t *a)
+{
+	size_t k; // aux serves as a scratch "referenced by some hit" flag and is left cleared on return
+	for (k = 0; k < d->n_seq; ++k) d->seq[k].aux = 0;
+	for (k = 0; k < n; ++k) { // flag both endpoints of every hit
+		d->seq[a[k].tn].aux = 1;
+		d->seq[a[k].qns>>32].aux = 1;
+	}
+	for (k = 0; k < d->n_seq; ++k) {
+		if (d->seq[k].aux) d->seq[k].aux = 0; // referenced: keep it and reset the scratch flag
+		else d->seq[k].del = 1; // never referenced: mark the sequence as deleted
+	}
+}
+
+ma_hit_t *ma_hit_read(const char *fn, int min_span, int min_match, sdict_t *d, size_t *n, int bi_dir)
+{ // load the hits of PAF file fn, registering sequence names in d; returns the hit array sorted by ma_hit_sort() and stores its length in *n
+	paf_file_t *fp;
+	paf_rec_t r;
+	ma_hit_v h = {0,0,0};
+	size_t i, tot = 0, tot_len = 0;
+
+	fp = paf_open(fn);
+	if (fp == 0) { fprintf(stderr, "[E::%s] failed to open PAF input \"%s\"\n", __func__, fn? fn : "-"); exit(1); } // guard in case paf_open() (defined elsewhere) reports failure with a null handle
+	while (paf_read(fp, &r) >= 0) {
+		ma_hit_t *p;
+		++tot;
+		if (r.qe - r.qs < min_span || r.te - r.ts < min_span || r.ml < min_match) continue;
+		kv_pushp(ma_hit_t, h, &p);
+		p->qns = (uint64_t)sd_put(d, r.qn, r.ql)<<32 | r.qs; // query id in the high 32 bits, query start in the low 32 bits
+		p->qe = r.qe;
+		p->tn = sd_put(d, r.tn, r.tl);
+		p->ts = r.ts, p->te = r.te, p->rev = r.rev, p->ml = r.ml, p->bl = r.bl;
+		if (bi_dir && p->qns>>32 != p->tn) { // also store the mirrored hit with query and target swapped
+			kv_pushp(ma_hit_t, h, &p);
+			p->qns = (uint64_t)sd_put(d, r.tn, r.tl)<<32 | r.ts;
+			p->qe = r.te;
+			p->tn = sd_put(d, r.qn, r.ql);
+			p->ts = r.qs, p->te = r.qe, p->rev = r.rev, p->ml = r.ml, p->bl = r.bl;
+		}
+	}
+	paf_close(fp);
+	for (i = 0; i < d->n_seq; ++i) tot_len += d->seq[i].len;
+	if (ma_verbose >= 3) fprintf(stderr, "[M::%s::%s] read %zu hits; stored %zu hits and %d sequences (%zu bp)\n", __func__, sys_timestamp(), tot, h.n, d->n_seq, tot_len);
+	ma_hit_sort(h.n, h.a);
+	*n = h.n;
+	return h.a;
+}
+
+ma_sub_t *ma_hit_sub(int min_dp, float min_iden, int end_clip, size_t n, const ma_hit_t *a, size_t n_sub)
+{ // per query, find the longest stretch covered by at least min_dp hits; relies on hits of one query being adjacent in a[]; returns a calloc'ed array of n_sub entries
+	size_t i, j, last, n_remained = 0;
+	kvec_t(uint32_t) b = {0,0,0};
+	ma_sub_t *sub = 0;
+
+	sub = (ma_sub_t*)calloc(n_sub, sizeof(ma_sub_t));
+	for (i = 1, last = 0; i <= n; ++i) {
+		if (i == n || a[i].qns>>32 != a[i-1].qns>>32) { // we come to a new query sequence
+			size_t start = 0;
+			int dp, qid = a[i-1].qns>>32;
+			ma_sub_t max, max2; // longest region and runner-up; max2 is maintained but not read afterwards
+			kv_resize(uint32_t, b, i - last);
+			b.n = 0;
+			for (j = last; j < i; ++j) { // collect all starts and ends
+				uint32_t qs, qe;
+				if (a[j].tn == qid || a[j].ml < a[j].bl * min_iden) continue; // skip self match
+				qs = (uint32_t)a[j].qns + end_clip, qe = a[j].qe - end_clip;
+				if (qe > qs) {
+					kv_push(uint32_t, b, qs<<1); // event = position<<1 | is_end, so a start sorts before an end at the same position
+					kv_push(uint32_t, b, qe<<1|1);
+				}
+			}
+			ks_introsort_uint32_t(b.n, b.a);
+			max.s = max.e = max.del = max2.s = max2.e = max2.del = 0;
+			for (j = 0, dp = 0; j < b.n; ++j) { // sweep the events while tracking the depth
+				int old_dp = dp;
+				if (b.a[j]&1) --dp;
+				else ++dp;
+				if (old_dp < min_dp && dp >= min_dp) {
+					start = b.a[j]>>1;
+				} else if (old_dp >= min_dp && dp < min_dp) {
+					int len = (b.a[j]>>1) - start;
+					if (len > max.e - max.s) max2 = max, max.s = start, max.e = b.a[j]>>1;
+					else if (len > max2.e - max2.s) max2.s = start, max2.e = b.a[j]>>1;
+				}
+			}
+			assert(qid < n_sub); // checked before either branch below writes sub[qid]
+			if (max.e - max.s > 0) {
+				sub[qid].s = max.s - end_clip;
+				sub[qid].e = max.e + end_clip;
+				sub[qid].del = 0;
+				++n_remained;
+			} else sub[qid].del = 1;
+			last = i;
+		}
+	}
+	free(b.a);
+	if (ma_verbose >= 3)
+		fprintf(stderr, "[M::%s::%s] %zu query sequences remain after sub\n", __func__, sys_timestamp(), n_remained);
+	return sub;
+}
+
+size_t ma_hit_cut(const ma_sub_t *reg, int min_span, size_t n, ma_hit_t *a)
+{ // clip every hit to the regions reg[] of its two reads and shift it into region-relative coordinates; compacts a[] in place and returns the number kept
+	size_t i, m;
+	for (i = m = 0; i < n; ++i) {
+		ma_hit_t *p = &a[i];
+		const ma_sub_t *rq = &reg[p->qns>>32], *rt = &reg[p->tn];
+		int qs, qe, ts, te;
+		if (rq->del || rt->del) continue;
+		if (p->rev) { // reverse hit: trimming one end of a read shortens the opposite end of its partner
+			qs = p->te < rt->e? (uint32_t)p->qns : (uint32_t)p->qns + (p->te - rt->e);
+			qe = p->ts > rt->s? p->qe : p->qe - (rt->s - p->ts);
+			ts = p->qe < rq->e? p->ts : p->ts + (p->qe - rq->e);
+			te = (uint32_t)p->qns > rq->s? p->te : p->te - (rq->s - (uint32_t)p->qns);
+		} else { // forward hit: trimming one end shortens the same end of its partner
+			qs = p->ts > rt->s? (uint32_t)p->qns : (uint32_t)p->qns + (rt->s - p->ts);
+			qe = p->te < rt->e? p->qe : p->qe - (p->te - rt->e);
+			ts = (uint32_t)p->qns > rq->s? p->ts : p->ts + (rq->s - (uint32_t)p->qns);
+			te = p->qe < rq->e? p->te : p->te - (p->qe - rq->e);
+		}
+		qs = (qs > rq->s? qs : rq->s) - rq->s; // clamp to the region, then make the coordinate relative to its start
+		qe = (qe < rq->e? qe : rq->e) - rq->s;
+		ts = (ts > rt->s? ts : rt->s) - rt->s;
+		te = (te < rt->e? te : rt->e) - rt->s;
+		if (qe - qs >= min_span && te - ts >= min_span) {
+			p->qns = p->qns>>32<<32 | qs, p->qe = qe, p->ts = ts, p->te = te;
+			a[m++] = *p;
+		}
+	}
+	if (ma_verbose >= 3)
+		fprintf(stderr, "[M::%s::%s] %zu hits remain after cut\n", __func__, sys_timestamp(), m);
+	return m;
+}
+
+size_t ma_hit_flt(const ma_sub_t *sub, int max_hang, int min_ovlp, size_t n, ma_hit_t *a, float *cov)
+{ // keep the hits that ma_hit2arc() classifies as an overlap or a containment; compacts a[] in place, returns the number kept and stores a crude coverage in *cov
+	size_t i, m;
+	asg_arc_t t;
+	uint64_t tot_dp = 0, tot_len = 0;
+	for (i = m = 0; i < n; ++i) {
+		ma_hit_t *h = &a[i];
+		const ma_sub_t *sq = &sub[h->qns>>32], *st = &sub[h->tn];
+		int r;
+		if (sq->del || st->del) continue;
+		r = ma_hit2arc(h, sq->e - sq->s, st->e - st->s, max_hang, .5, min_ovlp, &t);
+		if (r >= 0 || r == MA_HT_QCONT || r == MA_HT_TCONT) // a non-negative r is added as is; a containment adds the length of the read named by the code
+			a[m++] = *h, tot_dp += r >= 0? r : r == MA_HT_QCONT? sq->e - sq->s : st->e - st->s;
+	}
+	for (i = 1; i <= m; ++i) // add up the length of every distinct query among the kept hits
+		if (i == m || a[i].qns>>32 != a[i-1].qns>>32)
+			tot_len += sub[a[i-1].qns>>32].e - sub[a[i-1].qns>>32].s;
+	*cov = tot_len? (double)tot_dp / tot_len : 0.; // report 0 rather than 0/0 when nothing survives
+	if (ma_verbose >= 3)
+		fprintf(stderr, "[M::%s::%s] %zu hits remain after filtering; crude coverage after filtering: %.2f\n", __func__, sys_timestamp(), m, *cov);
+	return m;
+}
+
+void ma_sub_merge(size_t n_sub, ma_sub_t *a, const ma_sub_t *b)
+{ // fold b[] into a[] in place: both new ends are the old a[].s plus the matching end of b[]
+	ma_sub_t *stop = a + n_sub;
+	for (; a != stop; ++a, ++b)
+		a->e = a->s + b->e, a->s += b->s; // e is computed first because it needs the old s
+}
+
+size_t ma_hit_contained(const ma_opt_t *opt, sdict_t *d, ma_sub_t *sub, size_t n, ma_hit_t *a)
+{ // drop contained and unreferenced reads, renumber the survivors in d, sub[] and a[]; returns the number of hits kept
+	int32_t *map, r;
+	size_t i, m, old_n_seq = d->n_seq;
+	asg_arc_t t;
+	for (i = m = 0; i < n; ++i) { // mark the read that ma_hit2arc() reports as contained
+		ma_hit_t *h = &a[i];
+		ma_sub_t *sq = &sub[h->qns>>32], *st = &sub[h->tn];
+		r = ma_hit2arc(h, sq->e - sq->s, st->e - st->s, opt->max_hang, opt->int_frac, opt->min_ovlp, &t);
+		if (r == MA_HT_QCONT) sq->del = 1;
+		else if (r == MA_HT_TCONT) st->del = 1;
+	}
+	for (i = 0; i < d->n_seq; ++i)
+		if (sub[i].del) d->seq[i].del = 1;
+	ma_hit_mark_unused(d, n, a);
+	map = sd_squeeze(d); // map[old id] is used below as the new id, negative for a removed sequence
+	for (i = 0; i < old_n_seq; ++i)
+		if (map[i] >= 0) sub[map[i]] = sub[i];
+	for (i = m = 0; i < n; ++i) { // keep only the hits whose two reads both survived
+		ma_hit_t *h = &a[i];
+		int32_t qn = map[h->qns>>32], tn = map[h->tn];
+		if (qn >= 0 && tn >= 0) {
+			a[i].qns = (uint64_t)qn<<32 | (uint32_t)a[i].qns;
+			a[i].tn = tn;
+			a[m++] = a[i];
+		}
+	}
+	free(map);
+	if (ma_verbose >= 3)
+		fprintf(stderr, "[M::%s::%s] %d sequences and %zu hits remain after containment removal\n", __func__, sys_timestamp(), d->n_seq, m);
+	return m;
+}
diff --git a/kdq.h b/kdq.h
new file mode 100644
index 0000000..edd55b5
--- /dev/null
+++ b/kdq.h
@@ -0,0 +1,128 @@
+#ifndef __AC_KDQ_H
+#define __AC_KDQ_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#define __KDQ_TYPE(type) \
+	typedef struct { \
+		size_t front:58, bits:6, count, mask; \
+		type *a; \
+	} kdq_##type##_t;
+
+#define kdq_t(type) kdq_##type##_t
+#define kdq_size(q) ((q)->count)
+#define kdq_first(q) ((q)->a[(q)->front])
+#define kdq_last(q) ((q)->a[((q)->front + (q)->count - 1) & (q)->mask])
+#define kdq_at(q, i) ((q)->a[((q)->front + (i)) & (q)->mask])
+
+#define __KDQ_IMPL(type, SCOPE) /* function bodies of a double-ended queue of `type` stored in a power-of-two ring buffer */ \
+	SCOPE kdq_##type##_t *kdq_init_##type() \
+	{ \
+		kdq_##type##_t *q; \
+		q = (kdq_##type##_t*)calloc(1, sizeof(kdq_##type##_t)); \
+		q->bits = 2, q->mask = (1ULL<<q->bits) - 1; /* initial capacity: 4 elements */ \
+		q->a = (type*)malloc((1<<q->bits) * sizeof(type)); \
+		return q; \
+	} \
+	SCOPE void kdq_destroy_##type(kdq_##type##_t *q) \
+	{ \
+		if (q == 0) return; \
+		free(q->a); free(q); \
+	} \
+	SCOPE int kdq_resize_##type(kdq_##type##_t *q, int new_bits) \
+	{ /* set the capacity to 2^new_bits, or to the smallest power of two above count if that is too small; returns the resulting bits */ \
+		size_t k, new_size, old_size = 1ULL<<q->bits; \
+		if (1ULL<<new_bits < q->count) /* not big enough */ \
+			for (new_bits = 0; new_bits < 64 && 1ULL<<new_bits <= q->count; ++new_bits) {} \
+		if (new_bits == q->bits) return q->bits; /* unchanged */ \
+		new_size = 1ULL<<new_bits; \
+		if (new_bits > q->bits) { /* grow in place */ \
+			q->a = (type*)realloc(q->a, new_size * sizeof(type)); \
+			if (q->front + q->count > old_size) { /* wrapped: slide the segment starting at front to the end of the larger array */ \
+				memmove(q->a + (new_size - (old_size - q->front)), q->a + q->front, (old_size - q->front) * sizeof(type)); \
+				q->front = new_size - (old_size - q->front); \
+			} \
+		} else { /* shrink: repack the elements, in queue order, into a fresh smaller array so that front stays within the mask */ \
+			type *b = (type*)malloc(new_size * sizeof(type)); \
+			if (b == 0) return q->bits; /* out of memory: leave the queue as it is */ \
+			for (k = 0; k < q->count; ++k) b[k] = q->a[(q->front + k) & q->mask]; \
+			free(q->a); q->a = b, q->front = 0; \
+		} \
+		q->bits = new_bits, q->mask = (1ULL<<q->bits) - 1; \
+		return q->bits; \
+	} \
+	SCOPE type *kdq_pushp_##type(kdq_##type##_t *q) \
+	{ /* append a slot at the back and return a pointer to it; doubles the capacity when full */ \
+		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
+		return &q->a[((q->count++) + q->front) & (q)->mask]; \
+	} \
+	SCOPE void kdq_push_##type(kdq_##type##_t *q, type v) \
+	{ \
+		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
+		q->a[((q->count++) + q->front) & (q)->mask] = v; \
+	} \
+	SCOPE type *kdq_unshiftp_##type(kdq_##type##_t *q) \
+	{ /* prepend a slot at the front and return a pointer to it */ \
+		if (q->count == 1ULL<<q->bits) kdq_resize_##type(q, q->bits + 1); \
+		++q->count; \
+		q->front = q->front? q->front - 1 : (1ULL<<q->bits) - 1; \
+		return &q->a[q->front]; \
+	} \
+	SCOPE void kdq_unshift_##type(kdq_##type##_t *q, type v) \
+	{ \
+		type *p; \
+		p = kdq_unshiftp_##type(q); \
+		*p = v; \
+	} \
+	SCOPE type *kdq_pop_##type(kdq_##type##_t *q) \
+	{ /* remove the last element; returns a pointer to it, or 0 if the queue is empty */ \
+		return q->count? &q->a[((--q->count) + q->front) & q->mask] : 0; \
+	} \
+	SCOPE type *kdq_shift_##type(kdq_##type##_t *q) \
+	{ /* remove the first element; returns a pointer to it, or 0 if the queue is empty */ \
+		type *d = 0; \
+		if (q->count == 0) return 0; \
+		d = &q->a[q->front++]; \
+		q->front &= q->mask; \
+		--q->count; \
+		return d; \
+	}
+
+#define KDQ_INIT2(type, SCOPE) \
+	__KDQ_TYPE(type) \
+	__KDQ_IMPL(type, SCOPE)
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+#define KDQ_INIT(type) KDQ_INIT2(type, static inline klib_unused)
+
+#define KDQ_DECLARE(type) \
+	__KDQ_TYPE(type) \
+	kdq_##type##_t *kdq_init_##type(); \
+	void kdq_destroy_##type(kdq_##type##_t *q); \
+	int kdq_resize_##type(kdq_##type##_t *q, int new_bits); \
+	type *kdq_pushp_##type(kdq_##type##_t *q); \
+	void kdq_push_##type(kdq_##type##_t *q, type v); \
+	type *kdq_unshiftp_##type(kdq_##type##_t *q); \
+	void kdq_unshift_##type(kdq_##type##_t *q, type v); \
+	type *kdq_pop_##type(kdq_##type##_t *q); \
+	type *kdq_shift_##type(kdq_##type##_t *q);
+
+#define kdq_init(type) kdq_init_##type()
+#define kdq_destroy(type, q) kdq_destroy_##type(q)
+#define kdq_resize(type, q, new_bits) kdq_resize_##type(q, new_bits)
+#define kdq_pushp(type, q) kdq_pushp_##type(q)
+#define kdq_push(type, q, v) kdq_push_##type(q, v)
+#define kdq_pop(type, q) kdq_pop_##type(q)
+#define kdq_unshiftp(type, q) kdq_unshiftp_##type(q)
+#define kdq_unshift(type, q, v) kdq_unshift_##type(q, v)
+#define kdq_shift(type, q) kdq_shift_##type(q)
+
+#endif
diff --git a/khash.h b/khash.h
new file mode 100644
index 0000000..06fc7a3
--- /dev/null
+++ b/khash.h
@@ -0,0 +1,627 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  An example:
+
+#include "khash.h"
+KHASH_MAP_INIT_INT(32, char)
+int main() {
+	int ret, is_missing;
+	khiter_t k;
+	khash_t(32) *h = kh_init(32);
+	k = kh_put(32, h, 5, &ret);
+	kh_value(h, k) = 10;
+	k = kh_get(32, h, 10);
+	is_missing = (k == kh_end(h));
+	k = kh_get(32, h, 5);
+	kh_del(32, h, k);
+	for (k = kh_begin(h); k != kh_end(h); ++k)
+		if (kh_exist(h, k)) kh_value(h, k) = 1;
+	kh_destroy(32, h);
+	return 0;
+}
+*/
+
+/*
+  2013-05-02 (0.2.8):
+
+	* Use quadratic probing. When the capacity is power of 2, stepping function
+	  i*(i+1)/2 guarantees to traverse each bucket. It is better than double
+	  hashing on cache performance and is more robust than linear probing.
+
+	  In theory, double hashing should be more robust than quadratic probing.
+	  However, my implementation is probably not for large hash tables, because
+	  the second hash function is closely tied to the first hash function,
+	  which reduces the effectiveness of double hashing.
+
+	Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
+
+  2011-12-29 (0.2.7):
+
+    * Minor code clean up; no actual effect.
+
+  2011-09-16 (0.2.6):
+
+	* The capacity is a power of 2. This seems to dramatically improve the
+	  speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
+
+	   - http://code.google.com/p/ulib/
+	   - http://nothings.org/computer/judy/
+
+	* Allow to optionally use linear probing which usually has better
+	  performance for random input. Double hashing is still the default as it
+	  is more robust to certain non-random input.
+
+	* Added Wang's integer hash function (not used by default). This hash
+	  function is more robust to certain non-random input.
+
+  2011-02-14 (0.2.5):
+
+    * Allow to declare global functions.
+
+  2009-09-26 (0.2.4):
+
+    * Improve portability
+
+  2008-09-19 (0.2.3):
+
+	* Corrected the example
+	* Improved interfaces
+
+  2008-09-11 (0.2.2):
+
+	* Improved speed a little in kh_put()
+
+  2008-09-10 (0.2.1):
+
+	* Added kh_clear()
+	* Fixed a compiling error
+
+  2008-09-02 (0.2.0):
+
+	* Changed to token concatenation which increases flexibility.
+
+  2008-08-31 (0.1.2):
+
+	* Fixed a bug in kh_get(), which has not been tested previously.
+
+  2008-08-31 (0.1.1):
+
+	* Added destructor
+*/
+
+
+#ifndef __AC_KHASH_H
+#define __AC_KHASH_H
+
+/*!
+  @header
+
+  Generic hash table library.
+ */
+
+#define AC_VERSION_KHASH_H "0.2.8"
+
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+/* compiler specific configuration */
+
+#if UINT_MAX == 0xffffffffu
+typedef unsigned int khint32_t;
+#elif ULONG_MAX == 0xffffffffu
+typedef unsigned long khint32_t;
+#endif
+
+#if ULONG_MAX == ULLONG_MAX
+typedef unsigned long khint64_t;
+#else
+typedef unsigned long long khint64_t;
+#endif
+
+#ifndef kh_inline
+#ifdef _MSC_VER
+#define kh_inline __inline
+#else
+#define kh_inline inline
+#endif
+#endif /* kh_inline */
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
+
+#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef kcalloc
+#define kcalloc(N,Z) calloc(N,Z)
+#endif
+#ifndef kmalloc
+#define kmalloc(Z) malloc(Z)
+#endif
+#ifndef krealloc
+#define krealloc(P,Z) realloc(P,Z)
+#endif
+#ifndef kfree
+#define kfree(P) free(P)
+#endif
+
+static const double __ac_HASH_UPPER = 0.77;
+
+#define __KHASH_TYPE(name, khkey_t, khval_t) \
+	typedef struct kh_##name##_s { \
+		khint_t n_buckets, size, n_occupied, upper_bound; \
+		khint32_t *flags; \
+		khkey_t *keys; \
+		khval_t *vals; \
+	} kh_##name##_t;
+
+#define __KHASH_PROTOTYPES(name, khkey_t, khval_t)	 					\
+	extern kh_##name##_t *kh_init_##name(void);							\
+	extern void kh_destroy_##name(kh_##name##_t *h);					\
+	extern void kh_clear_##name(kh_##name##_t *h);						\
+	extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); 	\
+	extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
+	extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
+	extern void kh_del_##name(kh_##name##_t *h, khint_t x);
+
+#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	SCOPE kh_##name##_t *kh_init_##name(void) {							\
+		return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t));		\
+	}																	\
+	SCOPE void kh_destroy_##name(kh_##name##_t *h)						\
+	{																	\
+		if (h) {														\
+			kfree((void *)h->keys); kfree(h->flags);					\
+			kfree((void *)h->vals);										\
+			kfree(h);													\
+		}																\
+	}																	\
+	SCOPE void kh_clear_##name(kh_##name##_t *h)						\
+	{																	\
+		if (h && h->flags) {											\
+			memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
+			h->size = h->n_occupied = 0;								\
+		}																\
+	}																	\
+	SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) 	\
+	{																	\
+		if (h->n_buckets) {												\
+			khint_t k, i, last, mask, step = 0; \
+			mask = h->n_buckets - 1;									\
+			k = __hash_func(key); i = k & mask;							\
+			last = i; \
+			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+				i = (i + (++step)) & mask; \
+				if (i == last) return h->n_buckets;						\
+			}															\
+			return __ac_iseither(h->flags, i)? h->n_buckets : i;		\
+		} else return 0;												\
+	}																	\
+	SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
+	{ /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
+		khint32_t *new_flags = 0;										\
+		khint_t j = 1;													\
+		{																\
+			kroundup32(new_n_buckets); 									\
+			if (new_n_buckets < 4) new_n_buckets = 4;					\
+			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	/* requested size is too small */ \
+			else { /* hash table size to be changed (shrink or expand); rehash */ \
+				new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t));	\
+				if (!new_flags) return -1;								\
+				memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
+				if (h->n_buckets < new_n_buckets) {	/* expand */		\
+					khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
+					if (!new_keys) { kfree(new_flags); return -1; }		\
+					h->keys = new_keys;									\
+					if (kh_is_map) {									\
+						khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
+						if (!new_vals) { kfree(new_flags); return -1; }	\
+						h->vals = new_vals;								\
+					}													\
+				} /* otherwise shrink */								\
+			}															\
+		}																\
+		if (j) { /* rehashing is needed */								\
+			for (j = 0; j != h->n_buckets; ++j) {						\
+				if (__ac_iseither(h->flags, j) == 0) {					\
+					khkey_t key = h->keys[j];							\
+					khval_t val;										\
+					khint_t new_mask;									\
+					new_mask = new_n_buckets - 1; 						\
+					if (kh_is_map) val = h->vals[j];					\
+					__ac_set_isdel_true(h->flags, j);					\
+					while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+						khint_t k, i, step = 0; \
+						k = __hash_func(key);							\
+						i = k & new_mask;								\
+						while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
+						__ac_set_isempty_false(new_flags, i);			\
+						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
+							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
+							__ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
+						} else { /* write the element and jump out of the loop */ \
+							h->keys[i] = key;							\
+							if (kh_is_map) h->vals[i] = val;			\
+							break;										\
+						}												\
+					}													\
+				}														\
+			}															\
+			if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
+				h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
+				if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
+			}															\
+			kfree(h->flags); /* free the working space */				\
+			h->flags = new_flags;										\
+			h->n_buckets = new_n_buckets;								\
+			h->n_occupied = h->size;									\
+			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
+		}																\
+		return 0;														\
+	}																	\
+	SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
+	{																	\
+		khint_t x;														\
+		if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
+			if (h->n_buckets > (h->size<<1)) {							\
+				if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
+					*ret = -1; return h->n_buckets;						\
+				}														\
+			} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
+				*ret = -1; return h->n_buckets;							\
+			}															\
+		} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+		{																\
+			khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
+			x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
+			if (__ac_isempty(h->flags, i)) x = i; /* for speed up */	\
+			else {														\
+				last = i; \
+				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
+					if (__ac_isdel(h->flags, i)) site = i;				\
+					i = (i + (++step)) & mask; \
+					if (i == last) { x = site; break; }					\
+				}														\
+				if (x == h->n_buckets) {								\
+					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
+					else x = i;											\
+				}														\
+			}															\
+		}																\
+		if (__ac_isempty(h->flags, x)) { /* not present at all */		\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size; ++h->n_occupied;									\
+			*ret = 1;													\
+		} else if (__ac_isdel(h->flags, x)) { /* deleted */				\
+			h->keys[x] = key;											\
+			__ac_set_isboth_false(h->flags, x);							\
+			++h->size;													\
+			*ret = 2;													\
+		} else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
+		return x;														\
+	}																	\
+	SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x)				\
+	{																	\
+		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) {			\
+			__ac_set_isdel_true(h->flags, x);							\
+			--h->size;													\
+		}																\
+	}
+
+#define KHASH_DECLARE(name, khkey_t, khval_t)		 					\
+	__KHASH_TYPE(name, khkey_t, khval_t) 								\
+	__KHASH_PROTOTYPES(name, khkey_t, khval_t)
+
+#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	__KHASH_TYPE(name, khkey_t, khval_t) 								\
+	__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
+
+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
+	KHASH_INIT2(name, static kh_inline klib_unused, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
+
+/* --- BEGIN OF HASH FUNCTIONS --- */
+
+/*! @function
+  @abstract     Integer hash function
+  @param  key   The integer [khint32_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int_hash_func(key) (khint32_t)(key)
+/*! @function
+  @abstract     Integer comparison function
+ */
+#define kh_int_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     64-bit integer hash function
+  @param  key   The integer [khint64_t]
+  @return       The hash value [khint_t]
+ */
+#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
+/*! @function
+  @abstract     64-bit integer comparison function
+ */
+#define kh_int64_hash_equal(a, b) ((a) == (b))
+/*! @function
+  @abstract     const char* hash function
+  @param  s     Pointer to a null terminated string
+  @return       The hash value
+ */
+static kh_inline khint_t __ac_X31_hash_string(const char *s)
+{
+	khint_t h = (khint_t)*s;
+	if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
+	return h;
+}
+/*! @function
+  @abstract     Another interface to const char* hash function
+  @param  key   Pointer to a null terminated string [const char*]
+  @return       The hash value [khint_t]
+ */
+#define kh_str_hash_func(key) __ac_X31_hash_string(key)
+/*! @function
+  @abstract     Const char* comparison function
+ */
+#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
+
+static kh_inline khint_t __ac_Wang_hash(khint_t key)
+{
+    key += ~(key << 15);
+    key ^=  (key >> 10);
+    key +=  (key << 3);
+    key ^=  (key >> 6);
+    key += ~(key << 11);
+    key ^=  (key >> 16);
+    return key;
+}
+#define kh_int_hash_func2(key) __ac_Wang_hash((khint_t)(key)) /* hash the macro argument itself via __ac_Wang_hash() */
+
+/* --- END OF HASH FUNCTIONS --- */
+
+/* Other convenient macros... */
+
+/*!
+  @abstract Type of the hash table.
+  @param  name  Name of the hash table [symbol]
+ */
+#define khash_t(name) kh_##name##_t
+
+/*! @function
+  @abstract     Initiate a hash table.
+  @param  name  Name of the hash table [symbol]
+  @return       Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_init(name) kh_init_##name()
+
+/*! @function
+  @abstract     Destroy a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_destroy(name, h) kh_destroy_##name(h)
+
+/*! @function
+  @abstract     Reset a hash table without deallocating memory.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+ */
+#define kh_clear(name, h) kh_clear_##name(h)
+
+/*! @function
+  @abstract     Resize a hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  s     New size [khint_t]
+ */
+#define kh_resize(name, h, s) kh_resize_##name(h, s)
+
+/*! @function
+  @abstract     Insert a key to the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @param  r     Extra return code: -1 if the operation failed;
+                0 if the key is present in the hash table;
+                1 if the bucket is empty (never used); 2 if the element in
+				the bucket has been deleted [int*]
+  @return       Iterator to the inserted element [khint_t]
+ */
+#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
+
+/*! @function
+  @abstract     Retrieve a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Key [type of keys]
+  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
+ */
+#define kh_get(name, h, k) kh_get_##name(h, k)
+
+/*! @function
+  @abstract     Remove a key from the hash table.
+  @param  name  Name of the hash table [symbol]
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  k     Iterator to the element to be deleted [khint_t]
+ */
+#define kh_del(name, h, k) kh_del_##name(h, k)
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Alias of kh_val()
+ */
+#define kh_value(h, x) ((h)->vals[x])
+
+/*! @function
+  @abstract     Get the start iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The start iterator [khint_t]
+ */
+#define kh_begin(h) (khint_t)(0)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->size)
+
+/*! @function
+  @abstract     Get the number of buckets in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @return       Number of buckets in the hash table [khint_t]
+ */
+#define kh_n_buckets(h) ((h)->n_buckets)
+
+/*! @function
+  @abstract     Iterate over the entries in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  kvar  Variable to which key will be assigned
+  @param  vvar  Variable to which value will be assigned
+  @param  code  Block of code to execute
+ */
+#define kh_foreach(h, kvar, vvar, code) { khint_t __i;		\
+	for (__i = kh_begin(h); __i != kh_end(h); ++__i) {		\
+		if (!kh_exist(h,__i)) continue;						\
+		(kvar) = kh_key(h,__i);								\
+		(vvar) = kh_val(h,__i);								\
+		code;												\
+	} }
+
+/*! @function
+  @abstract     Iterate over the values in the hash table
+  @param  h     Pointer to the hash table [khash_t(name)*]
+  @param  vvar  Variable to which value will be assigned
+  @param  code  Block of code to execute
+ */
+#define kh_foreach_value(h, vvar, code) { khint_t __i;		\
+	for (__i = kh_begin(h); __i != kh_end(h); ++__i) {		\
+		if (!kh_exist(h,__i)) continue;						\
+		(vvar) = kh_val(h,__i);								\
+		code;												\
+	} }
+
+/* More convenient interfaces */
+
+/*! @function
+  @abstract     Instantiate a hash set containing integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT(name)										\
+	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT(name, khval_t)								\
+	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash set containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_INT64(name)										\
+	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing 64-bit integer keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_INT64(name, khval_t)								\
+	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
+
+typedef const char *kh_cstr_t;
+/*! @function
+  @abstract     Instantiate a hash set containing const char* keys
+  @param  name  Name of the hash table [symbol]
+ */
+#define KHASH_SET_INIT_STR(name)										\
+	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
+
+/*! @function
+  @abstract     Instantiate a hash map containing const char* keys
+  @param  name  Name of the hash table [symbol]
+  @param  khval_t  Type of values [type]
+ */
+#define KHASH_MAP_INIT_STR(name, khval_t)								\
+	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
+
+#endif /* __AC_KHASH_H */
diff --git a/kseq.h b/kseq.h
new file mode 100644
index 0000000..d9dc686
--- /dev/null
+++ b/kseq.h
@@ -0,0 +1,256 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Last Modified: 05MAR2012 */
+
+#ifndef AC_KSEQ_H
+#define AC_KSEQ_H
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#ifndef klib_unused
+#if (defined __clang__ && __clang_major__ >= 3) || (defined __GNUC__ && __GNUC__ >= 3)
+#define klib_unused __attribute__ ((__unused__))
+#else
+#define klib_unused
+#endif
+#endif /* klib_unused */
+
+#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
+#define KS_SEP_TAB   1 // isspace() && !' '
+#define KS_SEP_LINE  2 // line separator: "\n" (Unix) or "\r\n" (Windows)
+#define KS_SEP_MAX   2
+
+#define __KS_TYPE(type_t) \
+	typedef struct __kstream_t { \
+		int begin, end; \
+		int is_eof:2, bufsize:30; \
+		type_t f; \
+		unsigned char *buf; \
+	} kstream_t;
+
+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
+
+#define __KS_BASIC(SCOPE, type_t, __bufsize) \
+	SCOPE kstream_t *ks_init(type_t f) \
+	{ \
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
+		ks->f = f; ks->bufsize = __bufsize; \
+		ks->buf = (unsigned char*)malloc(__bufsize); \
+		return ks; \
+	} \
+	SCOPE void ks_destroy(kstream_t *ks) \
+	{ \
+		if (!ks) return; \
+		free(ks->buf); \
+		free(ks); \
+	}
+
+#define __KS_INLINED(__read) \
+	static inline klib_unused int ks_getc(kstream_t *ks) /* next byte as an int, or -1 when no more input is available */ \
+	{ \
+		if (ks->is_eof && ks->begin >= ks->end) return -1; \
+		if (ks->begin >= ks->end) { /* buffer exhausted: refill it from the underlying reader */ \
+			ks->begin = 0; \
+			ks->end = __read(ks->f, ks->buf, ks->bufsize); \
+			if (ks->end < ks->bufsize) ks->is_eof = 1; /* short read: treat as the last chunk */ \
+			if (ks->end <= 0) return -1; /* nothing buffered (0 bytes or a negative __read result): never index buf */ \
+		} \
+		return (int)ks->buf[ks->begin++]; \
+	} \
+	static inline klib_unused int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) /* ks_getuntil2() with append = 0 */ \
+	{ return ks_getuntil2(ks, delimiter, str, dret, 0); }
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#define __KS_GETUNTIL(SCOPE, __read) \
+	SCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append)  \
+	{ \
+		if (dret) *dret = 0; \
+		str->l = append? str->l : 0; \
+		if (ks->begin >= ks->end && ks->is_eof) return -1; \
+		for (;;) { \
+			int i; \
+			if (ks->begin >= ks->end) { \
+				if (!ks->is_eof) { \
+					ks->begin = 0; \
+					ks->end = __read(ks->f, ks->buf, ks->bufsize); \
+					if (ks->end < ks->bufsize) ks->is_eof = 1; \
+					if (ks->end == 0) break; \
+				} else break; \
+			} \
+			if (delimiter == KS_SEP_LINE) {  \
+				for (i = ks->begin; i < ks->end; ++i)  \
+					if (ks->buf[i] == '\n') break; \
+			} else if (delimiter > KS_SEP_MAX) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (ks->buf[i] == delimiter) break; \
+			} else if (delimiter == KS_SEP_SPACE) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i])) break; \
+			} else if (delimiter == KS_SEP_TAB) { \
+				for (i = ks->begin; i < ks->end; ++i) \
+					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break;  \
+			} else i = 0; /* never come to here! */ \
+			if (str->m - str->l < (size_t)(i - ks->begin + 1)) { \
+				str->m = str->l + (i - ks->begin) + 1; \
+				kroundup32(str->m); \
+				str->s = (char*)realloc(str->s, str->m); \
+			} \
+			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin);  \
+			str->l = str->l + (i - ks->begin); \
+			ks->begin = i + 1; \
+			if (i < ks->end) { \
+				if (dret) *dret = ks->buf[i]; \
+				break; \
+			} \
+		} \
+		if (str->s == 0) { \
+			str->m = 1; \
+			str->s = (char*)calloc(1, 1); \
+		} else if (delimiter == KS_SEP_LINE && str->l > 1 && str->s[str->l-1] == '\r') --str->l; \
+		str->s[str->l] = '\0';											\
+		return str->l; \
+	}
+
+#define KSTREAM_INIT2(SCOPE, type_t, __read, __bufsize) \
+	__KS_TYPE(type_t) \
+	__KS_BASIC(SCOPE, type_t, __bufsize) \
+	__KS_GETUNTIL(SCOPE, __read) \
+	__KS_INLINED(__read)
+
+#define KSTREAM_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize)
+
+#define KSTREAM_DECLARE(type_t, __read) \
+	__KS_TYPE(type_t) \
+	extern int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append); \
+	extern kstream_t *ks_init(type_t f); \
+	extern void ks_destroy(kstream_t *ks); \
+	__KS_INLINED(__read)
+
+/******************
+ * FASTA/Q parser *
+ ******************/
+
+#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)
+
+#define __KSEQ_BASIC(SCOPE, type_t)										\
+	SCOPE kseq_t *kseq_init(type_t fd)									\
+	{																	\
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
+		s->f = ks_init(fd);												\
+		return s;														\
+	}																	\
+	SCOPE void kseq_destroy(kseq_t *ks)									\
+	{																	\
+		if (!ks) return;												\
+		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
+		ks_destroy(ks->f);												\
+		free(ks);														\
+	}
+
+/* Return value:
+   >=0  length of the sequence (normal)
+   -1   end-of-file
+   -2   truncated quality string
+ */
+#define __KSEQ_READ(SCOPE) \
+	SCOPE int kseq_read(kseq_t *seq) \
+	{ \
+		int c; \
+		kstream_t *ks = seq->f; \
+		if (seq->last_char == 0) { /* then jump to the next header line */ \
+			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
+			if (c == -1) return -1; /* end of file */ \
+			seq->last_char = c; \
+		} /* else: the first header char has been read in the previous call */ \
+		seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
+		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
+		if (c != '\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \
+		if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
+			seq->seq.m = 256; \
+			seq->seq.s = (char*)malloc(seq->seq.m); \
+		} \
+		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+			if (c == '\n') continue; /* skip empty lines */ \
+			seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
+			ks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \
+		} \
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
+		if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
+			seq->seq.m = seq->seq.l + 2; \
+			kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
+			seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
+		} \
+		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */ \
+		if (c != '+') return seq->seq.l; /* FASTA */ \
+		if (seq->qual.m < seq->seq.m) {	/* allocate memory for qual in case insufficient */ \
+			seq->qual.m = seq->seq.m; \
+			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
+		} \
+		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+		if (c == -1) return -2; /* error: no quality string */ \
+		while (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
+		seq->last_char = 0;	/* we have not come to the next header line */ \
+		if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
+		return seq->seq.l; \
+	}
+
+#define __KSEQ_TYPE(type_t)						\
+	typedef struct {							\
+		kstring_t name, comment, seq, qual;		\
+		int last_char;							\
+		kstream_t *f;							\
+	} kseq_t;
+
+#define KSEQ_INIT2(SCOPE, type_t, __read)		\
+	KSTREAM_INIT2(SCOPE, type_t, __read, 16384)	\
+	__KSEQ_TYPE(type_t)							\
+	__KSEQ_BASIC(SCOPE, type_t)					\
+	__KSEQ_READ(SCOPE)
+
+#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
+
+#define KSEQ_DECLARE(type_t) \
+	__KS_TYPE(type_t) \
+	__KSEQ_TYPE(type_t) \
+	extern kseq_t *kseq_init(type_t fd); \
+	void kseq_destroy(kseq_t *ks); \
+	int kseq_read(kseq_t *seq);
+
+#endif
diff --git a/ksort.h b/ksort.h
new file mode 100644
index 0000000..7e02489
--- /dev/null
+++ b/ksort.h
@@ -0,0 +1,185 @@
+/* The MIT License
+
+   Copyright (c) 2008, 2011 Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+// This is a simplified version of ksort.h
+
+#ifndef AC_KSORT_H
+#define AC_KSORT_H
+
+#include <stdlib.h>
+#include <string.h>
+
+typedef struct {
+	void *left, *right;
+	int depth;
+} ks_isort_stack_t;
+
+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
+
+#define KSORT_INIT(name, type_t, __sort_lt)								\
+	static inline void __ks_insertsort_##name(type_t *s, type_t *t)		\
+	{																	\
+		type_t *i, *j, swap_tmp;										\
+		for (i = s + 1; i < t; ++i)										\
+			for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) {			\
+				swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp;			\
+			}															\
+	}																	\
+	void ks_combsort_##name(size_t n, type_t a[])						\
+	{																	\
+		const double shrink_factor = 1.2473309501039786540366528676643; \
+		int do_swap;													\
+		size_t gap = n;													\
+		type_t tmp, *i, *j;												\
+		do {															\
+			if (gap > 2) {												\
+				gap = (size_t)(gap / shrink_factor);					\
+				if (gap == 9 || gap == 10) gap = 11;					\
+			}															\
+			do_swap = 0;												\
+			for (i = a; i < a + n - gap; ++i) {							\
+				j = i + gap;											\
+				if (__sort_lt(*j, *i)) {								\
+					tmp = *i; *i = *j; *j = tmp;						\
+					do_swap = 1;										\
+				}														\
+			}															\
+		} while (do_swap || gap > 2);									\
+		if (gap != 1) __ks_insertsort_##name(a, a + n);					\
+	}																	\
+	void ks_introsort_##name(size_t n, type_t a[])						\
+	{																	\
+		int d;															\
+		ks_isort_stack_t *top, *stack;									\
+		type_t rp, swap_tmp;											\
+		type_t *s, *t, *i, *j, *k;										\
+																		\
+		if (n < 1) return;												\
+		else if (n == 2) {												\
+			if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
+			return;														\
+		}																\
+		for (d = 2; 1ul<<d < n; ++d);									\
+		stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
+		top = stack; s = a; t = a + (n-1); d <<= 1;						\
+		while (1) {														\
+			if (s < t) {												\
+				if (--d == 0) {											\
+					ks_combsort_##name(t - s + 1, s);					\
+					t = s;												\
+					continue;											\
+				}														\
+				i = s; j = t; k = i + ((j-i)>>1) + 1;					\
+				if (__sort_lt(*k, *i)) {								\
+					if (__sort_lt(*k, *j)) k = j;						\
+				} else k = __sort_lt(*j, *i)? i : j;					\
+				rp = *k;												\
+				if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }	\
+				for (;;) {												\
+					do ++i; while (__sort_lt(*i, rp));					\
+					do --j; while (i <= j && __sort_lt(rp, *j));		\
+					if (j <= i) break;									\
+					swap_tmp = *i; *i = *j; *j = swap_tmp;				\
+				}														\
+				swap_tmp = *i; *i = *t; *t = swap_tmp;					\
+				if (i-s > t-i) {										\
+					if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
+					s = t-i > 16? i+1 : t;								\
+				} else {												\
+					if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
+					t = i-s > 16? i-1 : s;								\
+				}														\
+			} else {													\
+				if (top == stack) {										\
+					free(stack);										\
+					__ks_insertsort_##name(a, a+n);						\
+					return;												\
+				} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
+			}															\
+		}																\
+	}
+
+#define ks_lt_generic(a, b) ((a) < (b))
+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
+
+typedef const char *ksstr_t;
+
+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
+
+#define RS_MIN_SIZE 64
+
+#define KRADIX_SORT_INIT(name, rstype_t, rskey, sizeof_key) \
+	typedef struct { \
+		rstype_t *b, *e; \
+	} rsbucket_##name##_t; \
+	void rs_insertsort_##name(rstype_t *beg, rstype_t *end) \
+	{ \
+		rstype_t *i; \
+		for (i = beg + 1; i < end; ++i) \
+			if (rskey(*i) < rskey(*(i - 1))) { \
+				rstype_t *j, tmp = *i; \
+				for (j = i; j > beg && rskey(tmp) < rskey(*(j-1)); --j) \
+					*j = *(j - 1); \
+				*j = tmp; \
+			} \
+	} \
+	void rs_sort_##name(rstype_t *beg, rstype_t *end, int n_bits, int s) \
+	{ \
+		rstype_t *i; \
+		int size = 1<<n_bits, m = size - 1; \
+		rsbucket_##name##_t *k, b[size], *be = b + size; \
+		for (k = b; k != be; ++k) k->b = k->e = beg; \
+		for (i = beg; i != end; ++i) ++b[rskey(*i)>>s&m].e; \
+		for (k = b + 1; k != be; ++k) \
+			k->e += (k-1)->e - beg, k->b = (k-1)->e; \
+		for (k = b; k != be;) { \
+			if (k->b != k->e) { \
+				rsbucket_##name##_t *l; \
+				if ((l = b + (rskey(*k->b)>>s&m)) != k) { \
+					rstype_t tmp = *k->b, swap; \
+					do { \
+						swap = tmp; tmp = *l->b; *l->b++ = swap; \
+						l = b + (rskey(tmp)>>s&m); \
+					} while (l != k); \
+					*k->b++ = tmp; \
+				} else ++k->b; \
+			} else ++k; \
+		} \
+		for (b->b = beg, k = b + 1; k != be; ++k) k->b = (k-1)->e; \
+		if (s) { \
+			s = s > n_bits? s - n_bits : 0; \
+			for (k = b; k != be; ++k) \
+				if (k->e - k->b > RS_MIN_SIZE) rs_sort_##name(k->b, k->e, n_bits, s); \
+				else if (k->e - k->b > 1) rs_insertsort_##name(k->b, k->e); \
+		} \
+	} \
+	void radix_sort_##name(rstype_t *beg, rstype_t *end) \
+	{ \
+		if (end - beg <= RS_MIN_SIZE) rs_insertsort_##name(beg, end); \
+		else rs_sort_##name(beg, end, 8, sizeof_key * 8 - 8); \
+	}
+
+#endif
diff --git a/kvec.h b/kvec.h
new file mode 100644
index 0000000..632fce4
--- /dev/null
+++ b/kvec.h
@@ -0,0 +1,110 @@
+/* The MIT License
+
+   Copyright (c) 2008, by Attractive Chaos <attractor at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  An example:
+
+#include "kvec.h"
+int main() {
+	kvec_t(int) array;
+	kv_init(array);
+	kv_push(int, array, 10); // append
+	kv_a(int, array, 20) = 5; // dynamic
+	kv_A(array, 20) = 4; // static
+	kv_destroy(array);
+	return 0;
+}
+*/
+
+/*
+  2008-09-22 (0.1.0):
+
+	* The initial version.
+
+*/
+
+#ifndef AC_KVEC_H
+#define AC_KVEC_H
+
+#include <stdlib.h>
+
+#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+
+#define kvec_t(type) struct { size_t n, m; type *a; }
+#define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
+#define kv_destroy(v) free((v).a)
+#define kv_A(v, i) ((v).a[(i)])
+#define kv_pop(v) ((v).a[--(v).n])
+#define kv_size(v) ((v).n)
+#define kv_max(v) ((v).m)
+
+#define kv_resize(type, v, s) do { \
+		if ((v).m < (s)) { \
+			(v).m = (s); \
+			kv_roundup32((v).m); \
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
+		} \
+	} while (0)
+
+#define kv_copy(type, v1, v0) do { /* copy the n elements of v0 into v1, growing v1 first if needed */ \
+		if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n);	\
+		(v1).n = (v0).n;									\
+		memcpy((v1).a, (v0).a, sizeof(type) * (v0).n);		\
+	} while (0)
+
+#define kv_push(type, v, x) do {									\
+		if ((v).n == (v).m) {										\
+			(v).m = (v).m? (v).m<<1 : 2;							\
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m);	\
+		}															\
+		(v).a[(v).n++] = (x);										\
+	} while (0)
+
+#define kv_pushp(type, v, p) do { \
+		if ((v).n == (v).m) { \
+			(v).m = (v).m? (v).m<<1 : 2; \
+			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m); \
+		} \
+		*(p) = &(v).a[(v).n++]; \
+	} while (0)
+
+#define kv_a(type, v, i) ((v).m <= (size_t)(i)? /* element i, growing capacity and size on demand */ \
+						  ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
+						   (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
+						  : (v).n <= (size_t)(i)? (v).n = (i) + 1 /* size must include index i, as in the grow branch */ \
+						  : 0), (v).a[(i)]
+
+#define kv_reverse(type, v, start) do { \
+		if ((v).m > 0 && (v).n > (start)) { \
+			size_t __i, __end = (v).n - (start); \
+			type *__a = (v).a + (start); \
+			for (__i = 0; __i < __end>>1; ++__i) { \
+				type __t = __a[__end - 1 - __i]; \
+				__a[__end - 1 - __i] = __a[__i]; __a[__i] = __t; \
+			} \
+		} \
+	} while (0)
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..281db7f
--- /dev/null
+++ b/main.c
@@ -0,0 +1,202 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "kvec.h"
+#include "sys.h"
+#include "paf.h"
+#include "sdict.h"
+#include "miniasm.h"
+
+#define MA_VERSION "0.2-r128"
+
+static void print_subs(const sdict_t *d, const ma_sub_t *sub) // print "name<TAB>s<TAB>e" to stdout for every kept read region
+{
+	uint32_t i;
+	for (i = 0; i < d->n_seq; ++i)
+		if (!d->seq[i].del && sub[i].s != sub[i].e) // skip reads flagged as deleted and reads whose region is empty
+			printf("%s\t%d\t%d\n", d->seq[i].name, sub[i].s, sub[i].e);
+}
+
+static void print_hits(size_t n_hits, const ma_hit_t *hit, const sdict_t *d, const ma_sub_t *sub) // print each hit to stdout as a 12-column TAB-delimited line with reads named "name:start-end"
+{
+	size_t i;
+	for (i = 0; i < n_hits; ++i) {
+		const ma_hit_t *p = &hit[i];
+		const ma_sub_t *rq = &sub[p->qns>>32], *rt = &sub[p->tn]; // high 32 bits of qns index the query read; the low 32 bits are printed below as the query start
+		printf("%s:%d-%d\t%d\t%d\t%d\t%c\t%s:%d-%d\t%d\t%d\t%d\t%d\t%d\t255\n", d->seq[p->qns>>32].name, rq->s + 1, rq->e, rq->e - rq->s, (uint32_t)p->qns, p->qe,
+				"+-"[p->rev], d->seq[p->tn].name, rt->s + 1, rt->e, rt->e - rt->s, p->ts, p->te, p->ml, p->bl); // last column is always the constant 255
+	}
+}
+
+int main(int argc, char *argv[]) // miniasm driver: parse options, read PAF hits, select reads, clean the overlap graph, print the requested output
+{
+	ma_opt_t opt;
+	int i, c, stage = 100, no_first = 0, no_second = 0, bi_dir = 1, o_set = 0; // stage: highest pipeline step to run (-S); default runs everything
+	sdict_t *d;
+	ma_sub_t *sub = 0; // NOTE(review): stays NULL if both selection passes are skipped; confirm downstream callers tolerate that
+	ma_hit_t *hit;
+	size_t n_hits;
+	float cov = 0.f; // stays 0 when ma_hit_flt() is skipped (-1 or low -S), so the 2-pass min_dp below falls back to opt.min_dp
+	char *fn_reads = 0, *outfmt = "ug";
+
+	ma_opt_init(&opt);
+	while ((c = getopt(argc, argv, "n:m:s:c:C:S:i:d:g:o:h:I:r:f:e:p:12VBbF:")) >= 0) {
+		if (c == 'm') opt.min_match = atoi(optarg);
+		else if (c == 'i') opt.min_iden = atof(optarg);
+		else if (c == 's') opt.min_span = atoi(optarg);
+		else if (c == 'c') opt.min_dp = atoi(optarg);
+		else if (c == 'o') opt.min_ovlp = atoi(optarg), o_set = 1;
+		else if (c == 'S') stage = atoi(optarg);
+		else if (c == 'd') opt.bub_dist = atoi(optarg);
+		else if (c == 'g') opt.gap_fuzz = atoi(optarg);
+		else if (c == 'h') opt.max_hang = atoi(optarg);
+		else if (c == 'I') opt.int_frac = atof(optarg);
+		else if (c == 'e') opt.max_ext = atoi(optarg);
+		else if (c == 'f') fn_reads = optarg;
+		else if (c == 'p') outfmt = optarg;
+		else if (c == '1') no_first = 1;
+		else if (c == '2') no_second = 1;
+		else if (c == 'n') opt.n_rounds = atoi(optarg) - 1; // stored as (number of rounds - 1)
+		else if (c == 'C') opt.cov_ratio = atof(optarg);
+		else if (c == 'B') bi_dir = 1;
+		else if (c == 'b') bi_dir = 0;
+		else if (c == 'F') opt.final_ovlp_drop_ratio = atof(optarg);
+		else if (c == 'V') {
+			printf("%s\n", MA_VERSION);
+			return 0;
+		} else if (c == 'r') { // -r MAX[,MIN]
+			char *s;
+			opt.max_ovlp_drop_ratio = strtod(optarg, &s);
+			if (*s == ',') opt.min_ovlp_drop_ratio = strtod(s + 1, &s);
+		}
+	}
+	if (o_set == 0) opt.min_ovlp = opt.min_span; // -o defaults to the (possibly user-set) -s value
+	if (argc == optind) {
+		fprintf(stderr, "Usage: miniasm [options] <in.paf>\n");
+		fprintf(stderr, "Options:\n");
+		fprintf(stderr, "  Pre-selection:\n");
+		fprintf(stderr, "    -m INT      min match length [%d]\n", opt.min_match);
+		fprintf(stderr, "    -i FLOAT    min identity [%.2g]\n", opt.min_iden);
+		fprintf(stderr, "    -s INT      min span [%d]\n", opt.min_span);
+		fprintf(stderr, "    -c INT      min coverage [%d]\n", opt.min_dp);
+		fprintf(stderr, "  Overlap:\n");
+		fprintf(stderr, "    -o INT      min overlap [same as -s]\n");
+		fprintf(stderr, "    -h INT      max over hang length [%d]\n", opt.max_hang);
+		fprintf(stderr, "    -I FLOAT    min end-to-end match ratio [%.2g]\n", opt.int_frac);
+		fprintf(stderr, "  Layout:\n");
+		fprintf(stderr, "    -g INT      max gap differences between reads for trans-reduction [%d]\n", opt.gap_fuzz);
+		fprintf(stderr, "    -d INT      max distance for bubble popping [%d]\n", opt.bub_dist);
+		fprintf(stderr, "    -e INT      small unitig threshold [%d]\n", opt.max_ext);
+		fprintf(stderr, "    -f FILE     read sequences []\n");
+		fprintf(stderr, "    -n INT      rounds of short overlap removal [%d]\n", opt.n_rounds + 1);
+		fprintf(stderr, "    -r FLOAT[,FLOAT]\n");
+		fprintf(stderr, "                max and min overlap drop ratio [%.2g,%.2g]\n", opt.max_ovlp_drop_ratio, opt.min_ovlp_drop_ratio);
+		fprintf(stderr, "    -F FLOAT    aggressive overlap drop ratio in the end [%.2g]\n", opt.final_ovlp_drop_ratio);
+		fprintf(stderr, "  Miscellaneous:\n");
+		fprintf(stderr, "    -p STR      output information: bed, paf, sg or ug [%s]\n", outfmt);
+//		fprintf(stderr, "    -B          only one direction of an arc is present in input PAF\n"); // deprecated; for backward compatibility
+		fprintf(stderr, "    -b          both directions of an arc are present in input\n");
+		fprintf(stderr, "    -1          skip 1-pass read selection\n");
+		fprintf(stderr, "    -2          skip 2-pass read selection\n");
+		fprintf(stderr, "    -V          print version number\n");
+		fprintf(stderr, "\nSee miniasm.1 for detailed description of the command-line options.\n");
+		return 1;
+	}
+
+	sys_init();
+	d = sd_init();
+
+	fprintf(stderr, "[M::%s] ===> Step 1: reading read mappings <===\n", __func__);
+	hit = ma_hit_read(argv[optind], opt.min_span, opt.min_match, d, &n_hits, bi_dir);
+
+	if (!no_first) {
+		fprintf(stderr, "[M::%s] ===> Step 2: 1-pass (crude) read selection <===\n", __func__);
+		if (stage >= 2) {
+			sub = ma_hit_sub(opt.min_dp, opt.min_iden, 0, n_hits, hit, d->n_seq);
+			n_hits = ma_hit_cut(sub, opt.min_span, n_hits, hit);
+		}
+		if (stage >= 3) n_hits = ma_hit_flt(sub, opt.max_hang * 1.5, opt.min_ovlp * .5, n_hits, hit, &cov);
+	}
+
+	if (!no_second) {
+		fprintf(stderr, "[M::%s] ===> Step 3: 2-pass (fine) read selection <===\n", __func__);
+		if (stage >= 4) {
+			ma_sub_t *sub2;
+			int min_dp = (int)(cov * opt.cov_ratio + .499) - 1;
+			min_dp = min_dp > opt.min_dp? min_dp : opt.min_dp; // never go below the user-requested coverage
+			sub2 = ma_hit_sub(min_dp, opt.min_iden, opt.min_span/2, n_hits, hit, d->n_seq);
+			n_hits = ma_hit_cut(sub2, opt.min_span, n_hits, hit);
+			if (sub) ma_sub_merge(d->n_seq, sub, sub2), free(sub2); // merge the 2-pass regions into the 1-pass ones
+			else sub = sub2; // 1-pass was skipped: nothing to merge into, so adopt sub2 (freed together with sub at the end)
+		}
+		if (stage >= 5) n_hits = ma_hit_contained(&opt, d, sub, n_hits, hit);
+	}
+
+	hit = (ma_hit_t*)realloc(hit, n_hits * sizeof(ma_hit_t)); // shrink the array to the surviving hits
+
+	if (strcmp(outfmt, "bed") == 0) {
+		print_subs(d, sub);
+	} else if (strcmp(outfmt, "paf") == 0) {
+		print_hits(n_hits, hit, d, sub);
+	} else if (strcmp(outfmt, "ug") == 0 || strcmp(outfmt, "sg") == 0) {
+		asg_t *sg = 0;
+		ma_ug_t *ug = 0;
+
+		fprintf(stderr, "[M::%s] ===> Step 4: graph cleaning <===\n", __func__);
+		sg = ma_sg_gen(&opt, d, sub, n_hits, hit);
+		if (stage >= 6) {
+			fprintf(stderr, "[M::%s] ===> Step 4.1: transitive reduction <===\n", __func__);
+			asg_arc_del_trans(sg, opt.gap_fuzz);
+		}
+		if (stage >= 7) {
+			fprintf(stderr, "[M::%s] ===> Step 4.2: initial tip cutting and bubble popping <===\n", __func__);
+			asg_cut_tip(sg, opt.max_ext);
+			asg_pop_bubble(sg, opt.bub_dist);
+		}
+		if (stage >= 9) {
+			fprintf(stderr, "[M::%s] ===> Step 4.3: cutting short overlaps (%d rounds in total) <===\n", __func__, opt.n_rounds + 1);
+			for (i = 0; i <= opt.n_rounds; ++i) { // threshold rises linearly from the min to the max drop ratio
+				float r = opt.n_rounds > 0? opt.min_ovlp_drop_ratio + (opt.max_ovlp_drop_ratio - opt.min_ovlp_drop_ratio) / opt.n_rounds * i : opt.max_ovlp_drop_ratio; // single round (-n 1): use the max ratio instead of dividing by zero
+				if (asg_arc_del_short(sg, r) != 0) {
+					asg_cut_tip(sg, opt.max_ext);
+					asg_pop_bubble(sg, opt.bub_dist);
+				}
+			}
+		}
+		if (stage >= 10) {
+			fprintf(stderr, "[M::%s] ===> Step 4.4: removing short internal sequences and bi-loops <===\n", __func__);
+			asg_cut_internal(sg, 1);
+			asg_cut_biloop(sg, opt.max_ext);
+			asg_cut_tip(sg, opt.max_ext);
+			asg_pop_bubble(sg, opt.bub_dist);
+		}
+		if (stage >= 11) {
+			fprintf(stderr, "[M::%s] ===> Step 4.5: aggressively cutting short overlaps <===\n", __func__);
+			if (asg_arc_del_short(sg, opt.final_ovlp_drop_ratio) != 0) {
+				asg_cut_tip(sg, opt.max_ext);
+				asg_pop_bubble(sg, opt.bub_dist);
+			}
+		}
+
+		if (strcmp(outfmt, "ug") == 0) {
+			fprintf(stderr, "[M::%s] ===> Step 5: generating unitigs <===\n", __func__);
+			ug = ma_ug_gen(sg);
+			if (fn_reads) ma_ug_seq(ug, d, sub, fn_reads); // unitig sequences are only filled in when -f was given
+			ma_ug_print(ug, d, sub, stdout);
+		} else ma_sg_print(sg, d, sub, stdout);
+
+		asg_destroy(sg);
+		ma_ug_destroy(ug);
+	}
+
+	free(sub); free(hit);
+	sd_destroy(d);
+
+	fprintf(stderr, "[M::%s] Version: %s\n", __func__, MA_VERSION);
+	fprintf(stderr, "[M::%s] CMD:", __func__);
+	for (i = 0; i < argc; ++i)
+		fprintf(stderr, " %s", argv[i]);
+	fprintf(stderr, "\n[M::%s] Real time: %.3f sec; CPU: %.3f sec\n", __func__, sys_realtime(), sys_cputime());
+	return 0;
+}
diff --git a/miniasm.1 b/miniasm.1
new file mode 100644
index 0000000..7960717
--- /dev/null
+++ b/miniasm.1
@@ -0,0 +1,239 @@
+.TH miniasm 1 "06 December 2015" "miniasm-0.2" "Bioinformatics tools"
+
+.SH NAME
+.PP
+miniasm - de novo assembler for long read sequences
+
+.SH SYNOPSIS
+.PP
+miniasm
+.RB [ -b12V ]
+.RB [ -m
+.IR minMatch ]
+.RB [ -i
+.IR minIden ]
+.RB [ -s
+.IR minSpan ]
+.RB [ -c
+.IR minCov ]
+.RB [ -o
+.IR minOvlp ]
+.RB [ -h
+.IR maxHang ]
+.RB [ -I
+.IR intThres ]
+.RB [ -g
+.IR maxGapDiff ]
+.RB [ -d
+.IR maxBubDist ]
+.RB [ -e
+.IR minUtgSize ]
+.RB [ -f
+.IR readFile ]
+.RB [ -n
+.IR nRounds ]
+.RB [ -r
+.IR dropRatio ]
+.RB [ -F
+.IR finalDropRatio ]
+.RB [ -p
+.IR outputInfo ]
+.I mapping.paf
+>
+.I output.gfa
+
+.SH DESCRIPTION
+.PP
+Miniasm is a very fast OLC-based de novo assembler for noisy long reads. It
+takes all-vs-all read self-mappings in the PAF format as input and outputs an
+assembly graph in the GFA format. Different from mainstream assemblers, miniasm
+does not have a consensus step. It simply concatenates pieces of read sequences
+to generate the final unitig sequences. Thus the per-base error rate is similar
+to the raw input reads.
+
+.SH OPTIONS
+
+.SS Preselection options
+
+.TP 10
+.BI -m \ INT
+Drop mappings having less than
+.I INT
+matching bases (col10 in PAF) [100]. This option has the same role as
+.B -L
+of minimap.
+
+.TP
+.BI -s \ INT
+Drop mappings shorter than
+.IR INT -bp
+[1000]. This option also affects the second round of read filtering and minimal
+overlap length.
+
+.TP
+.BI -i \ FLOAT
+During read filtering, ignore mappings with col10/col11 below
+.I FLOAT
+[0.05]. Ignored mappings are still used for read overlaps.
+
+.TP
+.BI -c \ INT
+Minimal coverage by other reads [3]. In the first round of filtering, miniasm
+finds the longest region covered by
+.I INT
+or more reads. In the second round, it in addition requires each remaining base
+to be covered by
+.I INT
+bases at least
+.IR minSpan /2
+from the ends of other reads.
+
+.SS Overlapping options
+
+.TP 10
+.BI -o \ INT
+Minimal overlap length [same as
+.IR minSpan ]
+
+.TP
+.BI -h \ INT
+Maximum overhang length [1000]. An overhang is an unmapped region that should
+be mapped given a true overlap or true containment. If the overhang is too
+long, the mapping is considered an internal match and will be ignored.
+
+.TP
+.BI -I \ FLOAT
+Minimal ratio of mapping length to mapping+overhang length for a mapping
+considered a containment or an overlap [0.8]. This option has a similar role to
+.BR -h ,
+except that it controls the ratio, not length.
+
+.SS Graph layout options
+
+.TP 10
+.BI -g \ INT
+Maximal gap differences between two reads in a mapping [1000]. This parameter
+is only used for transitive reduction.
+
+.TP
+.BI -d \ INT
+Maximal probing distance for bubble popping [50000]. Bubbles longer than
+.I INT
+will not be popped.
+
+.TP
+.BI -e \ INT
+A unitig is considered small if it is composed of fewer than
+.I INT
+reads [4]. Miniasm may try to remove small unitigs at various steps.
+
+.TP
+.BI -f \ FILE
+Read sequence file in FASTA or FASTQ format for generating unitig sequences
+[null]. If this option is absent, miniasm produces a GFA output without
+sequences.
+
+.TP
+.BI -r \ FLOAT1[,FLOAT2]
+Max and min overlap drop ratio [0.7,0.5]. Let overlap(v->w) be the overlap
+length of edge v->w and maxovlp(v)=max_w{overlap(v->w)} be the length of
+largest overlap. Miniasm drops overlap v->w if overlap(v->w)/maxovlp(v) is below
+a threshold controlled by this option. Miniasm applies
+.I nRounds
+rounds of short overlap removal with a threshold increasing from
+.I FLOAT2
+to
+.IR FLOAT1 .
+
+.TP
+.BI -n \ INT
+Rounds of short overlap removal [3].
+
+.TP
+.BI -F \ FLOAT
+Overlap drop ratio threshold after short unitig removal [0.8]
+
+.SS Miscellaneous options
+
+.TP 10
+.B -b
+Indicate that in the input, the same mapping is likely to be given twice
+
+.TP
+.B -1
+Skip the first round of pre-assembly read selection
+
+.TP
+.B -2
+Skip the second round of pre-assembly read selection
+
+.TP
+.BI -p \ STR
+Output information and format [ug]. Possible
+.I STR
+values include -
+.BR bed :
+post-filtered read regions in the BED format;
+.BR paf :
+mappings between post-filtered reads;
+.BR sg :
+read overlap graph in the GFA format;
+.BR ug :
+unitig graph in the GFA format.
+
+.TP
+.B -V
+Print version number to stdout
+
+.SH INPUT FORMAT
+
+.PP
+Miniasm reads mapping positions in the Pairwise mApping Format (PAF), which is
+a TAB-delimited text format with each line consisting of at least 12 fields as
+are described in the following table:
+
+.TS
+center box;
+cb | cb | cb
+r | c | l .
+Col	Type	Description
+_
+1	string	Query sequence name
+2	int	Query sequence length
+3	int	Query start coordinate (0-based)
+4	int	Query end coordinate (0-based)
+5	char	`+' if query and target on the same strand; `-' if opposite
+6	string	Target sequence name
+7	int	Target sequence length
+8	int	Target start coordinate on the original strand
+9	int	Target end coordinate on the original strand
+10	int	Number of matching bases in the mapping
+11	int	Number of bases, including gaps, in the mapping
+12	int	Mapping quality (0-255 with 255 for missing)
+.TE
+
+.PP
+Please see minimap(1) for the detailed description of each field.
+
+.SH OUTPUT FORMAT
+
+.PP
+Miniasm outputs the assembly in the Graphical Fragment Assembly format (GFA).
+It is a line-based TAB-delimited format, with the leading letter indicating the
+type of the line. The following table gives the line types used by miniasm:
+
+.TS
+center box;
+cb | cb | cb
+c | l | l .
+Line	Comment	Fixed fields
+_
+H	Header	N/A
+S	Segment	segName segSeq
+L	Overlap	segName1 segOri1 segName2 segOri2 ovlpCIGAR
+a	Golden path	utgName utgStart readName:start-end readOri length
+.TE
+
+.SH SEE ALSO
+.PP
+minimap(1)
diff --git a/miniasm.h b/miniasm.h
new file mode 100644
index 0000000..2a88de6
--- /dev/null
+++ b/miniasm.h
@@ -0,0 +1,107 @@
+#ifndef MINIASM_H
+#define MINIASM_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include "sdict.h"
+#include "asg.h"
+
+extern int ma_verbose;
+
+typedef struct {
+	int min_span;
+	int min_match;
+	int min_dp;
+	float min_iden;
+	float cov_ratio;
+
+	int max_hang;
+	int min_ovlp;
+	float int_frac;
+
+	int gap_fuzz;
+	int n_rounds;
+	int bub_dist;
+	int max_ext;
+	float min_ovlp_drop_ratio, max_ovlp_drop_ratio, final_ovlp_drop_ratio;
+} ma_opt_t;
+
+typedef struct {
+	uint64_t qns;
+	uint32_t qe, tn, ts, te;
+	uint32_t ml:31, rev:1;
+	uint32_t bl:31, del:1;
+} ma_hit_t;
+
+typedef struct { size_t n, m; ma_hit_t *a; } ma_hit_v;
+
+typedef struct {
+	uint32_t s:31, del:1, e;
+} ma_sub_t;
+
+typedef struct {
+	uint32_t len:31, circ:1; // len: length of the unitig; circ: circular if non-zero
+	uint32_t start, end; // start: starting vertex in the string graph; end: ending vertex
+	uint32_t m, n; // number of reads
+	uint64_t *a; // list of reads
+	char *s; // unitig sequence is not null
+} ma_utg_t;
+
+typedef struct { size_t n, m; ma_utg_t *a; } ma_utg_v;
+
+typedef struct {
+	ma_utg_v u;
+	asg_t *g;
+} ma_ug_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void ma_opt_init(ma_opt_t *opt);
+ma_hit_t *ma_hit_read(const char *fn, int min_span, int min_match, sdict_t *d, size_t *n, int bi_dir);
+ma_sub_t *ma_hit_sub(int min_dp, float min_iden, int end_clip, size_t n, const ma_hit_t *a, size_t n_sub);
+size_t ma_hit_cut(const ma_sub_t *reg, int min_span, size_t n, ma_hit_t *a);
+size_t ma_hit_flt(const ma_sub_t *sub, int max_hang, int min_ovlp, size_t n, ma_hit_t *a, float *cov);
+void ma_sub_merge(size_t n_sub, ma_sub_t *a, const ma_sub_t *b);
+size_t ma_hit_contained(const ma_opt_t *opt, sdict_t *d, ma_sub_t *sub, size_t n, ma_hit_t *a);
+
+asg_t *ma_sg_gen(const ma_opt_t *opt, const sdict_t *d, const ma_sub_t *sub, size_t n_hits, const ma_hit_t *hit);
+void ma_sg_print(const asg_t *g, const sdict_t *d, const ma_sub_t *sub, FILE *fp);
+ma_ug_t *ma_ug_gen(asg_t *g);
+int ma_ug_seq(ma_ug_t *g, const sdict_t *d, const ma_sub_t *sub, const char *fn);
+void ma_ug_print(const ma_ug_t *ug, const sdict_t *d, const ma_sub_t *sub, FILE *fp);
+void ma_ug_destroy(ma_ug_t *ug);
+
+#ifdef __cplusplus
+}
+#endif
+
+#define MA_HT_INT        (-1)
+#define MA_HT_QCONT      (-2)
+#define MA_HT_TCONT      (-3)
+#define MA_HT_SHORT_OVLP (-4)
+
+static inline int ma_hit2arc(const ma_hit_t *h, int ql, int tl, int max_hang, float int_frac, int min_ovlp, asg_arc_t *p) // classify hit h given query length ql and target length tl; for a usable overlap fill arc p and return l, otherwise return a negative MA_HT_* code and leave p untouched
+{
+	int32_t tl5, tl3, ext5, ext3, qs = (int32_t)h->qns; // qs: query start, taken from the low 32 bits of qns
+	uint32_t u, v, l; // u: query end; v: target end; l: length from u to v
+	u = v = l = UINT32_MAX;
+	if (h->rev) tl5 = tl - h->te, tl3 = h->ts; // tl5: 5'-end overhang (on the query strand); tl3: similar
+	else tl5 = h->ts, tl3 = tl - h->te;
+	ext5 = qs < tl5? qs : tl5; // ext5/ext3: the shorter of the query-side and target-side unaligned ends
+	ext3 = ql - h->qe < tl3? ql - h->qe : tl3;
+	if (ext5 > max_hang || ext3 > max_hang || h->qe - qs < (h->qe - qs + ext5 + ext3) * int_frac) // overhang too long, or aligned part too small a fraction of aligned+overhang
+		return MA_HT_INT;
+	if (qs <= tl5 && ql - h->qe <= tl3) return MA_HT_QCONT; // query contained
+	else if (qs >= tl5 && ql - h->qe >= tl3) return MA_HT_TCONT; // target contained
+	else if (qs > tl5) u = 0, v = !!h->rev, l = qs - tl5; // query extends further on the 5' side
+	else u = 1, v = !h->rev, l = (ql - h->qe) - tl3; // otherwise the query extends further on the 3' side
+	if (h->qe - qs + ext5 + ext3 < min_ovlp || h->te - h->ts + ext5 + ext3 < min_ovlp) return MA_HT_SHORT_OVLP; // short overlap
+	u |= h->qns>>32<<1, v |= h->tn<<1; // read index goes in the bits above bit 0; bit 0 keeps the end flag chosen above
+	p->ul = (uint64_t)u<<32 | l, p->v = v, p->ol = ql - l, p->del = 0; // ul packs u (high 32 bits) and l (low 32 bits)
+	return l;
+}
+
+#endif
diff --git a/misc/da2paf.pl b/misc/da2paf.pl
new file mode 100755
index 0000000..0d6af4a
--- /dev/null
+++ b/misc/da2paf.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+my %opts;
+getopts("2", \%opts);
+die("Usage: ls *.las | xargs -i LAdump -cd reads.db {} | da2paf.pl [-2] <(DBdump -rh reads.db)\n") if @ARGV < 1;
+my $is_dbl = defined($opts{2});
+
+warn("Reading sequence lengths...\n");
+my $fn = shift(@ARGV);
+open(FH, $fn) || die;
+my ($id, @len);
+while (<FH>) {
+	if (/^R\s(\d+)/) {
+		$id = $1;
+	} elsif (/^L\s\S+\s(\d+)\s(\d+)/) {
+		$len[$id] = $2 - $1;
+	}
+}
+close(FH);
+
+warn("Converting mappings...\n");
+my ($id0, $id1, $strand, $ab, $ae, $bb, $be, $skip);
+while (<>) {
+	if (/^P\s(\S+)\s(\S+)\s([nc])/) {
+		$id0 = $1; $id1 = $2; $strand = $3 eq 'n'? '+' : '-';
+		$skip = !$is_dbl && $id0 > $id1? 1 : 0;
+	} elsif (!$skip && /^C\s(\d+)\s(\d+)\s(\d+)\s(\d+)/) {
+		$ab = $1, $ae = $2, $bb = $3, $be = $4;
+	} elsif (!$skip && /^D\s(\d+)/) {
+		my $bl = $ae - $ab > $be - $bb? $ae - $ab : $be - $bb;
+		my $ml = $bl - $1;
+		if ($strand eq '+') {
+			print join("\t", $id0, $len[$id0], $ab, $ae, '+', $id1, $len[$id1], $bb, $be, $ml, $bl, 255), "\n";
+		} else {
+			my $l = $len[$id1];
+			print join("\t", $id0, $len[$id0], $ab, $ae, '-', $id1, $l, $l - $be, $l - $bb, $ml, $bl, 255), "\n";
+		}
+	}
+}
diff --git a/misc/demo-ecoli-pacbio.sh b/misc/demo-ecoli-pacbio.sh
new file mode 100755
index 0000000..c6f4f99
--- /dev/null
+++ b/misc/demo-ecoli-pacbio.sh
@@ -0,0 +1,26 @@
+#
+# Dependencies: awk, wget, git, gcc and zlib
+#
+
+# Download sample PacBio from the PBcR website
+wget -O- http://www.cbcb.umd.edu/software/PBcR/data/selfSampleData.tar.gz | tar zxf -
+ln -s selfSampleData/pacbio_filtered.fastq reads.fq
+
+# Install minimap and miniasm (requiring gcc and zlib)
+git clone https://github.com/lh3/minimap && (cd minimap && make)
+git clone https://github.com/lh3/miniasm && (cd miniasm && make)
+
+# Overlap
+minimap/minimap -Sw5 -L100 -m0 -t8 reads.fq reads.fq | gzip -1 > reads.paf.gz
+
+# Layout
+miniasm/miniasm -f reads.fq reads.paf.gz > utg.gfa
+
+# Convert to FASTA
+awk '/^S/{print ">"$2"\n"$3}' utg.gfa > utg.fa
+
+# Download E. coli K-12 sequence
+wget -O NC_000913.fa 'http://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?sendto=on&db=nuccore&dopt=fasta&val=556503834'
+
+# Map assembly to ref
+minimap/minimap NC_000913.fa utg.fa | miniasm/minidot - > utg.eps
diff --git a/misc/demo-worm-pacbio.sh b/misc/demo-worm-pacbio.sh
new file mode 100755
index 0000000..f40459b
--- /dev/null
+++ b/misc/demo-worm-pacbio.sh
@@ -0,0 +1,56 @@
+prefix=ce-40X
+
+# list of read files
+cat > $prefix.files <<EOF
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0001/Analysis_Results/m140928_184123_42139_c100719602550000001823155305141590_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0001/Analysis_Results/m140928_184123_42139_c100719602550000001823155305141590_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0001/Analysis_Results/m140928_184123_42139_c100719602550000001823155305141590_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0002/Analysis_Results/m140928_230547_42139_c100719602550000001823155305141591_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0002/Analysis_Results/m140928_230547_42139_c100719602550000001823155305141591_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0002/Analysis_Results/m140928_230547_42139_c100719602550000001823155305141591_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0003/Analysis_Results/m140929_033247_42139_c100719602550000001823155305141592_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0003/Analysis_Results/m140929_033247_42139_c100719602550000001823155305141592_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0003/Analysis_Results/m140929_033247_42139_c100719602550000001823155305141592_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0004/Analysis_Results/m140929_075857_42139_c100719602550000001823155305141593_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0004/Analysis_Results/m140929_075857_42139_c100719602550000001823155305141593_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0004/Analysis_Results/m140929_075857_42139_c100719602550000001823155305141593_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0005/Analysis_Results/m140929_122654_42139_c100719602550000001823155305141594_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0005/Analysis_Results/m140929_122654_42139_c100719602550000001823155305141594_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590970/0005/Analysis_Results/m140929_122654_42139_c100719602550000001823155305141594_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0001/Analysis_Results/m140928_191128_sidney_c100699772550000001823139903261590_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0001/Analysis_Results/m140928_191128_sidney_c100699772550000001823139903261590_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0001/Analysis_Results/m140928_191128_sidney_c100699772550000001823139903261590_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0002/Analysis_Results/m140928_233028_sidney_c100699772550000001823139903261591_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0002/Analysis_Results/m140928_233028_sidney_c100699772550000001823139903261591_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0002/Analysis_Results/m140928_233028_sidney_c100699772550000001823139903261591_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0003/Analysis_Results/m140929_034941_sidney_c100699772550000001823139903261592_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0003/Analysis_Results/m140929_034941_sidney_c100699772550000001823139903261592_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0003/Analysis_Results/m140929_034941_sidney_c100699772550000001823139903261592_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0004/Analysis_Results/m140929_080908_sidney_c100699772550000001823139903261593_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0004/Analysis_Results/m140929_080908_sidney_c100699772550000001823139903261593_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0004/Analysis_Results/m140929_080908_sidney_c100699772550000001823139903261593_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0005/Analysis_Results/m140929_122826_sidney_c100699772550000001823139903261594_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0005/Analysis_Results/m140929_122826_sidney_c100699772550000001823139903261594_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0005/Analysis_Results/m140929_122826_sidney_c100699772550000001823139903261594_s1_p0.3.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0006/Analysis_Results/m140929_164720_sidney_c100699772550000001823139903261595_s1_p0.1.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0006/Analysis_Results/m140929_164720_sidney_c100699772550000001823139903261595_s1_p0.2.subreads.fasta
+http://datasets.pacb.com.s3.amazonaws.com/2014/c_elegans/40X/raw_data/2590971/0006/Analysis_Results/m140929_164720_sidney_c100699772550000001823139903261595_s1_p0.3.subreads.fasta
+EOF
+
+# download read file
+if [ ! -f $prefix.fa.gz ]; then
+	wget -O- -qi $prefix.files | gzip -1 > $prefix.fa.gz
+fi
+
+# Install minimap and miniasm (requiring gcc and zlib)
+git clone https://github.com/lh3/minimap && (cd minimap && make)
+git clone https://github.com/lh3/miniasm && (cd miniasm && make)
+
+# Overlap (shorter N50 without -I6G)
+minimap/minimap -Sw5 -L100 -m0 -t8 -I6G $prefix.fa.gz $prefix.fa.gz 2> $prefix.paf.gz.log | gzip -1 > $prefix.paf.gz
+
+# Layout
+miniasm/miniasm -f $prefix.fa.gz $prefix.paf.gz > $prefix.gfa 2> $prefix.gfa.log
+
+# Convert to FASTA
+awk '/^S/{print ">"$2"\n"$3}' $prefix.gfa > $prefix.utg.fa
diff --git a/misc/mhap2paf.pl b/misc/mhap2paf.pl
new file mode 100755
index 0000000..0533521
--- /dev/null
+++ b/misc/mhap2paf.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+my %opts = ();
+getopts("2", \%opts);
+my $is_dbl = defined($opts{2});
+
+die("Usage: mhap2paf.pl [-2] <in.mhap>\n") if (@ARGV == 0 && -t STDIN);
+
+while (<>) {
+	chomp;
+	my @t = split;
+	my $bl = $t[6] - $t[5] > $t[10] - $t[9]? $t[6] - $t[5] : $t[10] - $t[9];
+	my $r = $t[2];
+	my $ml = int($bl * ($r <= 1.? $r : .01 * $r) + .499);
+	my $cm = "cm:i:" . int($t[3] + .499);
+	my $rev = $t[4] == $t[8]? '+' : '-';
+	print(join("\t", @t[0,7,5,6], $rev, @t[1,11,9,10], $ml, $bl, 255, $cm), "\n");
+	print(join("\t", @t[1,11,9,10], $rev, @t[0,7,5,6], $ml, $bl, 255, $cm), "\n") if ($is_dbl);
+}
diff --git a/misc/paf2mhap.pl b/misc/paf2mhap.pl
new file mode 100755
index 0000000..d13bc86
--- /dev/null
+++ b/misc/paf2mhap.pl
@@ -0,0 +1,35 @@
+#!/usr/bin/env perl
+# Convert PAF mappings to MHAP lines; -p prints the identity column as a percentage instead of a fraction.
+use strict;
+use warnings;
+use Getopt::Std;
+
+my %opts = ();
+getopts("p", \%opts);
+my $is_100 = defined($opts{p});
+
+die("Usage: paf2mhap.pl [-p] <in.fa> <in.paf>\n") if (@ARGV == 0);
+
+warn("Parsing FASTA to create the name<=>id table...\n");
+my %hash;
+my $fn = shift(@ARGV);
+open(FH, $fn =~ /\.gz$/? "gzip -dc $fn |" : $fn) || die; # .gz input is decompressed through a pipe; the command must name the file itself
+my $cnt = 0;
+while (<FH>) {
+	if (/^>(\S+)/) {
+		$hash{$1} = ++$cnt unless defined($hash{$1}); # 1-based id in order of first appearance
+	}
+}
+close(FH);
+
+warn("Converting PAF to MHAP format...\n");
+while (<>) {
+	chomp;
+	my @t = split;
+	next if ($t[0] eq $t[5]); # NB: ignore self matches
+	my $cnt = /cm:i:(\d+)/? $1 : 0; # value of the optional cm:i: tag, or 0 when the tag is absent
+	my $r = $t[9] / $t[10]; # PAF col10 / col11
+	$r = sprintf("%.4f", $is_100? 100. * $r : $r);
+	die if !defined($hash{$t[0]}) || !defined($hash{$t[5]}); # every read name must have been seen in the FASTA
+	print(join(" ", $hash{$t[0]}, $hash{$t[5]}, $r, $cnt, 0, @t[2,3,1], $t[4] eq '+'? 0 : 1, @t[7,8,6]), "\n");
+}
diff --git a/misc/sam2paf.js b/misc/sam2paf.js
new file mode 100644
index 0000000..09ea276
--- /dev/null
+++ b/misc/sam2paf.js
@@ -0,0 +1,51 @@
+var file = arguments.length? new File(arguments[0]) : new File();
+var buf = new Bytes();
+var re = /(\d+)([MIDSHN])/g;
+
+var len = {}, lineno = 0;
+while (file.readline(buf) >= 0) {
+	var m, line = buf.toString();
+	++lineno;
+	if (line.charAt(0) == '@') {
+		if (/^@SQ/.test(line)) {
+			var name = (m = /\tSN:(\S+)/.exec(line)) != null? m[1] : null;
+			var l = (m = /\tLN:(\d+)/.exec(line)) != null? parseInt(m[1]) : null;
+			if (name != null && l != null) len[name] = l;
+		}
+		continue;
+	}
+	var t = line.split("\t");
+	var flag = parseInt(t[1]);
+	if (t[2] == '*' || (flag&4)) continue;
+	var tlen = len[t[2]];
+	if (tlen == null) throw Error("ERROR at line " + lineno + ": can't find the length of contig " + t[2]);
+	var NM = (m = /\tNM:i:(\d+)/.exec(line)) != null? parseInt(m[1]) : null;
+	if (NM == null) throw Error("ERROR at line " + lineno + ": no NM tag");
+	var clip = [0, 0], I = [0, 0], D = [0, 0], M = 0, N = 0;
+	while ((m = re.exec(t[5])) != null) {
+		var l = parseInt(m[1]);
+		if (m[2] == 'M') M += l;
+		else if (m[2] == 'I') ++I[0], I[1] += l;
+		else if (m[2] == 'D') ++D[0], D[1] += l;
+		else if (m[2] == 'N') N += l;
+		else if (m[2] == 'S' || m[2] == 'H')
+			clip[M == 0? 0 : 1] = l;
+	}
+	if (NM < I[1] + D[1]) {
+		warn("WARNING at line " + lineno + ": NM is less than the total number of gaps; skipped");
+		continue;
+	}
+	var extra = ["mm:i:"+(NM-I[1]-D[1]), "io:i:"+I[0], "in:i:"+I[1], "do:i:"+D[0], "dn:i:"+D[1]];
+	var match = M - (NM - I[1] - D[1]);
+	var blen = M + I[1] + D[1];
+	var qlen = M + I[1] + clip[0] + clip[1];
+	var qs, qe;
+	if (flag&16) qs = clip[1], qe = qlen - clip[0];
+	else qs = clip[0], qe = qlen - clip[1];
+	var ts = parseInt(t[3]) - 1, te = ts + M + D[1] + N;
+	var a = [t[0], qlen, qs, qe, flag&16? '-' : '+', t[2], tlen, ts, te, match, blen, t[4]];
+	print(a.join("\t"), extra.join("\t"));
+}
+
+buf.destroy();
+file.close();
diff --git a/paf.c b/paf.c
new file mode 100644
index 0000000..5d3bb4b
--- /dev/null
+++ b/paf.c
@@ -0,0 +1,67 @@
+#include <zlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "paf.h"
+
+#include "kseq.h"
+KSTREAM_INIT(gzFile, gzread, 0x10000)
+
+paf_file_t *paf_open(const char *fn) // open fn for reading through zlib; a NULL fn or "-" reads from stdin
+{
+	kstream_t *ks;
+	gzFile fp;
+	paf_file_t *pf;
+	fp = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+	if (fp == 0) return 0; // open failed: report it to the caller as a NULL handle
+	ks = ks_init(fp);
+	pf = (paf_file_t*)calloc(1, sizeof(paf_file_t)); // zeroed, so pf->buf starts out empty
+	pf->fp = ks; // the kstream is stored behind the opaque void* member
+	return pf;
+}
+
+int paf_close(paf_file_t *pf) // release everything owned by pf; a NULL pf is a no-op; always returns 0
+{
+	kstream_t *ks;
+	if (pf == 0) return 0;
+	free(pf->buf.s); // line buffer
+	ks = (kstream_t*)pf->fp;
+	gzclose(ks->f); // close the gz handle before destroying the stream that wraps it
+	ks_destroy(ks);
+	free(pf);
+	return 0;
+}
+
+int paf_parse(int l, char *s, paf_rec_t *pr) // s must be NULL terminated; it is modified in place (each TAB becomes a NUL) and pr->qn/pr->tn point into it
+{ // on return: <0 for failure; 0 for success; >0 for filtered
+	char *q, *r;
+	int i, t; // t: index of the field being completed; q: start of that field
+	for (i = t = 0, q = s; i <= l; ++i) { // the i == l iteration closes the last field, which has no trailing TAB
+		if (i < l && s[i] != '\t') continue;
+		s[i] = 0;
+		if (t == 0) pr->qn = q;
+		else if (t == 1) pr->ql = strtol(q, &r, 10);
+		else if (t == 2) pr->qs = strtol(q, &r, 10);
+		else if (t == 3) pr->qe = strtol(q, &r, 10);
+		else if (t == 4) pr->rev = (*q == '-');
+		else if (t == 5) pr->tn = q;
+		else if (t == 6) pr->tl = strtol(q, &r, 10);
+		else if (t == 7) pr->ts = strtol(q, &r, 10);
+		else if (t == 8) pr->te = strtol(q, &r, 10);
+		else if (t == 9) pr->ml = strtol(q, &r, 10);
+		else if (t == 10) pr->bl = strtol(q, &r, 10);
+		++t, q = i < l? &s[i+1] : 0; // fields beyond index 10 are counted but ignored
+	}
+	if (t < 11) return -1; // all 11 parsed columns (indices 0..10) are required; with fewer, pr->bl would be left unset
+	return 0;
+}
+
+int paf_read(paf_file_t *pf, paf_rec_t *r) // read the next parsable line into r; r->qn/r->tn point into pf->buf and are overwritten by the next call
+{
+	int ret, dret;
+file_read_more:
+	ret = ks_getuntil((kstream_t*)pf->fp, KS_SEP_LINE, &pf->buf, &dret);
+	if (ret < 0) return ret; // no further line could be read: pass the negative code through
+	ret = paf_parse(pf->buf.l, pf->buf.s, r);
+	if (ret < 0) goto file_read_more; // silently skip lines paf_parse() rejects
+	return ret;
+}
diff --git a/paf.h b/paf.h
new file mode 100644
index 0000000..6c772d3
--- /dev/null
+++ b/paf.h
@@ -0,0 +1,38 @@
+#ifndef PAF_PAF_H
+#define PAF_PAF_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+typedef struct {
+	void *fp; // opaque stream handle; paf.c stores a kstream_t* here
+	kstring_t buf; // line buffer that paf_read() fills and paf_parse() splits in place
+} paf_file_t;
+
+typedef struct {
+	const char *qn, *tn; // these point to the input string; NOT allocated (columns 1 and 6 of the line)
+	uint32_t ql, qs, qe, tl, ts, te; // columns 2-4 and 7-9, parsed as base-10 integers
+	uint32_t ml:31, rev:1, bl; // ml: column 10; rev: 1 when column 5 starts with '-'; bl: column 11
+} paf_rec_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+paf_file_t *paf_open(const char *fn); // fn == NULL or "-" reads from stdin; returns NULL if the stream cannot be opened
+int paf_close(paf_file_t *pf); // accepts NULL; always returns 0
+int paf_read(paf_file_t *pf, paf_rec_t *r); // next parsable line into r; <0 once the stream read fails (presumably end of input)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/sdict.c b/sdict.c
new file mode 100644
index 0000000..457a62d
--- /dev/null
+++ b/sdict.c
@@ -0,0 +1,86 @@
+#include <string.h>
+#include "sdict.h"
+
+#include "khash.h"
+KHASH_MAP_INIT_STR(str, uint32_t)
+typedef khash_t(str) shash_t;
+
+sdict_t *sd_init(void)
+{ // new empty dictionary: a zero-filled struct plus an empty string hash
+	sdict_t *dict;
+	dict = (sdict_t*)calloc(1, sizeof(sdict_t));
+	dict->h = kh_init(str);
+	return dict;
+}
+
+void sd_destroy(sdict_t *d)
+{ // frees the hash, every stored name, the record array and d itself; NULL is accepted
+	uint32_t idx;
+	if (!d) return;
+	if (d->h != 0) kh_destroy(str, (shash_t*)d->h);
+	for (idx = 0; idx != d->n_seq; ++idx)
+		free(d->seq[idx].name);
+	free(d->seq);
+	free(d);
+}
+
+int32_t sd_put(sdict_t *d, const char *name, uint32_t len)
+{ // returns the index of `name`; a name not seen before is appended to d->seq with length `len`
+	shash_t *hash = (shash_t*)d->h;
+	int is_new;
+	khint_t itr = kh_put(str, hash, name, &is_new);
+	if (is_new) {
+		sd_seq_t *rec;
+		if (d->n_seq == d->m_seq) { // array full: start with 16 slots, double afterwards
+			d->m_seq = d->m_seq? d->m_seq<<1 : 16;
+			d->seq = (sd_seq_t*)realloc(d->seq, d->m_seq * sizeof(sd_seq_t));
+		}
+		rec = &d->seq[d->n_seq];
+		rec->len = len, rec->aux = 0, rec->del = 0;
+		rec->name = strdup(name);
+		kh_key(hash, itr) = rec->name; // the hash key now points at the dictionary's own copy, not the caller's string
+		kh_val(hash, itr) = d->n_seq++;
+	} // TODO: test if len is the same;
+	return kh_val(hash, itr);
+}
+
+int32_t sd_get(const sdict_t *d, const char *name)
+{ // index of `name` in d->seq, or -1 when the name is not in the hash
+	shash_t *hash = (shash_t*)d->h;
+	khint_t itr = kh_get(str, hash, name);
+	if (itr == kh_end(hash)) return -1;
+	return kh_val(hash, itr);
+}
+
+void sd_hash(sdict_t *d)
+{ // builds the name -> index hash from d->seq; does nothing when a hash is already attached
+	uint32_t idx;
+	shash_t *hash;
+	if (d->h != 0) return;
+	hash = kh_init(str);
+	d->h = hash;
+	for (idx = 0; idx < d->n_seq; ++idx) {
+		int absent; // required by kh_put(); its value is not inspected
+		khint_t itr = kh_put(str, hash, d->seq[idx].name, &absent);
+		kh_val(hash, itr) = idx;
+	}
+}
+
+int32_t *sd_squeeze(sdict_t *d)
+{ // compacts d->seq by dropping records flagged `del`; returns a calloc'ed old->new index map (-1 = dropped)
+	int32_t *old2new, src, dst = 0;
+	if (d->h != 0) { // indices are about to change, so the current hash is discarded and rebuilt below
+		kh_destroy(str, (shash_t*)d->h);
+		d->h = 0;
+	}
+	old2new = (int32_t*)calloc(d->n_seq, 4);
+	for (src = 0; src < d->n_seq; ++src) {
+		if (!d->seq[src].del) {
+			d->seq[dst] = d->seq[src];
+			old2new[src] = dst++;
+		} else free(d->seq[src].name), old2new[src] = -1;
+	}
+	d->n_seq = dst;
+	sd_hash(d);
+	return old2new;
+}
diff --git a/sdict.h b/sdict.h
new file mode 100644
index 0000000..31c18a1
--- /dev/null
+++ b/sdict.h
@@ -0,0 +1,31 @@
+#ifndef SDICT_H
+#define SDICT_H
+
+#include <stdint.h>
+
+typedef struct {
+	char *name; // heap copy owned by the dictionary: strdup'ed in sd_put(), freed in sd_destroy()/sd_squeeze()
+	uint32_t len, aux:31, del:1; // len as given to sd_put(); aux starts at 0; del=1 marks the record for removal by sd_squeeze()
+} sd_seq_t;
+
+typedef struct {
+	uint32_t n_seq, m_seq; // records in use / records allocated in seq[]
+	sd_seq_t *seq; // growable record array, indexed by the values sd_put()/sd_get() return
+	void *h; // name -> index hash; sdict.c casts it to its private khash type
+} sdict_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+sdict_t *sd_init(void); // new empty dictionary
+void sd_destroy(sdict_t *d); // frees the dictionary and all stored names; NULL is a no-op
+int32_t sd_put(sdict_t *d, const char *name, uint32_t len); // index of name; inserted with length len if absent
+int32_t sd_get(const sdict_t *d, const char *name); // index of name, or -1 if absent
+int32_t *sd_squeeze(sdict_t *d); // drops records with del=1; returns a calloc'ed old->new index map (-1 = dropped)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/sys.c b/sys.c
new file mode 100644
index 0000000..7bb24f7
--- /dev/null
+++ b/sys.c
@@ -0,0 +1,46 @@
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <stdio.h>
+
+static double realtime0;
+
+double sys_cputime()
+{ // CPU seconds (user + system) reported by getrusage() for this process
+	struct rusage ru;
+	getrusage(RUSAGE_SELF, &ru);
+	return (ru.ru_utime.tv_sec + ru.ru_stime.tv_sec) + 1e-6 * (ru.ru_utime.tv_usec + ru.ru_stime.tv_usec);
+}
+
+double sys_realtime()
+{ // wall-clock seconds from gettimeofday(), minus the reference value kept in realtime0
+	struct timeval now;
+	struct timezone zone;
+	gettimeofday(&now, &zone);
+	return (now.tv_usec * 1e-6 + now.tv_sec) - realtime0;
+}
+
+void sys_liftrlimit()
+{ // raises the soft address-space limit to the hard limit; compiled to a no-op unless __linux__ is defined
+#ifdef __linux__
+	struct rlimit r;
+	if (getrlimit(RLIMIT_AS, &r) != 0) return; // r is undefined on failure: leave the limits untouched
+	r.rlim_cur = r.rlim_max;
+	setrlimit(RLIMIT_AS, &r); // best effort: a failure here just keeps the previous soft limit
+#endif
+}
+
+void sys_init()
+{ // lifts the address-space limit and records the start time that sys_realtime() later subtracts
+	sys_liftrlimit();
+	realtime0 = sys_realtime(); // on the first call realtime0 is still 0, so this stores the absolute time
+}
+
+const char *sys_timestamp()
+{ // formats "<real time>*<CPU time / real time>" into a static buffer that every call overwrites
+	static char stamp[256];
+	double wall = sys_realtime();
+	double cpu = sys_cputime();
+	double ratio = cpu/wall;
+	snprintf(stamp, 255, "%.3f*%.2f", wall, ratio);
+	return stamp;
+}
diff --git a/sys.h b/sys.h
new file mode 100644
index 0000000..95668f8
--- /dev/null
+++ b/sys.h
@@ -0,0 +1,17 @@
+#ifndef HL_SYS_H
+#define HL_SYS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+double sys_cputime(); // user + system CPU seconds used by this process
+double sys_realtime(); // wall-clock seconds, relative to the time recorded by sys_init()
+void sys_init(); // lifts the address-space rlimit (Linux only) and records the start time
+const char *sys_timestamp(); // "<realtime>*<cputime/realtime>" in a static buffer
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tex/Makefile b/tex/Makefile
new file mode 100644
index 0000000..ec4a72c
--- /dev/null
+++ b/tex/Makefile
@@ -0,0 +1,18 @@
+.SUFFIXES: .gp .tex .eps .pdf .eps.gz
+.PHONY: all clean # command names, not files: run them even if a file called all or clean exists
+.eps.pdf: # suffix rule: foo.eps -> foo.pdf
+		epstopdf --outfile $@ $<
+
+.eps.gz.pdf: # suffix rule: foo.eps.gz -> foo.pdf
+		gzip -dc $< | epstopdf --filter > $@
+
+.pdf.eps: # suffix rule: foo.pdf -> foo.eps
+		pdftops -eps $< $@
+
+all:miniasm.pdf
+
+miniasm.pdf:miniasm.tex miniasm.bib
+		pdflatex miniasm; bibtex miniasm; pdflatex miniasm; pdflatex miniasm;
+
+clean:
+		rm -fr *.toc *.aux *.bbl *.blg *.idx *.log *.out *~ miniasm.pdf
diff --git a/tex/bioinfo.cls b/tex/bioinfo.cls
new file mode 100644
index 0000000..caa6b59
--- /dev/null
+++ b/tex/bioinfo.cls
@@ -0,0 +1,927 @@
+\newcommand\classname{bioinfo}
+\newcommand\lastmodifieddate{2003/02/08}
+\newcommand\versionnumber{0.1}
+
+% Are we printing crop marks?
+\newif\if@cropmarkson \@cropmarksontrue
+
+\NeedsTeXFormat{LaTeX2e}[2001/06/01]
+\ProvidesClass{\classname}[\lastmodifieddate\space\versionnumber]
+
+\setlength{\paperheight}{11truein}
+\setlength{\paperwidth}{8.5truein}
+
+\newif\if@final
+
+\DeclareOption{draft}{\PassOptionsToPackage{draft}{graphicx}}
+\DeclareOption{a4paper}{\PassOptionsToPackage{a4}{crop}}
+\DeclareOption{centre}{\PassOptionsToPackage{center}{crop}}
+\DeclareOption{crop}{\PassOptionsToPackage{cam}{crop}\global\@cropmarksontrue}
+\DeclareOption{nocrop}{\PassOptionsToPackage{off}{crop}\global\@cropmarksonfalse}
+\DeclareOption{info}{\PassOptionsToPackage{info}{crop}}
+\DeclareOption{noinfo}{\PassOptionsToPackage{noinfo}{crop}}
+\DeclareOption{final}{\global\@finaltrue}
+
+\ExecuteOptions{a4paper,nocrop,centre,info}
+
+\ProcessOptions
+
+% Load all necessary packages
+\RequirePackage{inputenc,crop,graphicx,amsmath,array,color,amssymb,flushend,stfloats,amsthm,chngpage,times}
+%\RequirePackage[LY1]{fontenc}
+%\RequirePackage[LY1,mtbold]{mathtime}
+\def\helvetica{\fontfamily{phv}\selectfont}
+\def\helveticaitalic{\fontfamily{phv}\itshape\selectfont}
+\def\helveticabold{\fontfamily{phv}\bfseries\selectfont}
+\def\helveticabolditalic{\fontfamily{phv}\bfseries\itshape\selectfont}
+
+% Not sure if needed.
+\newcommand\@ptsize{0}
+
+% Set twoside printing
+\@twosidetrue
+
+% Marginal notes are on the outside edge
+\@mparswitchfalse
+
+\reversemarginpar
+
+\renewcommand\normalsize{%
+   \@setfontsize\normalsize{9}{11}%
+   \abovedisplayskip 10\p@ \@plus2\p@ \@minus5\p@
+   \abovedisplayshortskip \z@ \@plus3\p@
+   \belowdisplayshortskip 6\p@ \@plus3\p@ \@minus3\p@
+   \belowdisplayskip \abovedisplayskip
+   \let\@listi\@listI}
+\normalsize
+\let\@bls\baselineskip
+
+\newcommand\small{%
+    \@setfontsize\small{9}{11}%
+    \abovedisplayskip 11\p@ minus 3\p@
+    \belowdisplayskip \abovedisplayskip
+    \abovedisplayshortskip \z@ plus 2\p@
+    \belowdisplayshortskip 4\p@ plus 2\p@ minus2\p@
+    \def\@listi{\topsep 4.5\p@ plus 2\p@ minus 1\p@
+       \itemsep \parsep 
+       \topsep 4\p@ plus 2\p@ minus 2\p@}}
+
+\newcommand\footnotesize{%
+    \@setfontsize\footnotesize{8}{10}%
+    \abovedisplayskip 6\p@ minus 3\p@
+    \belowdisplayskip\abovedisplayskip
+    \abovedisplayshortskip \z@ plus 3\p@
+    \belowdisplayshortskip 6\p@ plus 3\p@ minus 3\p@
+    \def\@listi{\topsep 3\p@ plus 1\p@ minus 1\p@
+       \parsep 2\p@ plus 1\p@ minus 1\p@\itemsep \parsep}}
+       
+\def\scriptsize{\@setfontsize\scriptsize{7pt}{9pt}}
+\def\tiny{\@setfontsize\tiny{5pt}{7pt}}
+\def\large{\@setfontsize\large{11.5pt}{12pt}}
+\def\Large{\@setfontsize\Large{14pt}{16}}
+\def\LARGE{\@setfontsize\LARGE{15pt}{17pt}}
+\def\huge{\@setfontsize\huge{22pt}{22pt}}
+\def\Huge{\@setfontsize\Huge{30pt}{30pt}}
+
+\DeclareOldFontCommand{\rm}{\normalfont\rmfamily}{\mathrm}
+\DeclareOldFontCommand{\sf}{\normalfont\sffamily}{\mathsf}
+\DeclareOldFontCommand{\tt}{\normalfont\ttfamily}{\mathtt}
+\DeclareOldFontCommand{\bf}{\normalfont\bfseries}{\mathbf}
+\DeclareOldFontCommand{\it}{\normalfont\itshape}{\mathit}
+\DeclareOldFontCommand{\sl}{\normalfont\slshape}{\@nomath\sl}
+\DeclareOldFontCommand{\sc}{\normalfont\scshape}{\@nomath\sc}
+
+% Line spacing
+\setlength\lineskip{1\p@}
+\setlength\normallineskip{1\p@}
+\renewcommand\baselinestretch{}
+
+% Paragraph dimensions and inter-para spacing
+\setlength\parskip{0\p@}
+\setlength\parindent{3mm}
+
+% Set inter-para skips
+\setlength\smallskipamount{3\p@ \@plus 1\p@ \@minus 1\p@}
+\setlength\medskipamount{6\p@ \@plus 2\p@}
+\setlength\bigskipamount{12\p@ \@plus 4\p@ \@minus 4\p@}
+
+% Page break penalties
+\@lowpenalty   51
+\@medpenalty  151
+\@highpenalty 301
+
+% Disallow widows and orphans
+\clubpenalty 10000
+\widowpenalty 10000
+
+% Disable page breaks before equations, allow pagebreaks after
+% equations and discourage widow lines before equations.
+\displaywidowpenalty 100
+\predisplaypenalty   10000
+\postdisplaypenalty  2500
+
+% Allow breaking the page in the middle of a paragraph
+\interlinepenalty 0
+
+% Disallow breaking the page after a hyphenated line
+\brokenpenalty 10000
+
+% Hyphenation; don't split words into less than three characters
+\lefthyphenmin=3
+\righthyphenmin=3
+
+%
+% Set page layout dimensions
+%
+\setlength\headheight{16\p@}        % height of running head
+\setlength\topmargin{2.9pc}         % head margin
+\addtolength\topmargin{-1in}        % subtract out the 1 inch driver margin
+
+\setlength\topskip{10\p@}           % height of first line of text
+\setlength\headsep{19\p@}        %   space below running head --
+
+\setlength\footskip{34\p@}          % space above footer line
+\setlength\maxdepth{.5\topskip}     % pages can be short or deep by half a line?
+
+\setlength\textwidth{42pc}          % text measure excluding margins
+
+\setlength\textheight{58\baselineskip} % 54 lines on a full page,
+\addtolength\textheight{\topskip}      %   including the first
+                                       %   line on the page
+
+% Set the margins
+\setlength\marginparsep{3\p@}
+\setlength\marginparpush{3\p@}
+\setlength\marginparwidth{35\p@}
+
+\setlength\oddsidemargin{4.5pc}
+\addtolength\oddsidemargin{-1in}     % subtract out the 1 inch driver margin
+\setlength\@tempdima{\paperwidth}
+\addtolength\@tempdima{-\textwidth}
+\addtolength\@tempdima{-4.5pc}
+\setlength\evensidemargin{\@tempdima}
+\addtolength\evensidemargin{-1in}
+
+\setlength\columnsep{1.5pc}          % space between columns for double-column text
+\setlength\columnseprule{0\p@}       % width of rule between two columns
+
+% Footnotes
+\setlength\footnotesep{9\p@}      % space between footnotes
+% space between text and footnote
+\setlength{\skip\footins}{12\p@ \@plus 6\p@ \@minus 1\p@}
+
+% Float placement parameters
+
+% The total number of floats that can be allowed on a page.
+\setcounter{totalnumber}{10}
+% The maximum number of floats at the top and bottom of a page.
+\setcounter{topnumber}{5}
+\setcounter{bottomnumber}{5}
+% The maximum part of the top or bottom of a text page that can be
+% occupied by floats. This is set so that at least four lines of text 
+% fit on the page.
+\renewcommand\topfraction{.9}
+\renewcommand\bottomfraction{.9}
+% The minimum amount of a text page that must be occupied by text.
+% This should accommodate four lines of text.
+\renewcommand\textfraction{.06}
+% The minimum amount of a float page that must be occupied by floats.
+\renewcommand\floatpagefraction{.94}
+
+% The same parameters repeated for double column output
+\renewcommand\dbltopfraction{.9}
+\renewcommand\dblfloatpagefraction{.9}
+
+% Space between floats
+\setlength\floatsep    {12\p@ \@plus 2\p@ \@minus 2\p@}
+% Space between floats and text
+\setlength\textfloatsep{20\p@ \@plus 2\p@ \@minus 4\p@}
+% Space above and below an inline figure
+\setlength\intextsep   {18\p@ \@plus 2\p@ \@minus 2\p@}
+
+% For double column floats
+\setlength\dblfloatsep    {12\p@ \@plus 2\p@ \@minus 2\p@}
+\setlength\dbltextfloatsep{20\p@ \@plus 2\p@ \@minus 4\p@}
+
+% Space left at top, bottom and inbetween floats on a float page.
+\setlength\@fptop{0\p@}         % no space above float page figures
+\setlength\@fpsep{12\p@ \@plus 1fil}
+\setlength\@fpbot{0\p@}
+
+% The same for double column
+\setlength\@dblfptop{0\p@}
+\setlength\@dblfpsep{12\p@ \@plus 1fil}
+\setlength\@dblfpbot{0\p@}
+
+% Override settings in mathtime back to TeX defaults
+\DeclareMathSizes{5}     {5}   {5}  {5}
+\DeclareMathSizes{6}     {6}   {5}  {5}
+\DeclareMathSizes{7}     {7}   {5}  {5}
+\DeclareMathSizes{8}     {8}   {6}  {5}
+\DeclareMathSizes{9}     {9}   {6.5}  {5}
+\DeclareMathSizes{10}   {10}   {7.5}  {5}
+\DeclareMathSizes{12}   {12}   {9}  {7}
+
+% Page styles
+\def\ps@headings
+  {%
+  \def\@oddfoot{\vbox to 12.5\p@{\hbox{\rule{\textwidth}{0.5\p@}}\vss
+        \hbox to \textwidth{\hfill\helveticabold\small\thepage}%
+        }}%
+  \def\@evenfoot{\vbox to 12.5\p@{\rule{\textwidth}{0.5\p@}\vss
+        \hbox to \textwidth{\helveticabold\small\thepage\hfill}%
+        }}%
+  \def\@evenhead{\vbox{\hbox to \textwidth{\fontsize{8}{10}\selectfont
+        \helveticabold{\fontshape{it}\selectfont
+        \strut\leftmark}\hfill}\vspace{6.5\p@}\rule{\textwidth}{0.5\p@}}}%
+  \def\@oddhead{\vbox{\hbox to \textwidth{\hfill\fontsize{8}{10}\selectfont
+        \helveticabold{\fontshape{it}\selectfont\strut\rightmark}}%
+        \vspace{6.5\p@}\rule{\textwidth}{0.5\p@}}}%
+  \def\titlemark##1{\markboth{##1}{##1}}%
+  \def\authormark##1{\gdef\leftmark{##1}}%
+  }
+
+\def\ps@opening
+  {%
+  \def\@oddfoot{\vbox to 13\p@{\hbox{\rule{\textwidth}{1\p@}}\vss
+        \hbox to \textwidth{\helvetica
+        \fontsize{7}{9}\fontshape{n}\selectfont%
+          \hfill\small\helveticabold\thepage}%
+        }}%
+  \def\@evenfoot{\vbox to 13\p@{\rule{\textwidth}\vss
+        \hbox to \textwidth{\helvetica\thepage\hfill
+          \fontsize{7}{9}\fontshape{n}\selectfont}%
+        }}%
+  \let\@evenhead\relax
+  \let\@oddhead\relax}
+
+% Page range
+\newif\iflastpagegiven   \lastpagegivenfalse
+\newcommand\firstpage[1]{%
+  \gdef\@firstpage{#1}%
+  \ifnum\@firstpage>\c@page  
+    \setcounter{page}{#1}%
+    \ClassWarning{BIO}{Increasing pagenumber to \@firstpage}%
+  \else \ifnum\@firstpage<\c@page 
+    \ClassWarning{BIO}{Firstpage lower than pagenumber}\fi\fi
+    \xdef\@firstpage{\the\c@page}%                                 
+    }
+\def\@firstpage{1}
+\def\pagenumbering#1{%
+    \global\c@page \@ne
+    \gdef\thepage{\csname @#1\endcsname \c@page}%
+    \gdef\thefirstpage{% 
+            \csname @#1\endcsname \@firstpage}%
+    \gdef\thelastpage{%
+            \csname @#1\endcsname \@lastpage}%
+    }  
+
+\newcommand\lastpage[1]{\xdef\@lastpage{#1}%
+  \global\lastpagegiventrue}
+\def\@lastpage{0}               
+\def\setlastpage{\iflastpagegiven\else
+    \edef\@tempa{@lastpage@}%
+    \expandafter 
+    \ifx \csname \@tempa \endcsname \relax
+        \gdef\@lastpage{0}%
+    \else
+        \xdef\@lastpage{\@nameuse{@lastpage@}}%
+        \fi
+    \fi }
+\def\writelastpage{%                                    
+    \iflastpagegiven \else
+    \immediate\write\@auxout%
+    {\string\global\string\@namedef{@lastpage@}{\the\c@page}}%
+    \fi                                                       
+    }
+\def\thepagerange{%                                    
+  \ifnum\@lastpage =0 {\ \bf ???} \else
+  \ifnum\@lastpage = \@firstpage \ \thefirstpage\else
+  \thefirstpage--\thelastpage \fi\fi}
+
+\AtBeginDocument{\setlastpage
+     \pagenumbering{arabic}%
+   }
+\AtEndDocument{%
+    \writelastpage
+    \if@final
+    \clearemptydoublepage
+    \else
+    \clearpage
+    \fi}
+
+%
+% Sectional units
+%
+  
+% Counters
+\newcounter{section}
+\newcounter{subsection}[section]
+\newcounter{subsubsection}[subsection]
+\newcounter{paragraph}[subsubsection]
+\newcounter{subparagraph}[paragraph]
+\newcounter{figure}
+\newcounter{table}
+ 
+% Form of the numbers
+\newcommand\thepage{\arabic{page}}
+\renewcommand\thesection{\arabic{section}}
+\renewcommand\thesubsection{{\thesection.\arabic{subsection}}}
+\renewcommand\thesubsubsection{{\thesubsection.\arabic{subsubsection}}}
+\renewcommand\theparagraph{\thesubsubsection.\arabic{paragraph}}
+\renewcommand\thesubparagraph{\theparagraph.\arabic{subparagraph}}
+\renewcommand\theequation{\arabic{equation}}
+
+% Form of the words
+\newcommand\contentsname{Contents}
+\newcommand\listfigurename{List of Figures}
+\newcommand\listtablename{List of Tables}
+\newcommand\partname{Part}
+\newcommand\appendixname{Appendix}
+\newcommand\abstractname{Abstract}
+\newcommand\refname{References}
+\newcommand\bibname{References}
+\newcommand\indexname{Index}
+\newcommand\figurename{Fig.}
+\newcommand\tablename{Table}
+
+% Clearemptydoublepage should really clear the running heads too
+\newcommand{\clearemptydoublepage}{\newpage{\pagestyle{empty}\cleardoublepage}}
+
+% Frontmatter, mainmatter and backmatter
+
+\newif\if@mainmatter \@mainmattertrue
+
+\newcommand\frontmatter{%
+  \clearpage
+  \@mainmatterfalse
+  \pagenumbering{roman}}
+
+\newcommand\mainmatter{%
+  \clearpage
+  \@mainmattertrue
+  \pagenumbering{arabic}}
+
+\newcommand\backmatter{%
+  \clearpage
+  \@mainmatterfalse}
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% TITLE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\newlength{\dropfromtop}
+\setlength{\dropfromtop}{\z@}
+
+% Application Notes
+\newif\if@appnotes
+\newcommand{\application}{%
+%  \setlength{\dropfromtop}{-2.25pc}%
+  \global\@appnotestrue}
+
+\long\def\title{\@ifnextchar[{\short@title}{\@@title}}
+\def\short@title[#1]{\titlemark{#1}\@@@title}
+\def\@@title#1{\authormark{#1}\@@@title{#1}}
+\long\def\@@@title#1{\gdef\@title{#1}}
+
+\long\def\author{\@ifnextchar[{\short@uthor}{\@uthor}}
+\def\short@uthor[#1]{\authormark{#1}\@@author}
+\def\@uthor#1{\authormark{#1}\@@author{#1}}
+\long\def\@@author#1{\gdef\@author{#1}}
+
+\def\vol#1{\global\def\@vol{#1}}
+\def\issue#1{\global\def\@issue{#1}}
+\def\address#1{\global\def\@issue{#1}}
+\def\history#1{\global\def\@history{#1}}
+\def\editor#1{\global\def\@editor{#1}}
+\def\pubyear#1{\global\def\@pubyear{#1}}
+\def\copyrightyear#1{\global\def\@copyrightyear{#1}}
+\def\address#1{\global\def\@address{#1}}
+\def\DOI#1{\global\def\@DOI{#1}}
+
+\definecolor{gray}{cmyk}{0, 0, 0, 0.15}
+\newlength{\extraspace}
+\setlength{\extraspace}{\z@}
+
+\newcommand\maketitle{\par
+  \begingroup
+    \renewcommand\thefootnote{\@fnsymbol\c@footnote}%
+    \def\@makefnmark{\rlap{\@textsuperscript{\normalfont\@thefnmark}}}%
+    \long\def\@makefntext##1{\parindent 3mm\noindent
+%      \@textsuperscript{\normalfont\@thefnmark}\raggedright##1}%
+      \@textsuperscript{\normalfont\@thefnmark}##1}%
+    \if@twocolumn
+      \ifnum \col@number=\@ne
+        \@maketitle
+      \else
+        \twocolumn[\@maketitle]%
+      \fi
+    \else
+      \newpage
+      \global\@topnum\z@   % Prevents figures from going at top of page.
+      \@maketitle
+    \fi
+    \thispagestyle{opening}\@thanks
+  \endgroup
+  \setcounter{footnote}{0}%
+  \global\let\thanks\relax
+  \global\let\maketitle\relax
+  \global\let\@maketitle\relax
+  \global\let\@address\@empty
+  \global\let\@history\@empty
+  \global\let\@editor\@empty
+  \global\let\@thanks\@empty
+  \global\let\@author\@empty
+  \global\let\@date\@empty
+  \global\let\@title\@empty
+  \global\let\@pubyear\@empty
+  \global\let\address\relax
+  \global\let\history\relax
+  \global\let\editor\relax
+  \global\let\title\relax
+  \global\let\author\relax
+  \global\let\date\relax
+  \global\let\pubyear\relax
+  \global\let\@copyrightline\@empty
+  \global\let\and\relax
+  \@afterindentfalse\@afterheading
+}
+
+\newlength{\aboveskipchk}%for checking oddpage or evenpage top skip
+\setlength{\aboveskipchk}{\z@}%
+
+\def\@maketitle{%
+  \let\footnote\thanks
+  \clearemptydoublepage
+    \checkoddpage\ifcpoddpage\setlength{\aboveskipchk}{-3pc}\else\setlength{\aboveskipchk}{-5pc}\fi%for checking oddpage or evenpage top skip%%
+  \vspace*{\aboveskipchk}%
+  \vspace{\dropfromtop}%
+  \hbox to \textwidth{%
+  {\helvetica\itshape\bfseries\fontsize{19}{12}\selectfont {\color{gray}MANUSCRIPT}
+    \hfil
+    \if@appnotes APPLICATIONS NOTE\hfil\fi
+    }%
+\enskip  \parbox[b]{11.3pc}{%
+    \helvetica
+    \flushright\fontsize{8}{10}\fontshape{it}\selectfont
+        \hfill
+  }}
+  \rule{\textwidth}{1\p@}\par%
+  \helvetica
+  \hbox to \textwidth{%
+  \parbox[t]{36.5pc}{%
+    \vspace*{1sp}
+    {\helveticabold\fontsize{16}{21}\selectfont\raggedright \@title \par}%
+    \vspace{4.5\p@}
+    {\helvetica\fontsize{13}{15}\selectfont\raggedright \@author \par}%
+        \vspace{4\p@}
+    {\helvetica\fontsize{10}{12}\selectfont\raggedright \@address \par}%
+    \vspace{4\p@}
+    %{\helvetica\fontsize{8}{10}\selectfont\raggedright \@history \par}
+    %\vspace{24\p@}
+    %{\helvetica\fontsize{10}{12}\selectfont\raggedright \@editor \par}
+    %\vspace{20\p@}
+    }%
+  }
+  \vspace{14.5\p@}%
+  \rule{\textwidth}{1\p@}%
+  \vspace{12\p@ plus 6\p@ minus 6\p@}%
+  \vspace{\extraspace}
+  }
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%% Abstract %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\newcommand{\absection}[1]{%
+  \par\noindent{\bfseries #1}\space\ignorespaces}
+
+\newenvironment{abstract}{%
+  \begingroup
+  \let\section\absection
+  \fontfamily{\sfdefault}\fontsize{8}{11}\sffamily\selectfont
+  {\fontseries{b}\selectfont ABSTRACT}\par}
+{\endgroup\bigskip\@afterheading\@afterindentfalse\vskip 12pt plus 3pt minus 1pt}
+
+% Section macros
+
+% Lowest level heading that takes a number by default
+\setcounter{secnumdepth}{3}
+
+\renewcommand{\@seccntformat}[1]{\csname the#1\endcsname\quad}
+
+\def\section{%
+  \@startsection{section}{1}{\z@}
+  {-22\p@ plus -3\p@}{3\p@}
+  {\reset@font\raggedright\helveticabold\fontsize{10}{12}\selectfont\MakeUppercase}}
+  
+\def\subsection{%
+  \@startsection{subsection}{2}{\z@}
+  {-11\p@ plus -2\p@}{3\p@}
+  {\reset@font\raggedright\mathversion{bold}\fontseries{b}\fontsize{10}{12}\selectfont}}
+ 
+\def\subsubsection{%
+  \@startsection{subsubsection}{3}{\z@}
+  {-11\p@ plus -1\p@}{-1em}
+  {\reset@font\normalfont\normalsize\itshape}}
+
+\def\textcolon{\text{\rm :}}
+
+ \def\paragraph{%
+   \@startsection{paragraph}{4}{\z@}
+   {-6\p@}
+   {-.4em}
+   {\reset@font\itshape}}
+
+% ********************
+% Figures and tables *
+% ********************
+
+% Table and array parameters
+\setlength\arraycolsep{.5em}
+\setlength\tabcolsep{.5em}
+\setlength\arrayrulewidth{.5pt}
+\setlength\doublerulesep{2.5pt}
+\setlength\extrarowheight{\z@}
+\renewcommand\arraystretch{1}
+
+\newlength{\abovecaptionskip}
+\newlength{\belowcaptionskip}
+\setlength{\abovecaptionskip}{13pt}
+\setlength{\belowcaptionskip}{10.5pt}
+
+\long\def\@makecaption#1#2{\vspace{\abovecaptionskip}%
+  \begingroup
+  \footnotesize
+  \textbf{#1.}\enskip{#2}\par
+  \endgroup}
+
+\long\def\@tablecaption#1#2{%
+  \begingroup
+  \footnotesize
+  \textbf{#1.}\enskip{#2\strut\par}
+  \endgroup\vspace{\belowcaptionskip}}
+
+% Table rules
+\def\toprule{\noalign{\ifnum0=`}\fi\hrule \@height 0.5pt \hrule \@height 6pt \@width 0pt \futurelet
+   \@tempa\@xhline}
+\def\midrule{\noalign{\ifnum0=`}\fi \hrule \@height 6.75pt \@width 0pt \hrule \@height 0.5pt
+    \hrule \@height 6pt \@width 0pt \futurelet \@tempa\@xhline}
+\def\botrule{\noalign{\ifnum0=`}\fi \hrule \@height 5.75pt \@width 0pt \hrule \@height 0.5pt \futurelet
+   \@tempa\@xhline}
+\def\hrulefill{\leavevmode\leaders\hrule height .5pt\hfill\kern\z@}
+
+\def\thefigure{\@arabic\c@figure}
+\def\fps@figure{tbp}
+\def\ftype@figure{1}
+\def\ext@figure{lof}
+\def\fnum@figure{\figurename~\thefigure}
+\def\figure{\@float{figure}}
+\let\endfigure\end@float
+\@namedef{figure*}{\@dblfloat{figure}}
+\@namedef{endfigure*}{\end@dblfloat}
+\def\thetable{\@arabic\c@table}
+\def\fps@table{tbp}
+\def\ftype@table{2}
+\def\ext@table{lot}
+\def\fnum@table{Table~\thetable}
+\def\table{\let\@makecaption\@tablecaption\let\source\tablesource\@float{table}}
+\def\endtable{\end@float}
+\@namedef{table*}{\let\@makecaption\@tablecaption\@dblfloat{table}}
+\@namedef{endtable*}{\end@dblfloat}
+
+\newif\if@rotate \@rotatefalse
+\newif\if@rotatecenter \@rotatecenterfalse
+\def\rotatecenter{\global\@rotatecentertrue}
+\def\rotateendcenter{\global\@rotatecenterfalse}
+\def\rotate{\global\@rotatetrue}
+\def\endrotate{\global\@rotatefalse}
+\newdimen\rotdimen
+\def\rotstart#1{\special{ps: gsave currentpoint currentpoint translate
+    #1 neg exch neg exch translate}}
+\def\rotfinish{\special{ps: currentpoint grestore moveto}}
+\def\rotl#1{\rotdimen=\ht#1\advance\rotdimen by \dp#1
+    \hbox to \rotdimen{\vbox to\wd#1{\vskip \wd#1
+    \rotstart{270 rotate}\box #1\vss}\hss}\rotfinish}
+\def\rotr#1{\rotdimen=\ht #1\advance\rotdimen by \dp#1
+    \hbox to \rotdimen{\vbox to \wd#1{\vskip \wd#1
+    \rotstart{90 rotate}\box #1\vss}\hss}\rotfinish}
+
+\newdimen\tempdime
+\newbox\temptbox
+
+% From ifmtarg.sty
+% Copyright Peter Wilson and Donald Arseneau, 2000
+\begingroup
+\catcode`\Q=3
+\long\gdef\@ifmtarg#1{\@xifmtarg#1QQ\@secondoftwo\@firstoftwo\@nil}
+\long\gdef\@xifmtarg#1#2Q#3#4#5\@nil{#4}
+\long\gdef\@ifnotmtarg#1{\@xifmtarg#1QQ\@firstofone\@gobble\@nil}
+\endgroup
+
+\def\tablesize{\@setfontsize\tablesize{8\p@}{10\p@}}
+
+\newenvironment{processtable}[3]{\setbox\temptbox=\hbox{{\tablesize #2}}%
+\tempdime\wd\temptbox\@processtable{#1}{#2}{#3}{\tempdime}}
+{\relax}
+
+\newcommand{\@processtable}[4]{%
+\if@rotate
+\setbox4=\vbox to \hsize{\vss\hbox to \textheight{%
+\begin{minipage}{#4}%
+\@ifmtarg{#1}{}{\caption{#1}}{\tablesize #2}%
+\vskip7\p@\noindent
+\parbox{#4}{\fontsize{7}{9}\selectfont #3\par}%
+\end{minipage}}\vss}%
+\rotr{4}
+\else
+\hbox to \hsize{\hss\begin{minipage}[t]{#4}%
+\vskip2.9pt
+\@ifmtarg{#1}{}{\caption{#1}}{\tablesize #2}%
+\vskip6\p@\noindent
+\parbox{#4}{\fontsize{7}{9}\selectfont #3\par}%
+\end{minipage}\hss}\fi}%
+
+\newcolumntype{P}[1]{>{\raggedright\let\\\@arraycr\hangindent1em}p{#1}}
+
+% ******************************
+% List numbering and lettering *
+% ******************************
+\def\labelenumi{{\rm\arabic{enumi}.}}
+\def\theenumi{\arabic{enumi}}
+\def\labelenumii{{\rm\alph{enumii}.}}
+\def\theenumii{\alph{enumii}}
+\def\p@enumii{\theenumi}
+\def\labelenumiii{{\rm(\arabic{enumiii})}}
+\def\theenumiii{\roman{enumiii}}
+\def\p@enumiii{\theenumi(\theenumii)}
+\def\labelenumiv{{\rm(\arabic{enumiv})}}
+\def\theenumiv{\Alph{enumiv}}
+\def\p@enumiv{\p@enumiii\theenumiii}
+\def\labelitemi{{\small$\bullet$}}
+\def\labelitemii{{\small$\bullet$}}
+\def\labelitemiii{{\small$\bullet$}}
+\def\labelitemiv{{\small$\bullet$}}
+
+\def\@listI{\leftmargin\leftmargini \topsep\medskipamount}
+\let\@listi\@listI
+\@listi
+\def\@listii{\topsep\z@\leftmargin\leftmarginii}
+\def\@listiii{\leftmargin\leftmarginiii \topsep\z@}
+\def\@listiv{\leftmargin\leftmarginiv \topsep\z@}
+\def\@listv{\leftmargin\leftmarginv \topsep\z@}
+\def\@listvi{\leftmargin\leftmarginvi \topsep\z@}
+
+\setlength{\leftmargini}{3mm}
+\setlength{\leftmarginii}{\z@}
+\setlength{\leftmarginiii}{\z@}
+\setlength{\leftmarginiv}{\z@}
+
+% Changes to the list parameters for enumerate
+\def\enumargs{%
+   \partopsep     \z@
+   \itemsep       3\p@
+   \parsep        \z@
+   \labelsep      0.5em
+   \listparindent \parindent
+   \itemindent    \z@
+   \topsep 11\p@
+}
+
+\def\enumerate{%
+    \@ifnextchar[{\@numerate}{\@numerate[0]}}
+
+\def\@numerate[#1]{%
+     \ifnum \@enumdepth >3 \@toodeep\else
+     \advance\@enumdepth \@ne
+     \edef\@enumctr{enum\romannumeral\the\@enumdepth}
+     \list{\csname label\@enumctr\endcsname}{%
+       \enumargs
+       \setlength{\leftmargin}{\csname leftmargin\romannumeral\the\@enumdepth\endcsname}
+       \usecounter{\@enumctr}
+       \settowidth\labelwidth{#1}
+       \addtolength{\leftmargin}{\labelwidth}
+       \addtolength{\leftmargin}{\labelsep}
+       \def\makelabel##1{\hss \llap{##1}}}%
+     \fi
+   }
+\let\endenumerate\endlist
+
+% Changes to the list parameters for itemize
+\def\itemargs{%
+   \partopsep     \z@
+   \itemsep       3\p@
+   \parsep        \z@
+   \labelsep      0.5em
+   \rightmargin   \z@
+   \listparindent \parindent
+   \itemindent    \z@
+   \topsep11\p@
+}
+
+\def\itemize{%
+    \@ifnextchar[{\@itemize}{\@itemize[$\bullet$]}}
+
+\def\@itemize[#1]{%
+     \ifnum \@itemdepth >3 \@toodeep\else
+     \advance\@itemdepth \@ne
+     \edef\@itemctr{item\romannumeral\the\@itemdepth}
+     \list{\csname label\@itemctr\endcsname}{%
+       \itemargs
+       \setlength{\leftmargin}{\csname leftmargin\romannumeral\the\@itemdepth\endcsname}
+       \settowidth\labelwidth{#1}
+       \addtolength{\leftmargin}{\labelwidth}
+       \addtolength{\leftmargin}{\labelsep}
+       \def\makelabel##1{\hss \llap{##1}}}%
+     \fi
+   }
+\let\enditemize\endlist
+
+\newenvironment{unlist}{%
+  \begin{list}{}%
+    {\setlength{\labelwidth}{\z@}%
+     \setlength{\labelsep}{\z@}%
+     \setlength{\topsep}{\medskipamount}%
+     \setlength{\itemsep}{3\p@}%
+     \setlength{\leftmargin}{2em}%
+     \setlength{\itemindent}{-2em}}}
+{\end{list}}
+
+      
+% ***********************
+% Quotes and Quotations *
+% ***********************
+\def\quotation{\par\begin{list}{}{
+      \setlength{\topsep}{\medskipamount}
+      \setlength{\leftmargin}{2em}%
+      \setlength{\rightmargin}{\z@}%
+      \setlength\labelwidth{0pt}%
+      \setlength\labelsep{0pt}%
+      \listparindent\parindent}%
+  \item[]}
+\def\endquotation{\end{list}}
+\let\quote\quotation
+\let\endquote\endquotation
+
+\skip\@mpfootins = \skip\footins
+\fboxsep=6\p@
+\fboxrule=1\p@
+
+% *******************
+% Table of contents *
+% *******************
+\newcommand\@pnumwidth{4em}
+\newcommand\@tocrmarg{2.55em plus 1fil}
+\newcommand\@dotsep{1000}
+\setcounter{tocdepth}{4}
+
+\def\numberline#1{\hbox to \@tempdima{{#1}}}
+
+\def\@authortocline#1#2#3#4#5{%
+  \vskip 1.5\p@
+  \ifnum #1>\c@tocdepth \else
+    {\leftskip #2\relax \rightskip \@tocrmarg \parfillskip -\rightskip
+     \parindent #2\relax\@afterindenttrue
+     \interlinepenalty\@M
+     \leavevmode
+     \@tempdima #3\relax
+     \advance\leftskip \@tempdima \null\nobreak\hskip -\leftskip
+     {\itshape #4}\nobreak
+     \leaders\hbox{$\m@th 
+        \mkern \@dotsep mu\hbox{.}\mkern \@dotsep
+        mu$}\hfill
+     \nobreak
+     \hb@xt@\@pnumwidth{\hfil}%
+     \par}%
+  \fi}
+
+\newcommand*\l@author{\@authortocline{2}{0pt}{30pt}}
+\newcommand*\l@section{\@dottedtocline{3}{11pt}{20pt}}
+\newcommand*\l@subsection{\@dottedtocline{4}{31pt}{29pt}}
+\newcommand*\l@subsubsection[2]{}
+
+
+
+% ***********
+% Footnotes *
+% ***********
+
+\def\footnoterule{\noindent\rule{\columnwidth}{0.5pt}}
+\def\@makefnmark{\@textsuperscript{\normalfont\@thefnmark}}%
+\newcommand\@makefntext[1]{\noindent{\@makefnmark}\enskip#1}
+
+% ***********
+% References *
+% ***********
+
+\providecommand{\newblock}{}
+\newenvironment{thebibliography}{%
+  \section{\bibname}%
+  \begingroup
+  \small
+  \begin{list}{}{%
+      \setlength{\topsep}{\z@}%
+      \setlength{\labelsep}{\z@}%
+      \settowidth{\labelwidth}{\z@}%
+      \setlength{\leftmargin}{4mm}%
+      \setlength{\itemindent}{-4mm}}\small}
+{\end{list}\endgroup}
+
+\RequirePackage{natbib}
+
+% **********
+% Appendix *
+% **********
+\newif\ifappend % Are we in the Appendix?
+\def\appendix{\par
+        \setcounter{section}{0}
+        \setcounter{subsection}{0}
+        \appendtrue
+}
+
+%Math parameters
+
+\setlength{\jot}{5\p@} 
+\mathchardef\@m=1500   % adapted value
+
+\def\frenchspacing{\sfcode`\.\@m \sfcode`\?\@m \sfcode`\!\@m
+  \sfcode`\:\@m \sfcode`\;\@m \sfcode`\,\@m}
+
+% Theorems
+\def\th@plain{%
+%%  \let\thm@indent\noindent % no indent
+\thm@headfont{\quad\scshape}% heading font is bold
+\thm@notefont{\upshape\mdseries}% same as heading font
+\thm@headpunct{.}% no period after heading
+\thm@headsep 5\p@ plus\p@ minus\p@\relax
+%%  \let\thm@swap\@gobble
+%%  \thm@preskip\topsep
+%%  \thm@postskip\theorempreskipamount
+\itshape % body font
+}
+
+\vbadness=9999
+\tolerance=9999
+\doublehyphendemerits=10000
+\doublehyphendemerits 640000   % corresponds to badness 800
+\finalhyphendemerits  1000000  % corresponds to badness 1000
+
+\flushbottom
+\frenchspacing
+\ps@headings
+\twocolumn
+
+% Screen PDF compatibility
+\newcommand{\medline}[1]{%
+  \unskip\unskip\ignorespaces}
+
+
+%%%%for smaller size text
+\newenvironment{methods}{%
+  \begingroup
+\def\section{%
+  \@startsection{section}{1}{\z@}
+  {-24\p@ plus -3\p@}{4\p@}
+  {\reset@font\raggedright\helveticabold\fontsize{10}{12}\selectfont\MakeUppercase}}
+ \def\subsection{%
+  \@startsection{subsection}{2}{\z@}
+  {-5\p@ plus -2\p@}{4\p@}
+  {\reset@font\raggedright\mathversion{bold}\fontseries{b}\fontsize{10}{12}\selectfont}}
+ \def\subsubsection{%
+  \@startsection{subsubsection}{3}{\z@}
+  {-6\p@ plus -1\p@}{-1em}
+  {\reset@font\normalfont\normalsize\itshape}}
+\footnotesize
+  \par}
+{\par\endgroup\bigskip\@afterheading\@afterindentfalse}
+
+
+
+\graphicspath{{g:/artwork/oup/bioinfo/}}
+
+\language=2
+
+\hyphenation{Figure Table Figures Tables}
+
+\newcommand{\href}[2]{#2}
+
+\renewenvironment{proof}[1][\proofname]{\par
+  \normalfont \topsep6\p@\@plus6\p@\relax
+  \labelsep 0.5em
+  \trivlist
+  \item[\hskip\labelsep\hskip1em\textsc{#1}.]\ignorespaces
+}{\endtrivlist\@endpefalse}
+
+%%Different Bonds
+
+\def\sbond{\ensuremath{\raise.25ex\hbox{${-}\!\!\!\!{-}$}}\kern -.9pt}
+\def\dbond{\ensuremath{\raise.25ex\hbox{=$\!$=}}}
+\def\tbond{\ensuremath{\raise.20ex\hbox{${\equiv}\!\!\!{\equiv}$}}}
+
+% Author queries
+%\fboxsep=4\p@
+%\fboxrule=0.5\p@
+\newcommand{\query}[2][0pt]{}%
+%  \marginpar{\vspace*{#1}%
+%    {\parbox{\marginparwidth}{%
+%     \raggedright\fontsize{6}{8}\selectfont
+%         #2}}}}
+
+\renewcommand{\dag}{{\mathversion{normal}$^{\dagger}$}}
+
+\endinput
diff --git a/tex/ce.pdf b/tex/ce.pdf
new file mode 100644
index 0000000..6ac7074
Binary files /dev/null and b/tex/ce.pdf differ
diff --git a/tex/diagrams.graffle b/tex/diagrams.graffle
new file mode 100644
index 0000000..7112c24
--- /dev/null
+++ b/tex/diagrams.graffle
@@ -0,0 +1,908 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>ActiveLayerIndex</key>
+	<integer>0</integer>
+	<key>ApplicationVersion</key>
+	<array>
+		<string>com.omnigroup.OmniGraffle</string>
+		<string>139.18.0.187838</string>
+	</array>
+	<key>AutoAdjust</key>
+	<true/>
+	<key>BackgroundGraphic</key>
+	<dict>
+		<key>Bounds</key>
+		<string>{{0, 0}, {576, 733}}</string>
+		<key>Class</key>
+		<string>SolidGraphic</string>
+		<key>ID</key>
+		<integer>2</integer>
+		<key>Style</key>
+		<dict>
+			<key>shadow</key>
+			<dict>
+				<key>Draws</key>
+				<string>NO</string>
+			</dict>
+			<key>stroke</key>
+			<dict>
+				<key>Draws</key>
+				<string>NO</string>
+			</dict>
+		</dict>
+	</dict>
+	<key>BaseZoom</key>
+	<integer>0</integer>
+	<key>CanvasOrigin</key>
+	<string>{0, 0}</string>
+	<key>ColumnAlign</key>
+	<integer>1</integer>
+	<key>ColumnSpacing</key>
+	<real>36</real>
+	<key>CreationDate</key>
+	<string>2015-11-16 14:46:34 +0000</string>
+	<key>Creator</key>
+	<string>Heng Li</string>
+	<key>DisplayScale</key>
+	<string>1 0/72 in = 1 0/72 in</string>
+	<key>GraphDocumentVersion</key>
+	<integer>8</integer>
+	<key>GraphicsList</key>
+	<array>
+		<dict>
+			<key>Bounds</key>
+			<string>{{161, 173.5}, {9, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>ID</key>
+			<integer>23</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\fs24 \cf0 w}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{100, 137.5}, {6, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>ID</key>
+			<integer>22</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\fs24 \cf0 v}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{464, 185}, {18, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>FontInfo</key>
+			<dict>
+				<key>Font</key>
+				<string>Helvetica</string>
+				<key>Size</key>
+				<real>12</real>
+			</dict>
+			<key>ID</key>
+			<integer>21</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\b\fs24 \cf0 l
+\i0 [2]}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{399, 125}, {18, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>FontInfo</key>
+			<dict>
+				<key>Font</key>
+				<string>Helvetica</string>
+				<key>Size</key>
+				<real>12</real>
+			</dict>
+			<key>ID</key>
+			<integer>20</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\b\fs24 \cf0 l
+\i0 [1]}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{331, 185}, {22, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>FontInfo</key>
+			<dict>
+				<key>Font</key>
+				<string>Helvetica</string>
+				<key>Size</key>
+				<real>12</real>
+			</dict>
+			<key>ID</key>
+			<integer>19</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\b\fs24 \cf0 e
+\i0 [2]}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{209, 185}, {22, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>FontInfo</key>
+			<dict>
+				<key>Font</key>
+				<string>Helvetica</string>
+				<key>Size</key>
+				<real>12</real>
+			</dict>
+			<key>ID</key>
+			<integer>18</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\b\fs24 \cf0 b
+\i0 [2]}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{330.99999359250069, 124.99999651312828}, {22, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>FontInfo</key>
+			<dict>
+				<key>Font</key>
+				<string>Helvetica</string>
+				<key>Size</key>
+				<real>12</real>
+			</dict>
+			<key>ID</key>
+			<integer>17</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\b\fs24 \cf0 e
+\i0 [1]}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{209, 125}, {22, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>FontInfo</key>
+			<dict>
+				<key>Font</key>
+				<string>Helvetica</string>
+				<key>Size</key>
+				<real>12</real>
+			</dict>
+			<key>ID</key>
+			<integer>16</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\i\b\fs24 \cf0 b
+\i0 [1]}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Class</key>
+			<string>LineGraphic</string>
+			<key>ID</key>
+			<integer>15</integer>
+			<key>Points</key>
+			<array>
+				<string>{317, 117}</string>
+				<string>{378, 162}</string>
+			</array>
+			<key>Style</key>
+			<dict>
+				<key>stroke</key>
+				<dict>
+					<key>HeadArrow</key>
+					<string>FilledArrow</string>
+					<key>Legacy</key>
+					<true/>
+					<key>LineType</key>
+					<integer>1</integer>
+					<key>Pattern</key>
+					<integer>1</integer>
+					<key>TailArrow</key>
+					<string>0</string>
+				</dict>
+			</dict>
+		</dict>
+		<dict>
+			<key>Class</key>
+			<string>LineGraphic</string>
+			<key>ID</key>
+			<integer>13</integer>
+			<key>Points</key>
+			<array>
+				<string>{223, 117}</string>
+				<string>{189, 162}</string>
+			</array>
+			<key>Style</key>
+			<dict>
+				<key>stroke</key>
+				<dict>
+					<key>HeadArrow</key>
+					<string>FilledArrow</string>
+					<key>Legacy</key>
+					<true/>
+					<key>LineType</key>
+					<integer>1</integer>
+					<key>Pattern</key>
+					<integer>1</integer>
+					<key>TailArrow</key>
+					<string>0</string>
+				</dict>
+			</dict>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{225, 104}, {90, 14}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>FitText</key>
+			<string>YES</string>
+			<key>Flow</key>
+			<string>Resize</string>
+			<key>ID</key>
+			<integer>11</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Pad</key>
+				<integer>0</integer>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\fs24 \cf0 Overhang region}</string>
+				<key>VerticalPad</key>
+				<integer>0</integer>
+			</dict>
+			<key>Wrap</key>
+			<string>NO</string>
+		</dict>
+		<dict>
+			<key>Class</key>
+			<string>LineGraphic</string>
+			<key>ID</key>
+			<integer>6</integer>
+			<key>Points</key>
+			<array>
+				<string>{180, 180}</string>
+				<string>{468, 180}</string>
+			</array>
+			<key>Style</key>
+			<dict>
+				<key>stroke</key>
+				<dict>
+					<key>HeadArrow</key>
+					<string>0</string>
+					<key>Legacy</key>
+					<true/>
+					<key>LineType</key>
+					<integer>1</integer>
+					<key>TailArrow</key>
+					<string>FilledArrow</string>
+					<key>Width</key>
+					<real>3</real>
+				</dict>
+			</dict>
+		</dict>
+		<dict>
+			<key>Class</key>
+			<string>LineGraphic</string>
+			<key>ID</key>
+			<integer>5</integer>
+			<key>Points</key>
+			<array>
+				<string>{117, 144}</string>
+				<string>{405, 144}</string>
+			</array>
+			<key>Style</key>
+			<dict>
+				<key>stroke</key>
+				<dict>
+					<key>HeadArrow</key>
+					<string>FilledArrow</string>
+					<key>Legacy</key>
+					<true/>
+					<key>LineType</key>
+					<integer>1</integer>
+					<key>TailArrow</key>
+					<string>0</string>
+					<key>Width</key>
+					<real>3</real>
+				</dict>
+			</dict>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{216, 144}, {126, 36}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>ID</key>
+			<integer>4</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Color</key>
+					<dict>
+						<key>b</key>
+						<string>0.6</string>
+						<key>g</key>
+						<string>0.6</string>
+						<key>r</key>
+						<string>0.6</string>
+					</dict>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+			</dict>
+			<key>Text</key>
+			<dict>
+				<key>Text</key>
+				<string>{\rtf1\ansi\ansicpg1252\cocoartf1265\cocoasubrtf210
+\cocoascreenfonts1{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
+{\colortbl;\red255\green255\blue255;}
+\pard\tx560\tx1120\tx1680\tx2240\tx2800\tx3360\tx3920\tx4480\tx5040\tx5600\tx6160\tx6720\pardirnatural\qc
+
+\f0\fs24 \cf0 mapped region}</string>
+			</dict>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{342, 144}, {63, 36}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>ID</key>
+			<integer>10</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Color</key>
+					<dict>
+						<key>b</key>
+						<string>0.901961</string>
+						<key>g</key>
+						<string>0.901961</string>
+						<key>r</key>
+						<string>0.901961</string>
+					</dict>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Pattern</key>
+					<integer>1</integer>
+				</dict>
+			</dict>
+		</dict>
+		<dict>
+			<key>Bounds</key>
+			<string>{{180, 144}, {36, 36}}</string>
+			<key>Class</key>
+			<string>ShapedGraphic</string>
+			<key>ID</key>
+			<integer>8</integer>
+			<key>Shape</key>
+			<string>Rectangle</string>
+			<key>Style</key>
+			<dict>
+				<key>fill</key>
+				<dict>
+					<key>Color</key>
+					<dict>
+						<key>b</key>
+						<string>0.901961</string>
+						<key>g</key>
+						<string>0.901961</string>
+						<key>r</key>
+						<string>0.901961</string>
+					</dict>
+				</dict>
+				<key>shadow</key>
+				<dict>
+					<key>Draws</key>
+					<string>NO</string>
+				</dict>
+				<key>stroke</key>
+				<dict>
+					<key>Pattern</key>
+					<integer>1</integer>
+				</dict>
+			</dict>
+		</dict>
+	</array>
+	<key>GridInfo</key>
+	<dict/>
+	<key>GuidesLocked</key>
+	<string>NO</string>
+	<key>GuidesVisible</key>
+	<string>YES</string>
+	<key>HPages</key>
+	<integer>1</integer>
+	<key>ImageCounter</key>
+	<integer>1</integer>
+	<key>KeepToScale</key>
+	<false/>
+	<key>Layers</key>
+	<array>
+		<dict>
+			<key>Lock</key>
+			<string>NO</string>
+			<key>Name</key>
+			<string>Layer 1</string>
+			<key>Print</key>
+			<string>YES</string>
+			<key>View</key>
+			<string>YES</string>
+		</dict>
+	</array>
+	<key>LayoutInfo</key>
+	<dict>
+		<key>Animate</key>
+		<string>NO</string>
+		<key>circoMinDist</key>
+		<real>18</real>
+		<key>circoSeparation</key>
+		<real>0.0</real>
+		<key>layoutEngine</key>
+		<string>dot</string>
+		<key>neatoSeparation</key>
+		<real>0.0</real>
+		<key>twopiSeparation</key>
+		<real>0.0</real>
+	</dict>
+	<key>LinksVisible</key>
+	<string>NO</string>
+	<key>MagnetsVisible</key>
+	<string>NO</string>
+	<key>MasterSheets</key>
+	<array/>
+	<key>ModificationDate</key>
+	<string>2015-11-16 16:18:50 +0000</string>
+	<key>Modifier</key>
+	<string>Heng Li</string>
+	<key>NotesVisible</key>
+	<string>NO</string>
+	<key>Orientation</key>
+	<integer>2</integer>
+	<key>OriginVisible</key>
+	<string>NO</string>
+	<key>PageBreaks</key>
+	<string>YES</string>
+	<key>PrintInfo</key>
+	<dict>
+		<key>NSBottomMargin</key>
+		<array>
+			<string>float</string>
+			<string>41</string>
+		</array>
+		<key>NSHorizonalPagination</key>
+		<array>
+			<string>coded</string>
+			<string>BAtzdHJlYW10eXBlZIHoA4QBQISEhAhOU051bWJlcgCEhAdOU1ZhbHVlAISECE5TT2JqZWN0AIWEASqEhAFxlwCG</string>
+		</array>
+		<key>NSLeftMargin</key>
+		<array>
+			<string>float</string>
+			<string>18</string>
+		</array>
+		<key>NSPaperSize</key>
+		<array>
+			<string>size</string>
+			<string>{612, 792}</string>
+		</array>
+		<key>NSPrintReverseOrientation</key>
+		<array>
+			<string>int</string>
+			<string>0</string>
+		</array>
+		<key>NSRightMargin</key>
+		<array>
+			<string>float</string>
+			<string>18</string>
+		</array>
+		<key>NSTopMargin</key>
+		<array>
+			<string>float</string>
+			<string>18</string>
+		</array>
+	</dict>
+	<key>PrintOnePage</key>
+	<false/>
+	<key>ReadOnly</key>
+	<string>NO</string>
+	<key>RowAlign</key>
+	<integer>1</integer>
+	<key>RowSpacing</key>
+	<real>36</real>
+	<key>SheetTitle</key>
+	<string>Canvas 1</string>
+	<key>SmartAlignmentGuidesActive</key>
+	<string>YES</string>
+	<key>SmartDistanceGuidesActive</key>
+	<string>YES</string>
+	<key>UniqueID</key>
+	<integer>1</integer>
+	<key>UseEntirePage</key>
+	<false/>
+	<key>VPages</key>
+	<integer>1</integer>
+	<key>WindowInfo</key>
+	<dict>
+		<key>CurrentSheet</key>
+		<integer>0</integer>
+		<key>ExpandedCanvases</key>
+		<array>
+			<dict>
+				<key>name</key>
+				<string>Canvas 1</string>
+			</dict>
+		</array>
+		<key>Frame</key>
+		<string>{{367, 6}, {710, 872}}</string>
+		<key>ListView</key>
+		<true/>
+		<key>OutlineWidth</key>
+		<integer>142</integer>
+		<key>RightSidebar</key>
+		<false/>
+		<key>ShowRuler</key>
+		<true/>
+		<key>Sidebar</key>
+		<true/>
+		<key>SidebarWidth</key>
+		<integer>120</integer>
+		<key>VisibleRegion</key>
+		<string>{{0, 0}, {575, 733}}</string>
+		<key>Zoom</key>
+		<real>1</real>
+		<key>ZoomValues</key>
+		<array>
+			<array>
+				<string>Canvas 1</string>
+				<real>1</real>
+				<real>1</real>
+			</array>
+		</array>
+	</dict>
+</dict>
+</plist>
diff --git a/tex/miniasm.bib b/tex/miniasm.bib
new file mode 100644
index 0000000..f0f4f91
--- /dev/null
+++ b/tex/miniasm.bib
@@ -0,0 +1,236 @@
+@inproceedings{DBLP:conf/wabi/Myers14,
+	Author = {Gene Myers},
+	Booktitle = {Algorithms in Bioinformatics - 14th International Workshop, {WABI} 2014, Wroclaw, Poland, September 8-10, 2014. Proceedings},
+	Crossref = {DBLP:conf/wabi/2014},
+	Pages = {52--67},
+	Title = {Efficient Local Alignment Discovery amongst Noisy Long Reads},
+	Year = {2014}}
+
+@proceedings{DBLP:conf/wabi/2014,
+	Editor = {Daniel G. Brown and Burkhard Morgenstern},
+	Publisher = {Springer},
+	Title = {Algorithms in Bioinformatics - 14th International Workshop, {WABI} 2014, Wroclaw, Poland, September 8-10, 2014. Proceedings},
+	Volume = {8701},
+	Year = {2014}}
+
+@article{Roberts:2004fv,
+	Author = {Roberts, Michael and others},
+	Journal = {Bioinformatics},
+	Pages = {3363-9},
+	Title = {Reducing storage requirements for biological sequence comparison},
+	Volume = {20},
+	Year = {2004}}
+
+@article{Myers:2005bh,
+	Author = {Myers, Eugene W},
+	Journal = {Bioinformatics},
+	Pages = {ii79-85},
+	Title = {The fragment assembly string graph},
+	Volume = {21 Suppl 2},
+	Year = {2005}}
+
+@article{Zerbino:2008uq,
+	Author = {Zerbino, Daniel R and Birney, Ewan},
+	Journal = {Genome Res},
+	Pages = {821-9},
+	Title = {Velvet: algorithms for de novo short read assembly using de {Bruijn} graphs},
+	Volume = {18},
+	Year = {2008}}
+
+@article{Kahn62aa,
+	Author = {Kahn, Arthur B},
+	Journal = {Communications of the ACM},
+	Pages = {558--562},
+	Title = {Topological sorting of large networks},
+	Volume = {5},
+	Year = {1962}}
+
+@article{Li:2012fk,
+	Author = {Li, Heng},
+	Journal = {Bioinformatics},
+	Pages = {1838-44},
+	Title = {Exploring single-sample {SNP} and {INDEL} calling with whole-genome de novo assembly},
+	Volume = {28},
+	Year = {2012}}
+
+@article{Berlin:2015xy,
+	Author = {Berlin, Konstantin and others},
+	Journal = {Nat Biotechnol},
+	Pages = {623-30},
+	Title = {Assembling large genomes with single-molecule sequencing and locality-sensitive hashing},
+	Volume = {33},
+	Year = {2015}}
+
+@article{sovic:2015aa,
+	Author = {Ivan Sovic and others},
+	Journal = {bioRxiv},
+	Title = {Fast and sensitive mapping of error-prone nanopore sequencing reads with GraphMap},
+	Year = {2015}}
+
+@article{TCS15,
+  author    = {Ljiljana  Brankovic and others},
+  title     = {Linear-Time Superbubble Identification Algorithm for Genome Assembly},
+  journal   = {Theoretical Computer Science},
+  year      = {2015},
+  doi       = {10.1016/j.tcs.2015.10.021}
+}
+
+@inproceedings{DBLP:conf/wabi/OnoderaSS13,
+	Author = {Taku Onodera and Kunihiko Sadakane and Tetsuo Shibuya},
+	Booktitle = {{WABI}},
+	Crossref = {DBLP:conf/wabi/2013},
+	Pages = {338--348},
+	Title = {Detecting Superbubbles in Assembly Graphs},
+	Year = {2013}}
+
+@proceedings{DBLP:conf/wabi/2013,
+  editor    = {Aaron E. Darling and
+               Jens Stoye},
+  title     = {Algorithms in Bioinformatics - 13th International Workshop, {WABI}
+               2013, Sophia Antipolis, France, September 2-4, 2013. Proceedings},
+  series    = {Lecture Notes in Computer Science},
+  volume    = {8126},
+  publisher = {Springer},
+  year      = {2013},
+}
+
+@article{Wick:2015qf,
+	Author = {Wick, Ryan R and others},
+	Journal = {Bioinformatics},
+	Pages = {3350-2},
+	Title = {Bandage: interactive visualization of de novo genome assemblies},
+	Volume = {31},
+	Year = {2015}}
+
+@article{Li:2009ys,
+	Author = {Li, Heng and others},
+	Journal = {Bioinformatics},
+	Pages = {2078-9},
+	Title = {The Sequence Alignment/Map format and SAMtools},
+	Volume = {25},
+	Year = {2009}}
+
+@article{Alkan:2011zr,
+	Author = {Alkan, Can and others},
+	Journal = {Nat Methods},
+	Pages = {61-5},
+	Title = {Limitations of next-generation genome sequence assembly},
+	Volume = {8},
+	Year = {2011}}
+
+@article{Chaisson:2015wj,
+	Author = {Chaisson, Mark J P and others},
+	Journal = {Nat Rev Genet},
+	Pages = {627-40},
+	Title = {Genetic variation and the de novo assembly of human genomes},
+	Volume = {16},
+	Year = {2015}}
+
+@article{Bashir:2012gb,
+	Author = {Bashir, Ali and others},
+	Journal = {Nat Biotechnol},
+	Pages = {701-7},
+	Title = {A hybrid approach for the automated finishing of bacterial genomes},
+	Volume = {30},
+	Year = {2012}}
+
+@article{Ribeiro:2012bx,
+	Author = {Ribeiro, Filipe J and others},
+	Journal = {Genome Res},
+	Pages = {2270-7},
+	Title = {Finished bacterial genomes from shotgun sequence data},
+	Volume = {22},
+	Year = {2012}}
+
+@article{Koren:2012pt,
+	Author = {Koren, Sergey and others},
+	Journal = {Nat Biotechnol},
+	Pages = {693-700},
+	Title = {Hybrid error correction and de novo assembly of single-molecule sequencing reads},
+	Volume = {30},
+	Year = {2012}}
+
+@article{Chin:2013qr,
+	Author = {Chin, Chen-Shan and others},
+	Journal = {Nat Methods},
+	Pages = {563-9},
+	Title = {Nonhybrid, finished microbial genome assemblies from long-read SMRT sequencing data},
+	Volume = {10},
+	Year = {2013}}
+
+@article{Koren:2013fc,
+	Author = {Koren, Sergey and others},
+	Journal = {Genome Biol},
+	Pages = {R101},
+	Title = {Reducing assembly complexity of microbial genomes with single-molecule sequencing},
+	Volume = {14},
+	Year = {2013}}
+
+@article{Quick:2014uf,
+	Author = {Quick, Joshua and others},
+	Journal = {Gigascience},
+	Pages = {22},
+	Title = {A reference bacterial genome dataset generated on the MinION{\texttrademark} portable single-molecule nanopore sequencer},
+	Volume = {3},
+	Year = {2014}}
+
+@article{Loman:2015xu,
+	Author = {Loman, Nicholas J and others},
+	Journal = {Nat Methods},
+	Pages = {733-5},
+	Title = {A complete bacterial genome assembled de novo using only nanopore sequencing data},
+	Volume = {12},
+	Year = {2015}}
+
+@article{Chaisson:2012aa,
+	Author = {Chaisson, Mark J and Tesler, Glenn},
+	Journal = {BMC Bioinformatics},
+	Pages = {238},
+	Title = {Mapping single molecule sequencing reads using basic local alignment with successive refinement (BLASR): application and theory},
+	Volume = {13},
+	Year = {2012}}
+
+@article{Myers:2000kl,
+	Author = {Myers, E W and others},
+	Journal = {Science},
+	Pages = {2196-204},
+	Title = {A whole-genome assembly of Drosophila},
+	Volume = {287},
+	Year = {2000}}
+
+@inproceedings{Broder:1997aa,
+	Author = {Broder, Andrei Z},
+	Booktitle = {Compression and Complexity of Sequences},
+	Pages = {21-29},
+	Title = {On the resemblance and containment of documents},
+	Year = {1997}}
+
+@article{Altschul:1997vn,
+	Author = {Altschul, S F and others},
+	Journal = {Nucleic Acids Res},
+	Pages = {3389-402},
+	Title = {Gapped {BLAST} and {PSI-BLAST}: a new generation of protein database search programs},
+	Volume = {25},
+	Year = {1997}}
+
+@article{Kent:2002jk,
+	Author = {Kent, W James},
+	Journal = {Genome Res},
+	Pages = {656-64},
+	Title = {{BLAT}--the {BLAST}-like alignment tool},
+	Volume = {12},
+	Year = {2002}}
+
+@phdthesis{harris:2007aa,
+	Author = {Harris, R.S.},
+	School = {The Pennsylvania State University},
+	Title = {Improved pairwise alignment of genomic DNA},
+	Year = {2007}}
+
+@article{Kiebasa:2011aa,
+	Author = {Kie{\l}basa, Szymon M and others},
+	Journal = {Genome Res},
+	Pages = {487-93},
+	Title = {Adaptive seeds tame genomic sequence comparison},
+	Volume = {21},
+	Year = {2011}}
diff --git a/tex/miniasm.tex b/tex/miniasm.tex
new file mode 100644
index 0000000..6906ac9
--- /dev/null
+++ b/tex/miniasm.tex
@@ -0,0 +1,807 @@
+\documentclass{bioinfo}
+\copyrightyear{2015}
+\pubyear{2015}
+
+\usepackage{amsmath}
+\usepackage[ruled,vlined]{algorithm2e}
+\newcommand\mycommfont[1]{\footnotesize\rmfamily{\it #1}}
+\SetCommentSty{mycommfont}
+\SetKwComment{Comment}{$\triangleright$\ }{}
+
+\usepackage{natbib}
+\bibliographystyle{apalike}
+
+\begin{document}
+\firstpage{1}
+
+\title[Long-read mapping and assembly]{Minimap and miniasm: fast mapping and de novo assembly for noisy long sequences}
+\author[Li]{Heng Li}
+\address{Broad Institute, 75 Ames Street, Cambridge, MA 02142, USA}
+\maketitle
+
+\begin{abstract}
+
+\section{Motivation:} Single Molecule Real-Time (SMRT) sequencing technology and Oxford
+Nanopore Technologies (ONT) produce reads over 10kbp in length, which have
+enabled high-quality genome assembly at an affordable cost.  However, at
+present, long reads have an error rate as high as 10--15\%.  Complex and
+computationally intensive pipelines are required to assemble such reads.
+
+\section{Results:} We present a new mapper, minimap, and a \emph{de novo}
+assembler, miniasm, for efficiently mapping and assembling SMRT and ONT reads
+without an error correction stage. They can often assemble a sequencing run of
+bacterial data into a single contig in a few minutes, and assemble 45-fold
+\emph{C. elegans} data in 9 minutes, orders of magnitude faster than the
+existing pipelines. We also introduce a pairwise read mapping format (PAF) and
+a graphical fragment assembly format (GFA), and demonstrate the
+interoperability between ours and current tools.
+
+\section{Availability and implementation:} https://github.com/lh3/minimap and
+https://github.com/lh3/miniasm
+
+\section{Contact:} hengli at broadinstitute.org
+
+\end{abstract}
+
+\section{Introduction}
+
+High-throughput short-read sequencing technologies, such as Illumina, have
+empowered a variety of biological research and clinical applications that
+would not be practical with the older Sanger sequencing. However, the short
+read length (typically a few hundred basepairs) has posed a great challenge to
+\emph{de novo} assembly as many repetitive sequences and segmental duplications
+are longer than the read length and can hardly be resolved by short reads even
+with paired-end data~\citep{Alkan:2011zr}. Although with increased read length
+and improved algorithms we are now able to produce much better short-read
+assemblies than a few years ago, the contiguity and completeness of the
+assemblies are still not as good as Sanger assemblies~\citep{Chaisson:2015wj}.
+
+PacBio's SMRT technology was developed partly as an answer to the
+problem with short-read \emph{de novo} assembly. However, due to the high
+per-base error rate, around 15\%, these reads were only used as a complement to
+short reads initially~\citep{Bashir:2012gb,Ribeiro:2012bx,Koren:2012pt},
+until~\citet{Chin:2013qr} and~\citet{Koren:2013fc} demonstrated the feasibility
+of SMRT-only assembly. Since then, SMRT is becoming the preferred technology
+for finishing small genomes and producing high-quality Eukaryotic
+genomes~\citep{Berlin:2015xy}.
+
+Oxford Nanopore Technologies (ONT) has recently offered another long-read
+sequencing technology. Although the per-base error rate was high at the
+early access phase~\citep{Quick:2014uf}, the latest data quality has been
+greatly improved. \citet{Loman:2015xu} confirmed that we can achieve
+high-quality bacterial assembly with ONT data alone.
+
+Published long-read assembly pipelines all include four stages: (i) all-vs-all
+raw read mapping, (ii) raw read error correction, (iii) assembly of error
+corrected reads and (iv) contig consensus polish. Stage (iii) may involve
+all-vs-all read mapping again, but as the error rate is much reduced at this
+step, it is easier and faster than stage (i). Table~\ref{tab:tools} shows the tools used for
+each stage. Notably, our tool minimap is a raw read overlapper and miniasm is
+an assembler. We do not correct sequencing errors, but instead directly produce
+unpolished and uncorrected contig sequences from raw read overlaps. The idea of
+correction-free assembly was inspired by talks given by Gene Myers.
+Sikic et al (personal communication) are also independently exploring such an
+approach.
+
+\begin{table}[b]
+\processtable{Tools for noisy long-read assembly}
+{\footnotesize\label{tab:tools}
+\begin{tabular}{p{2.4cm}p{2cm}l}
+\toprule
+Functionality & Program & Reference \\
+\midrule
+Raw read overlap & BLASR & \citet{Chaisson:2012aa}\\
+& DALIGNER & \citet{DBLP:conf/wabi/Myers14} \\
+& MHAP & \citet{Berlin:2015xy} \\
+& GraphMap & \citet{sovic:2015aa} \\
+& minimap & this article \\
+Error correction & pbdagcon & http://bit.ly/pbdagcon \\
+& falcon\_sense & http://bit.ly/pbfcasm \\
+& nanocorrect & \citet{Loman:2015xu} \\
+Assembly & wgs-assembler & \citet{Myers:2000kl} \\
+& Falcon & http://bit.ly/pbfcasm \\
+& ra-integrate & http://bit.ly/raitgasm \\
+& miniasm & this article \\
+Consensus polish & Quiver & http://bit.ly/pbquiver \\
+& nanopolish & \citet{Loman:2015xu} \\
+\botrule
+\end{tabular}
+}{}
+\end{table}
+
+As we can see from Table~\ref{tab:tools}, each stage can be achieved with multiple tools.
+Although we have successfully combined tools into different pipelines, we
+need to change or convert the input/output formats to make them work
+together. Another contribution of this article is the proposal of concise
+mapping and assembly formats, which will hopefully encourage modular design of
+assemblers and the associated tools.
+
+\begin{methods}
+\section{Methods}
+
+\subsection{General notations}
+
+Let $\Sigma=\{\mathrm{A},\mathrm{C},\mathrm{G},\mathrm{T}\}$ be the
+alphabet of nucleotides. For a symbol $a\in\Sigma$, $\overline{a}$ is the
+Watson-Crick complement of $a$. A string $s=a_1a_2\cdots a_n$ over
+$\Sigma$ is also called a \emph{DNA sequence}. Its length is $|s|=n$;
+its \emph{reverse complement} is $\overline{s}=\overline{a_1a_2\cdots
+a_n}=\overline{a}_n\overline{a}_{n-1}\cdots\overline{a}_1$.
+For convenience, we define a strand function
+$\pi:\Sigma^*\times\{0,1\}\to\Sigma^*$ such that $\pi(s,0)=s$ and
+$\pi(s,1)=\overline{s}$. Here $\Sigma^*$ is the set of all DNA sequences.
+
+By convention, we call a $k$-long DNA sequence a \emph{$k$-mer}. We use the
+notation $s^k_i=a_i\cdots a_{i+k-1}$ to denote a $k$-long substring of $s$
+starting at $i$. $\Sigma^k$ is the set of all $k$-mers.
+
+\subsection{Minimap}
+
+\subsubsection{Overview of $k$-mer based sequence similarity search}\label{sec:minimapov}
+
+BLAST~\citep{Altschul:1997vn} and BLAT~\citep{Kent:2002jk} are among the most
+popular sequence similarity search tools. They use one $k$-mer hash function
+$\phi:\Sigma^k\to\mathbb{Z}$ to hash $k$-mers at the positions
+$1,w+1,2w+1,\ldots$ of a target sequence and keep the hash values in a hash
+table. Upon query, they use the same hash function on every $k$-mer of the
+query sequence and look up the hash table for potential matches. If there are
+one or multiple $k$-mer matches in a small window, these aligners extend the
+matches with dynamic programming to construct the final alignment.
+
+DALIGNER~\citep{DBLP:conf/wabi/Myers14} does not use a hash table. It instead
+identifies $k$-mer matches between two sets of reads by sorting $k$-mers and
+merging the sorted lists. DALIGNER is fast primarily because sorting and
+merging are highly cache efficient.
+
+MHAP~\citep{Berlin:2015xy} differs from others in the use of MinHash
+sketch~\citep{Broder:1997aa}.  Briefly, given a read sequence $s$ and $m$
+$k$-mer hash functions $\{\phi_j\}_{1\le j\le m}$, MHAP computes
+$h_j=\min\{\phi_j(s_i^k):1\le i\le |s|-k+1\}$ with each hash function $\phi_j$, and
+takes list $(h_j)_{1\le j\le m}$, which is called the \emph{sketch} of
+$s$, as a reduced representation of $s$. Suppose $(h_j)_j$ and $(h'_j)_j$ are
+the sketches of two reads, respectively. When the two reads are similar to each
+other or have significant overlaps, there are likely to exist multiple $j$ such
+that $h_j=h'_j$. Potential matches can thus be identified. A limitation of
+MinHash sketch is that it always selects a fixed number of hash values
+regardless of the length of the sequences. This may waste space or hurt
+sensitivity when input sequences vary greatly in lengths.
+
+Minimap is heavily influenced by all these works. It adopts the idea of sketch
+like MHAP but takes minimizers \citep{Roberts:2004fv} as a reduced
+representation instead; it stores $k$-mers in a hash table like BLAT and MHAP
+but also uses sorting extensively like DALIGNER. In addition, minimap is
+designed not only as a read overlapper but also as a read-to-genome and
+genome-to-genome mapper. It has more potential applications.
+
+\subsubsection{Computing minimizers}
+
+\begin{algorithm}[tb]
+\DontPrintSemicolon
+\footnotesize
+\KwIn{Parameters $w$ and $k$ and sequence $s$ with $|s|\ge w+k-1$}
+\KwOut{($w$,$k$)-minimizers, their positions and strands}
+\BlankLine
+\textbf{Function} {\sc MinimizerSketch}$(s,w,k)$
+\Begin {
+	$\mathcal{M}\gets\emptyset$\Comment*[r]{NB: $\mathcal{M}$ is a set; no duplicates}
+	\For{$i\gets1$ \KwTo $|s|-w-k+1$} {
+		$m\gets\infty$\;
+		\nl\For (\Comment*[f]{Find the min value}) {$j\gets0$ \KwTo $w-1$} {
+			$(u,v)\gets(\phi(s^k_{i+j}),\phi(\overline{s}^k_{i+j}))$\;
+			\If (\Comment*[f]{Skip if strand ambiguous}) {$u\not=v$} { 
+				$m\gets\min(m,\min(u,v))$\;
+			}
+		}
+		\nl\For (\Comment*[f]{Collect minimizers}) {$j\gets0$ \KwTo $w-1$} {
+			$(u,v)\gets(\phi(s^k_{i+j}),\phi(\overline{s}^k_{i+j}))$\;
+			\uIf{$u<v$ {\bf and} $u=m$} {
+				$\mathcal{M}\gets\mathcal{M}\cup\{(m,i+j,0)\}$\;
+			}\ElseIf{$v<u$ {\bf and} $v=m$}{
+				$\mathcal{M}\gets\mathcal{M}\cup\{(m,i+j,1)\}$\;
+			}
+		}
+	}
+	\Return $\mathcal{M}$\;
+}
+\caption{Compute minimizers}\label{alg:minimizer}
+\end{algorithm}
+
+Loosely speaking, a $(w,k)$-minimizer of a string is the
+smallest $k$-mer in a surrounding window of $w$ consecutive $k$-mers. Formally,
+let $\phi:\Sigma^k\to\mathbb{Z}$ be a $k$-mer hash function.
+A \emph{double-strand $(w,k,\phi)$-minimizer}, or simply a \emph{minimizer}, of a
+string $s$, $|s|\ge w+k-1$, is a triple $(h,i,r)$ such that there exists
+$\max(1,i-w+1)\le j\le\min(i,|s|-w-k+1)$ which renders
+$$
+h=\phi(\pi(s_i^k,r))=\min\big\{\phi(\pi(s_{j+p}^k,r')):0\le p<w,r'\in\{0,1\}\big\}
+$$
+Let $\mathcal{M}(s)$ be the set of minimizers of $s$.  Algorithm~\ref{alg:minimizer} gives the
+pseudocode to compute $\mathcal{M}(s)$ in $O(w\cdot|s|)$ time.  Our actual
+implementation is close to $O(|s|)$ in average case. It uses a queue to cache
+the previous minima and avoids the loops at lines~1 and~2 most of the time. In
+practice, time spent on collecting minimizers is insignificant.
+
+A natural choice of hash function $\phi$ is to let $\phi(\mathrm{A})=0$,
+$\phi(\mathrm{C})=1$, $\phi(\mathrm{G})=2$ and $\phi(\mathrm{T})=3$ and for a
+$k$-mer $s=a_1\cdots a_k$, define
+$$
+\phi(s)=\phi(a_1)\times4^{k-1}+\phi(a_2)\times4^{k-2}+\cdots+\phi(a_k)
+$$
+This hash function always maps a $k$-mer to a distinct $2k$-bit integer. A
+problem with this $\phi$ is that poly-A, which is often highly enriched in
+genomes, always gets zero, the smallest value. We may oversample these
+non-informative poly-A and hurt practical performance. To alleviate this issue,
+we use function $\phi'=h\circ\phi$ instead, where $h$ is an invertible integer
+hash function on $[0,4^k)$ (Algorithm~\ref{alg:invhash}; http://bit.ly/invihgi). The
+invertibility of $h$ is not essential, but as such $\phi'$ never maps two
+distinct $k$-mers to the same $2k$-bit integer, it helps to reduce hash
+collisions.
+
+\begin{algorithm}[tb]
+\DontPrintSemicolon
+\footnotesize
+\KwIn{$p$-bit integer $x$}
+\KwOut{hashed $p$-bit integer}
+\BlankLine
+\textbf{Function} {\sc InvertibleHash}$(x,p)$
+\Begin {
+	$m\gets2^p-1$\;
+	$x\gets(\mbox{\tt\char126}x+(x\mbox{\tt\char60\char60}21))\mbox{ \tt\char38}\mbox{ }m$\;
+	$x\gets x\mbox{ \tt\char94}\mbox{ }x\mbox{\tt\char62\char62}24$\;
+	$x\gets(x+(x\mbox{\tt\char60\char60}3)+(x\mbox{\tt\char60\char60}8))\mbox{ \tt\char38}\mbox{ }m$\;
+	$x\gets x\mbox{ \tt\char94}\mbox{ }x\mbox{\tt\char62\char62}14$\;
+	$x\gets(x+(x\mbox{\tt\char60\char60}2)+(x\mbox{\tt\char60\char60}4))\mbox{ \tt\char38}\mbox{ }m$\;
+	$x\gets x\mbox{ \tt\char94}\mbox{ }x\mbox{\tt\char62\char62}28$\;
+	$x\gets(x+(x\mbox{\tt\char60\char60}31))\mbox{ \tt\char38}\mbox{ }m$\;
+	\Return $x$\;
+}
+\caption{Invertible integer hash function}\label{alg:invhash}
+\end{algorithm}
+
+Note that in a window of $w$ consecutive $k$-mers, there may be more than one
+minimizer. Algorithm~\ref{alg:minimizer} keeps them all with the loop at line~2. This way, a
+minimizer of $s$ always corresponds to a minimizer of $\overline{s}$.
+\citet{Roberts:2004fv} did not discuss the treatment of such equally good
+minimizers.
+
+For read overlapping, we use $k=15$ and $w=5$ to find minimizers.
+
+\subsubsection{Indexing}
+
+\begin{algorithm}[tb]
+\DontPrintSemicolon
+\footnotesize
+\KwIn{Set of target sequences $\mathcal{T}=\{s_1,\ldots,s_T\}$}
+\KwOut{Minimizer hash table $\mathcal{H}$}
+\BlankLine
+\textbf{Function} {\sc Index}$(\mathcal{T},w,k)$
+\Begin {
+	$\mathcal{H}\gets$ empty hash table\;
+	\For{$t\gets1$ \KwTo $T$} {
+		$\mathcal{M}\gets${\sc MinimizerSketch}$(s_t,w,k)$\;
+		\ForEach{$(h,i,r)\in \mathcal{M}$} {
+			$\mathcal{H}[h]\gets\mathcal{H}[h]\cup\{(t,i,r)\}$\;
+		}
+	}
+	\Return $\mathcal{H}$\;
+}
+\caption{Index target sequences}\label{alg:idx}
+\end{algorithm}
+
+Algorithm~\ref{alg:idx} describes indexing target sequences. It keeps minimizers of all target
+sequences in a hash table where the key is the minimizer hash and the value is
+a set of tuples, each holding the target sequence index, the position of the
+minimizer and the strand (packed into one 64-bit integer).
+
+In implementation, we do not directly insert minimizers to the hash table.
+Instead, we append minimizers to an array and sort the array after collecting
+all minimizers. The hash table keeps the intervals on the sorted array. This
+procedure dramatically reduces heap allocations and cache misses, and is
+supposedly faster than direct hash table insertion.
+
+\subsubsection{Mapping}
+
+Given two sequences $s$ and $s'$, we say we find a \emph{minimizer hit}
+$(h,x,i,i')$ if there exist $(h,i,r)\in\mathcal{M}(s)$ and
+$(h,i',r')\in\mathcal{M}(s')$ with $x=r\oplus r'$ ($\oplus$ is the XOR
+operator). Here $h$ is the minimizer hash value, $x$ indicates the relative
+strand and $i$ and $i'$ are the positions on the two sequences, respectively.
+We say two minimizer hits $(h_1,x,i_1,i'_1)$ and $(h_2,x,i_2,i'_2)$ are
+\emph{$\epsilon$-away} if 1) $x=0$ and $|(i_1-i'_1)-(i_2-i'_2)|<\epsilon$
+or 2) $x=1$ and $|(i_1+i'_1)-(i_2+i'_2)|<\epsilon$. Intuitively,
+$\epsilon$-away hits are approximately colinear within a band of width
+$\epsilon$ (500bp by default).  Given a set of minimizer hits $\{(h,x,i,i')\}$, we can cluster
+$i-i'$ for $x=0$ or $i+i'$ for $x=1$ to identify long colinear matches.
+This procedure is inspired by the Hough transform mentioned
+by~\citet{sovic:2015aa}. 
+
+\begin{algorithm}[tb]
+\DontPrintSemicolon
+\footnotesize
+\KwIn{Hash table $\mathcal{H}$ and query sequence $q$}
+\KwOut{Print matching query and target intervals}
+\BlankLine
+\textbf{Function} {\sc Map}$(\mathcal{H},q,w,k,g)$
+\Begin {
+	$\mathcal{A}\gets$ empty array\;
+	$\mathcal{M}\gets${\sc MinimizerSketch}$(q,w,k)$\;
+	\nl\ForEach (\Comment*[f]{Collect minimizer hits}) {$(h,i,r)\in \mathcal{M}$} {
+		\ForEach{$(t,i',r')\in \mathcal{H}[h]$} {
+			\uIf (\Comment*[f]{Minimizers on the same strand}) {$r=r'$} {
+				Append $(t,0,i-i',i')$ to $\mathcal{A}$\;
+			} \Else (\Comment*[f]{On different strands}) {
+				Append $(t,1,i+i',i')$ to $\mathcal{A}$\;
+			}
+		}
+	}
+	Sort $\mathcal{A}=[(t,r,c,i')]$ in the order of the four values in tuples\;
+	$b\gets1$\;
+	\nl\For (\Comment*[f]{Cluster minimizer hits}) {$e\gets1$ \KwTo $|\mathcal{A}|$} {
+		\If{$e=|\mathcal{A}|$ {\bf or} $\mathcal{A}[e+1].t\not=\mathcal{A}[e].t$ {\bf or} $\mathcal{A}[e+1].r\not=\mathcal{A}[e].r$ {\bf or} $\mathcal{A}[e+1].c-\mathcal{A}[e].c>g$} {
+			\nl$\mathcal{C}\gets$ the maximal colinear subset of $\mathcal{A}[b..e]$\;
+			Print the left- and right-most query/target positions in $\mathcal{C}$\;
+			$b\gets e+1$\;
+		}
+	}
+}
+\caption{Map a query sequence}\label{alg:map}
+\end{algorithm}
+
+Algorithm~\ref{alg:map} gives the details of the mapping algorithm. The loop at line~1
+collects minimizer hits between the query and all the target sequences. The
+loop at line~2 performs a single-linkage clustering to group approximately
+colinear hits. Some hits in a cluster may not be colinear because two minimizer
+hits within distance $\epsilon$ are always $\epsilon$-away. To fix this issue,
+we find the maximal colinear subset of hits by solving a longest increasing
+subsequence problem (line~3). This subset is the final mapping result. In
+practical implementation, we set thresholds on the size of the subset (4 by
+default) and the number of matching bases in the subset to filter poor mappings
+(100 for read overlapping).
+
+\subsection{Assembly graph}
+
+Two strings $v$ and $w$ may be mapped to each other based on their sequence
+similarity. If $v$ can be mapped to a substring of $w$, we say $w$
+\emph{contains} $v$. If a suffix of $v$ and a prefix of $w$ can be mapped to
+each other, we say $v$ \emph{overlaps} $w$, written as $v\to w$.
+If we regard strings $v$ and $w$ as vertices, the overlap relationship defines
+a directed edge between them. The \emph{length} of $v\to w$ equals the length
+of $v$'s prefix that does not overlap $w$.
+
+Let $G=(V,E,\ell)$ be a graph without multi-edges, where $V$ is a
+set of DNA sequences (vertices), $E$ a set of overlaps between them (edges) and
+$\ell:E\to\Re_+$ is the edge length function. $G$ is said to be
+\emph{Watson-Crick complete} if i) $\forall v\in V$, $\overline{v}\in V$ and
+ii) $\forall v\to w\in E$, $\overline{w}\to\overline{v}\in E$. $G$ is said to
+be \emph{containment-free} if any sequence $v$ is not contained in other
+sequences in $V$. If $G$ is both Watson-Crick complete and containment-free, it
+is an \emph{assembly graph}. By definition, any vertex $v$ has a
+\emph{complement vertex} $\overline{v}$ in the graph and any edge $v\to w$ has
+a \emph{complement edge} $\overline{w}\to\overline{v}$.  Let
+$\mathrm{deg}^+(v)$ be the outdegree of $v$ and $\mathrm{deg}^-(v)$ be the
+indegree. It follows that $\mathrm{deg}^-(v)=\mathrm{deg}^+(\overline{v})$.
+
+An assembly graph has the same topology as a string graph~\citep{Myers:2005bh},
+though the interpretation of the vertex set $V$ is different. In a string
+graph, $V$ is the set of the two ends of sequences, not the set of forward and
+reverse-complemented sequences. De Bruijn graph can be regarded as a special
+case of overlap graph. It is also an assembly graph.
+
+In an assembly graph, an edge $v\to w$ is \emph{transitive} if there exist
+$v\to u$ and $u\to w$. Removing a transitive edge does not affect the
+connectivity of the graph. A vertex $v$ is a \emph{tip} if ${\rm deg}^+(v)=0$
+and ${\rm deg}^-(v)>0$. The majority of tips are caused by artifacts or missing
+overlaps. A \emph{bubble} is a directed acyclic subgraph with a single source
+$v$ and a single sink $w$ having at least two paths between $v$ and $w$. The
+bubble is tight if ${\rm deg}^+(v)>1$ and ${\rm deg}^-(w)>1$. A bubble may be
+caused by variants between homologous haplotypes as well as missing overlaps.
+
+\subsection{Miniasm}
+
+\subsubsection{Trimming reads}
+
+Raw read sequences may contain artifacts such as untrimmed adapters and
+chimaera. The first step of assembly is to reduce such artifacts by examining
+read-to-read mappings. For each read, miniasm finds the longest region that is
+covered by three or more good mappings (longer than 2000bp with at least 100bp
+non-redundant bases on matching minimizers) and trims bases outside the region.
+
+\subsubsection{Generating assembly graph}
+
+\begin{figure}[tb]
+\centering
+\includegraphics[width=.45\textwidth]{overhang}
+\caption{Mapping between two reads. $b[1]$ and $e[1]$ are the starting and
+ending mapping coordinates of the first read $v$, respectively. $b[2]$ and
+$e[2]$, $b[2]<e[2]$, are the coordinates on the mapping strand of the second
+read $w$. Lightgray areas indicate regions that would be mapped together if the
+overlap was perfect. If the overhang regions are small enough, the figure
+implies an edge $v\to\overline{w}$ with $\ell(v\to\overline{w})=b[1]-b[2]$ and
+an edge $w\to\overline{v}$ with
+$\ell(w\to\overline{v})=(l[2]-e[2])-(l[1]-e[1])$.}\label{fig:overhang}
+\end{figure}
+
+\begin{algorithm}[bt]
+\DontPrintSemicolon
+\footnotesize
+\KwIn{Read length $l$, mapping begin coordinate $b$ and mapping end $e$ of the
+two reads; max overhang length $o$ (1000 by default) and max overhang to mapping length ratio
+$r$ (0.8 by default).}
+\KwOut{Classification of the mapping}
+\BlankLine
+\textbf{Function} {\sc ClassifyMapping}$(l[2], b[2], e[2], o, r)$
+\Begin {
+	${\it overhang}\gets\min(b[1], b[2])+\min(l[1]-e[1],l[2]-e[2])$\;
+	${\it maplen}\gets\max(e[1]-b[1],e[2]-b[2])$\;
+	\uIf{${\it overhang}>\min(o,{\it maplen}\cdot r)$} {
+		\Return {\tt INTERNAL\_MATCH}
+	} \uElseIf {$b[1]\le b[2]$ {\bf and} $l[1]-e[1]\le l[2]-e[2]$} {
+		\Return {\tt FIRST\_CONTAINED}
+	}\uElseIf {$b[1]\ge b[2]$ {\bf and} $l[1]-e[1]\ge l[2]-e[2]$} {
+		\Return {\tt SECOND\_CONTAINED}
+	} \uElseIf {$b[1]>b[2]$} {
+		\Return {\tt FIRST\_TO\_SECOND\_OVERLAP}
+	} \Else {
+		\Return {\tt SECOND\_TO\_FIRST\_OVERLAP}
+	}
+}
+\caption{Mapping classification}\label{alg:ovlp}
+\end{algorithm}
+
+For each trimmed mapping, miniasm applies Algorithm~\ref{alg:ovlp} to classify the mapping
+(see also Figure~\ref{fig:overhang} for the explanation of input variables).
+It ignores internal matches, drops contained reads and adds overlaps to the
+assembly graph.
+
+
+\subsubsection{Graph cleaning}
+
+\begin{algorithm}[tb]
+\DontPrintSemicolon
+\footnotesize
+\KwIn{$G=(V,E)$, starting vertex $v_0$ and maximum probe distance $d$}
+\KwOut{the sink vertex of a bubble within $d$; or {\bf nil} if not found}
+\BlankLine
+\textbf{Function} {\sc DetectBubble}$(V,E,v_0,d)$
+\Begin {
+	\lIf{$\mathrm{deg}^+(v_0)<2$} { \Return {\bf nil} } \Comment*[r]{Not a source of bubble}
+	\lFor{$v\in V$} { $\delta[v]\gets\infty$ } \Comment*[r]{the min distance from $v_0$ to $v$}
+	$\delta[v_0]\gets0$\;
+	$S\gets$ empty stack \Comment*[r]{Vertices with all incoming edges visited}
+	{\sc Push}$(S,v_0)$\;
+	$p\gets0$ \Comment*[r]{Number of visited vertices never added to $S$}
+	\While{$S$ is not empty} {
+		$v\gets$ {\sc Pop}$(S)$\;
+		\ForEach{$v\to w\in E$} {
+			\If (\Comment*[f]{A cycle involving the starting vertex}) {$w=v_0$} {
+				\Return {\bf nil}\;
+			}
+			\If (\Comment*[f]{Moving too far}) {$\delta[v]+\ell(v\to w)>d$} {
+				\Return {\bf nil}\;
+			}
+			\If (\Comment*[f]{Not visited before}) {$\delta[w]=\infty$} {
+				$\gamma[w]\gets \mathrm{deg}^-(w)$ \Comment*[r]{No. unvisited incoming edges}
+				$p\gets p+1$\;
+			}
+			\If{$\delta[v]+\ell(v\to w)<\delta[w]$} {
+				\nl$\delta[w]\gets \delta[v]+\ell(v\to w)$\;
+			}
+			$\gamma[w]\gets\gamma[w]-1$\;
+			\If (\Comment*[f]{All incoming edges visited}) {$\gamma[w]=0$} {
+				\If (\Comment*[f]{Not a tip}) {$\mathrm{deg}^+(w)\not=0$} {
+					{\sc Push}$(S,w)$\;
+				}
+				$p\gets p-1$\;
+			}
+		}
+		\If (\Comment*[f]{Found the sink}) {$|S|=1$ {\bf and} $p=0$} {
+			\Return {\sc Pop}$(S)$\;
+		}
+	}
+	\Return {\bf nil}\;
+}
+\caption{Bubble detection}\label{alg:popbub}
+\end{algorithm}
+
+After constructing the assembly graph, miniasm removes transitive
+edges~\citep{Myers:2005bh}, trims tips and pops small
+bubbles~\citep{Zerbino:2008uq}. Algorithm~\ref{alg:popbub} detects bubbles. It is
+adapted from Kahn's topological sorting algorithm~\citep{Kahn62aa}. It starts
+from the potential source and visits a vertex only after all its incoming edges
+have been visited. Algorithm~\ref{alg:popbub} only detects bubbles. We can keep track of the
+optimal parent vertex at line~1 and then backtrack to collapse bubbles to a
+single path. Fermi~\citep{Li:2012fk} uses a similar algorithm except that it
+keeps two optimal paths through the bubble.  \citet{DBLP:conf/wabi/OnoderaSS13}
+and \citet{TCS15} have also independently found similar algorithms.
+
+In addition, if $v\to w_1$ and $v\to w_2$ exist and $\ell(v\to w_1)<\ell(v\to
+w_2)$, miniasm removes $v\to w_2$ if $[|v|-\ell(v\to w_2)]/[|v|-\ell(v\to
+w_1)]$ is small enough (70\% by default). When there are longer overlaps,
+shorter overlaps after transitive reduction may be due to repeats.
+However, non-repetitive overlaps may also be removed at a small chance, which
+leads to missing overlaps and misassemblies.
+
+\subsubsection{Generating unitig sequences}
+
+If there are no multi-edges in the assembly graph, we can use $v_1\to
+v_2\to\cdots\to v_k$ to represent a path consisting of $k$ vertices. The
+sequence spelled from this path is the concatenation of vertex substrings:
+$v_1[1,\ell(v_1\to v_2)]\circ v_2[1,\ell(v_2\to v_3)]\circ\cdots\circ
+v_{k-1}[1,\ell(v_{k-1}\to v_k)]\circ v_k$, where $v[i,j]$ is the substring between
+$i$ and $j$ inclusive, and $\circ$ is the string concatenation operator.
+
+In a transitively reduced graph, a \emph{unitig} is a path $v_1\to
+v_2\to\cdots\to v_k$ such that ${\rm deg}^+(v_i)={\rm deg}^-(v_{i+1})=1$ and i)
+$v_1=v_k$ or ii) ${\rm deg}^-(v_1)\not=1$ and ${\rm deg}^+(v_k)\not=1$.
+Its sequence is the sequence spelled from the path. Intuitively, a unitig is a
+maximal path on which adjacent vertices can be ``unambiguously merged'' without
+affecting the connectivity of the original assembly graph.
+
+As miniasm does not correct sequencing errors, the error rate of unitig
+sequence is the same as the error rate of the raw input reads. It is in theory
+possible to derive a better unitig sequence by taking the advantage of read
+overlaps. We have not implemented such a consensus tool yet.
+
+\subsection{Formats: PAF and GFA}
+
+\subsubsection{Pairwise mapping format (PAF)}
+
+\begin{table}[tb]
+\processtable{Pairwise mapping format (PAF)}
+{\footnotesize\label{tab:paf}
+\begin{tabular}{rcl}
+\toprule
+Col & Type & Description \\
+\midrule
+1 & string & Query sequence name \\
+2 & int    & Query sequence length \\
+3 & int    & Query start coordinate (0-based) \\
+4 & int    & Query end coordinate (0-based) \\
+5 & char   & `+' if query and target on the same strand; `-' if opposite \\
+6 & string & Target sequence name \\
+7 & int    & Target sequence length \\
+8 & int    & Target start coordinate on the original strand \\
+9 & int    & Target end coordinate on the original strand \\
+10& int    & Number of matching bases in the mapping \\
+11& int    & Number of bases, including gaps, in the mapping \\
+12& int    & Mapping quality (0--255 with 255 for missing) \\
+\botrule
+\end{tabular}
+}{PAF is a TAB-delimited text format with each line consisting of the above fixed
+fields. When the alignment is available, column 11 equals the total number of
+sequence matches, mismatches and gaps in the alignment. Column 10 divided by
+column 11 gives the alignment identity. If the detailed alignment is not
+available, column 10 and 11 can be approximate. PAF may optionally have
+additional fields in the SAM-like typed key-value format~\citep{Li:2009ys}.}
+\end{table}
+
+PAF is a lightweight format keeping the key mapping information (Table~\ref{tab:paf}).
+Minimap outputs mappings in PAF, which are taken by miniasm as input for
+assembly. We also provide scripts to convert DALIGNER, MHAP and SAM formats to
+PAF.
+
+\subsubsection{Graphical fragment assembly format (GFA)}
+
+\begin{table}[tb]
+\processtable{Graphical fragment assembly format (GFA)}
+{\footnotesize\label{tab:gfa}
+\begin{tabular}{clp{5.8cm}}
+\toprule
+Line & Comment & Fixed fields \\
+\midrule
+H    & Header  & N/A \\
+S    & Segment & segName,segSeq \\
+L    & Overlap & segName1,segOri1,segName2,segOri2,CIGAR \\
+\botrule
+\end{tabular}
+}{GFA is a line-based TAB-delimited format. Each line starts with a single
+letter determining the interpretation of the following TAB-delimited fields. In
+GFA, segment refers to a read or a unitig. A line starting with `S' gives the name
+and sequence of a segment. When the sequence is not available, it can be a star
+`*'. Overlaps between segments are represented in lines starting with `L',
+giving the names and orientations of the two segments in an overlap. The last
+field `CIGAR' on an `L'-line describes the detailed alignment of the overlap if
+available. In addition to the types of lines in the table, GFA may contain
+other line types starting with different letters. Each line may optionally have
+additional SAM-like typed key-value pairs.}
+\end{table}
+
+GFA is a concise assembly format (Table~\ref{tab:gfa}; http://bit.ly/gfaspec) initially proposed by
+us prior to miniasm and later improved by the community (P. Melsted, S. Jackman,
+J. Simpson and E. Garrison, personal communication). GFA has an explicit
+relationship to an assembly graph -- an `S' line in the GFA corresponds to a
+vertex and its complement in the graph; an `L' line corresponds to an edge and
+its complement. GFA is able to represent graphs produced at all the stages of
+an assembly pipeline, from initial read overlaps to the unitig relationship in
+the final assembly.
+
+FASTG (http://bit.ly/fastgfmt) is another assembly format prior to GFA.
+It uses different terminologies. A vertex in an assembly graph is called an
+edge in FASTG, and an edge is called an adjacency. In FASTG, subgraphs can be
+nested, though no tools work with nested graphs due to technical complications.
+In addition, with nesting, one assembly graph can be represented in distinct
+ways, which we regard as a limitation of FASTG.
+
+\end{methods}
+
+\begin{table}[tb]
+\processtable{Evaluation data sets}
+{\footnotesize\label{tab:data}
+\begin{tabular}{llrrr}
+\toprule
+Name & Species & Size & Cov. & N50 \\
+\midrule
+PB-ce-40X     & {\it Caenorhabditis elegans}      & 104M & 45  & 16572 \\
+ERS473430     & {\it Citrobacter koseri}          & 4.9M & 106 & 7543  \\
+ERS544009     & {\it Yersinia pseudotuberculosis} & 4.7M & 147 & 9002  \\
+ERS554120     & {\it Pseudomonas aeruginosa}      & 6.4M & 90  & 7106  \\
+ERS605484     & {\it Vibrio vulnificus}           & 5.0M & 155 & 5091  \\
+ERS617393     & {\it Acinetobacter baumannii}     & 4.0M & 237 & 7911  \\
+ERS646601     & {\it Haemophilus influenzae}      & 1.9M & 258 & 4081  \\
+ERS659581     & {\it Klebsiella sp.}              & 5.1M & 129 & 8031  \\
+ERS670327     & {\it Shimwellia blattae}          & 4.2M & 155 & 6765  \\
+ERS685285     & {\it Streptococcus sanguinis}     & 2.4M & 224 & 5791  \\
+ERS743109     & {\it Salmonella enterica}         & 4.8M & 188 & 6051  \\
+PB-ecoli      & {\it Escherichia coli}            & 4.6M & 160 & 13976 \\
+PBcR-PB-ec    & {\it Escherichia coli}            & 4.6M & 30  & 11757 \\
+PBcR-ONT-ec   & {\it Escherichia coli}            & 4.6M & 29  & 9356  \\
+MAP-006-1     & {\it Escherichia coli}            & 4.6M & 54  & 10892 \\
+MAP-006-2     & {\it Escherichia coli}            & 4.6M & 30  & 10794 \\
+MAP-006-pcr-1 & {\it Escherichia coli}            & 4.6M & 30  & 8080  \\
+MAP-006-pcr-2 & {\it Escherichia coli}            & 4.6M & 60  & 8064  \\
+\botrule
+\end{tabular}
+}{Evaluation data set name, species, reference genome size, theoretical
+sequencing coverage and the N50 read length. Names starting with ``MAP'' are
+unpublished recent ONT data provided by the Loman lab (http://bit.ly/loman006).
+Names starting with ``ERS'' are accession numbers of unpublished PacBio data
+from the NCTC project (http://bit.ly/nctc3k). PB-ecoli and PB-ce-40X are PacBio
+public data sets sequenced with the P6/C4 chemistry (http://bit.ly/pbpubdat;
+retrieved on 11/03/2015). PBcR-PB-ec is the PacBio sample data (P5/C3
+chemistry) used in the tutorial of the PBcR pipeline; PBcR-ONT-ec is the ONT
+example originally used by \citet{Loman:2015xu}. `pls2fasta --trimByRegion' was
+applied to ERS* and PB-ecoli data sets as they do not provide read sequences in
+the FASTQ format.}
+\end{table}
+
+\section{Results}
+
+\subsection{Assembling bacterial genomes}
+
+We evaluated the performance of miniasm on 17 bacterial data sets
+(Table~\ref{tab:data}) with command line `minimap -Sw5 -L100 -m0 reads.fa reads.fa $|$
+miniasm -f reads.fa -'. Miniasm is able to derive a single contig per
+chromosome/plasmid for all but four data sets: 3 extra $>$50kb contigs for
+ERS554120, and 1 extra contig for ERS605484, PBcR-ONT-ec and MAP-006-pcr-1
+each. 
+
+Encouraged by the single-contig assembly for PBcR-PB-ec at only 30-fold
+coverage, we randomly down-sampled PacBio data sets and tried to assemble the
+subset. For PB-ecoli, miniasm still produced a single contig at 24-fold
+coverage, or two contigs at 20-fold. For the other data sets, however, miniasm
+generated fragmented assemblies when we sampled a third of reads. We speculate
+the shorter read lengths of the ERS* data sets made it more difficult to
+produce good assemblies at relatively low coverage.
+
+We have also run the PBcR pipeline~\citep{Berlin:2015xy}. PBcR requires a spec
+file. We took `pacbio.spec' from the PBcR-PB-ec example and `oxford.spec' from
+PBcR-ONT-ec, and applied them to all data sets based on their data types. MAP*
+data sets only provide FASTA sequences for download. We assigned quality 9 to
+all bases as PBcR requires base quality. PBcR assembled all PacBio data sets
+without extra contigs longer than 50kb -- better than miniasm. However, on the
+ONT data sets, PBcR produced more fragmented assemblies for MAP-006-2,
+MAP-006-pcr-1 and MAP-006-pcr-2, and deleted a 300kb region for the PBcR-ONT-ec
+data set. 
+
+With four CPU cores, it took miniasm 14 seconds to assemble the 30-fold
+PBcR-PB-ec data set and 2 minutes to assemble the 160-fold PB-ecoli data set.
+PBcR, also with four CPU cores, is about 700 times slower on PBcR-PB-ec and
+60 times slower on PB-ecoli.  It is slower on low-coverage data
+because PBcR automatically switches to the slower sensitive mode. Here we
+should remind readers that without an error correction stage, the contig
+sequences generated by miniasm are of much lower accuracy in comparison to
+PBcR. The speed comparison is not fair. Nonetheless, miniasm is still tens of
+times faster than PBcR excluding the time spent on error correction.
+
+\subsection{Assembling a C. elegans genome}
+
+\begin{figure}[tb]
+\includegraphics[width=.48\textwidth]{ce}
+\caption{Dotter plot comparing the miniasm assembly and the {\it C. elegans}
+reference genome. Thin gray lines mark the contig or chromosome boundaries. The
+three arrows indicate large-scale misassemblies visible from the
+plot. The mapping is done with `minimap -L500'.}\label{fig:ce}
+\end{figure}
+
+We assembled a 45-fold {\it C. elegans} data set (Table~\ref{tab:data}). With 16 CPU cores,
+miniasm assembled the data in 9 minutes, achieving an N50 size of 2.8Mb. From the
+dotter plot (Figure~\ref{fig:ce}), we observed three large-scale misassemblies
+(readers are advised to zoom into the vector graph to see the details).  Due to
+the high per-base error rate of the miniasm contigs, we have not been able to
+produce a reliable whole-genome alignment to analyze local misassemblies in a
+satisfactory manner.
+
+PacBio has assembled the same data set with HGAP3~\citep{Chin:2013qr}. HGAP3
+produces shorter contigs (N50=1.6Mb), but does not incur large-scale
+misassemblies visible from the dotter plot between the {\it C. elegans}
+reference genome and the contigs.
+
+We have also tried PBcR on this data set. Based on the intermediate progress
+report, we estimated that with 16 CPU cores, it would take a week or so to
+finish the assembly in the automatically chosen `sensitive' mode.
+
+\subsection{Switching read overlappers}
+
+Miniasm also works with other overlappers when we convert their output format
+to PAF. On the 30-fold PBcR-PB-ec data set, we are able to produce a single
+contig with DALIGNER (option -k15 -h50), MHAP (option
+\mbox{--pacbio-sensitive}) and GraphMap (option -w owler). DALIGNER is the
+fastest, taking 65 seconds with four CPUs.  Minimap is five times as fast on
+this data set and is 18 times as fast on PB-ecoli at 160-fold. Minimap is
+faster on larger data sets possibly because without staging all possible hits
+in RAM, minimap is able to process more reads in a batch while a large batch
+usually helps performance. We should note that DALIGNER generates alignments
+while minimap does not. Minimap would possibly be slower if it included an
+alignment step. In this regard, the performance of DALIGNER is very impressive.
+
+\section{Discussions}
+
+Miniasm confirms long noisy reads can be assembled without
+an error correction stage, and without this stage, the assembly process can be
+greatly accelerated and simplified, while achieving comparable contiguity and
+large-scale accuracy to existing pipelines, at least for genomes without
+excessive repetitive sequences. If we can develop a fast tool to generate
+high-quality contig sequences which miniasm currently lacks, we may be able to
+dramatically speed up the entire assembly pipeline. MinION allows
+users to pause sequencing and reload samples. Fast assembly will not only
+alleviate the computational burden, but also help us to decide if enough data
+have been collected.
+
+Our main concern with miniasm is that when we look at a low-identity match
+between two noisy reads, it is difficult to tell whether the low identity is
+caused by the stochastically higher base error rate on reads, or because
+reads come from two recent segmental duplications.
+In comparison, error correction takes advantage of multiple reads and in
+theory has more power to distinguish high error rate from duplications/repeats.
+The bacteria and {\it C. elegans} evaluated in this article are repeat-sparse.
+We have yet to learn how miniasm performs on repeat-rich genomes.  In addition, miniasm has
+not been optimized for large repeat-rich genomes. It reads all hits into RAM,
+which may not be practical when there are too many. We need to filter
+repetitive hits, introduce disk-based algorithms (e.g. for sorting) or stream
+hits before removing contained reads. Working with large complex genomes will
+be an important future direction.
+
+Minimap is primarily used as a read overlapper in this article. It in fact has a
+wider range of applications in addition to overlapping.  For example, with four
+CPU cores, it is able to map 1.6Gb PacBio reads to the human genome in 2.5
+minutes, map 1Gb {\it E. coli} reads to pre-indexed 9.6Gb bacterial genome in 3
+minutes and to pre-indexed 100Gb nt database in an hour with a third of time
+spent on loading the index from the network file system. It can also map 2800
+bacterial genomes to themselves in an hour. Minimap is fast, but is not as
+sensitive as proper whole-genome aligners and recent long-read aligners such as
+LASTZ~\citep{harris:2007aa}, LAST~\citep{Kiebasa:2011aa} and GraphMap. They use
+shorter spaced seeds which greatly help sensitivity at the cost of performance.
+
+Oxford Nanopore is working on PromethION and PacBio will deliver PacBio Sequel
+next year. Both sequencers promise significantly reduced sequencing cost and
+increased throughput, which may stimulate the adoption of long-read sequencing
+and subsequently the development of long-read mappers and assemblers. We hope
+in this process, the community could standardize the input and output formats
+of various tools, so that a developer could focus on a component he or she
+understands best. Such a modular approach has proved fruitful in the
+development of short-read tools -- in fact, the best short-read pipelines all
+consist of components developed by different groups -- and will be equally
+beneficial to the future development of long-read mappers and assemblers.
+
+\section*{Acknowledgement}
+
+We thank P\'all Melsted for maintaining the GFA spec and are grateful to Gene
+Myers, Jason Chin, Adam Phillippy, Jared Simpson, Zamin Iqbal, Nick Loman and
+Ivan Sovic for their presentations, talks, comments on social media and
+unpublished works which have greatly influenced and helped the development of
+minimap and miniasm.
+
+\paragraph{Funding\textcolon} NHGRI U54HG003037; NIH GM100233
+
+\bibliography{miniasm}
+\end{document}
diff --git a/tex/natbib.bst b/tex/natbib.bst
new file mode 100644
index 0000000..a679e1d
--- /dev/null
+++ b/tex/natbib.bst
@@ -0,0 +1,1288 @@
+%% 
+%% This is file `natbib.bst', generated 
+%% on <1994/9/16> with the docstrip utility (2.2h).
+%% 
+%% The original source files were:
+%% 
+%% genbst.mbs  (with options: `ay,nat,seq-lab,nm-rev,dt-beg,yr-par,vol-bf,
+%%                             volp-com,etal-it')
+%% ---------------------------------------- 
+%% *** Personal bib style, PWD *** 
+%% 
+%% (Here are the specifications of the source file)
+%% \ProvidesFile{genbst.mbs}[1994/09/16 1.5 (PWD)]
+%%   For use with BibTeX version 0.99a or later
+%%     and with LaTeX 2.09 or 2e
+%%-------------------------------------------------------------------
+%% NOTICE:
+%% This file may be used for non-profit purposes.
+%% It may not be distributed in exchange for money,
+%%   other than distribution costs.
+%%
+%% The author provides it `as is' and does not guarantee it in any way.
+%%
+%% Copyright (C) 1994 Patrick W. Daly
+%% Max-Planck-Institut f\"ur Aeronomie
+%% Postfach 20
+%% D-37189 Katlenburg-Lindau
+%% Germany
+%%
+%% E-mail:
+%% SPAN--     nsp::linmpi::daly    (note nsp also known as ecd1)
+%% Internet-- daly at linmpi.dnet.gwdg.de
+%%-----------------------------------------------------------
+%% \CharacterTable
+%%  {Upper-case    \A\B\C\D\E\F\G\H\I\J\K\L\M\N\O\P\Q\R\S\T\U\V\W\X\Y\Z
+%%   Lower-case    \a\b\c\d\e\f\g\h\i\j\k\l\m\n\o\p\q\r\s\t\u\v\w\x\y\z
+%%   Digits        \0\1\2\3\4\5\6\7\8\9
+%%   Exclamation   \!     Double quote  \"     Hash (number) \#
+%%   Dollar        \$     Percent       \%     Ampersand     \&
+%%   Acute accent  \'     Left paren    \(     Right paren   \)
+%%   Asterisk      \*     Plus          \+     Comma         \,
+%%   Minus         \-     Point         \.     Solidus       \/
+%%   Colon         \:     Semicolon     \;     Less than     \<
+%%   Equals        \=     Greater than  \>     Question mark \?
+%%   Commercial at \@     Left bracket  \[     Backslash     \\
+%%   Right bracket \]     Circumflex    \^     Underscore    \_
+%%   Grave accent  \`     Left brace    \{     Vertical bar  \|
+%%   Right brace   \}     Tilde         \~}
+%%---------------------------------------------------------------------
+ % This is an author-year citation style bibliography. As such, it is
+ % non-standard LaTeX, and requires a special package file to function properly.
+ % Such a package is    natbib.sty   by Patrick W. Daly
+ % The form of the \bibitem entries is
+ %   \bibitem[Jones et al.(1990)]{key}...
+ %   \bibitem[Jones et al.(1990)Jones, Baker, and Smith]{key}...
+ % The essential feature is that the label (the part in brackets) consists
+ % of the author names, as they should appear in the citation, with the year
+ % in parentheses following. There must be no space before the opening
+ % parenthesis!
+ % With natbib v5.3, a full list of authors may also follow the year.
+ % In natbib.sty, it is possible to define the type of enclosures that is
+ % really wanted (brackets or parentheses), but in either case, there must
+ % be parentheses in the label.
+ % The \cite command functions as follows:
+ %   \cite{key} ==>>                Jones et al. (1990)
+ %   \cite[]{key} ==>>              (Jones et al., 1990)
+ %   \cite[chap. 2]{key} ==>>       (Jones et al., 1990, chap. 2)
+ %   \cite[e.g.][]{key} ==>>        (e.g. Jones et al., 1990)
+ %   \cite[e.g.][p. 32]{key} ==>>   (e.g. Jones et al., 1990, p. 32)
+ %   \citeauthor{key}               Jones et al.
+ %   \citefullauthor{key}           Jones, Baker, and Smith
+ %   \citeyear{key}                 1990
+%%---------------------------------------------------------------------
+
+ENTRY  % three lists follow: database fields, integer entry variables, string entry variables
+  { address
+    author
+    booktitle
+    chapter
+    edition
+    editor
+    howpublished
+    institution
+    journal
+    key
+    month
+    note
+    number
+    organization
+    pages
+    publisher
+    school
+    series
+    title
+    type
+    volume
+    year
+  }
+  {}  % no integer entry variables
+  { label extra.label sort.label }  % per-entry strings set by calc.label, forward.pass and presort
+
+INTEGERS { output.state before.all mid.sentence after.sentence after.block }  % output.state is compared against the four constants listed after it
+
+FUNCTION {init.state.consts}  % give the four output-state constants distinct values
+{ #0 'before.all :=
+  #1 'mid.sentence :=
+  #2 'after.sentence :=
+  #3 'after.block :=
+}
+
+STRINGS { s t }  % scratch strings reused by the helper functions below
+
+FUNCTION {output.nonnull}  % ( pending new -- new ): write the pending item with the separator implied by output.state
+{ 's :=  % park the new item; the pending (previous) item is now on top of the stack
+  output.state mid.sentence =
+    { ", " * write$ }  % same sentence: the pending item is followed by a comma
+    { output.state after.block =
+        { add.period$ write$  % block break: finish the pending item with a period ...
+          newline$
+          "\newblock " write$  % ... and open the next block on a new line
+        }
+        { output.state before.all =
+            'write$  % before.all: write the pending item unchanged
+            { add.period$ " " * write$ }  % remaining state (after.sentence): period plus a space
+          if$
+        }
+      if$
+      mid.sentence 'output.state :=
+    }
+  if$
+  s  % the new item becomes the pending one
+}
+
+FUNCTION {output}  % ( pending new -- pending-or-new ): output.nonnull, except that an empty new item is just dropped
+{ duplicate$ empty$
+    'pop$
+    'output.nonnull
+  if$
+}
+
+FUNCTION {output.check}  % ( pending new name -- pending-or-new ): like output, but warns when the new item is empty
+{ 't :=  % t = field name quoted in the warning
+  duplicate$ empty$
+    { pop$ "empty " t * " in " * cite$ * warning$ }
+    'output.nonnull
+  if$
+}
+
+FUNCTION {fin.entry}  % ( pending -- ): write the last pending item, closed with a period, and end the line
+{ add.period$
+  write$
+  newline$
+}
+
+FUNCTION {new.block}  % mark a block break before the next item, unless the state is still before.all
+{ output.state before.all =
+    'skip$
+    { after.block 'output.state := }
+  if$
+}
+
+FUNCTION {new.sentence}  % mark a sentence break, unless a block break is already pending or the state is before.all
+{ output.state after.block =
+    'skip$
+    { output.state before.all =
+        'skip$
+        { after.sentence 'output.state := }
+      if$
+    }
+  if$
+}
+
+FUNCTION {not}  % ( bool -- bool ): logical negation; the integer already on the stack drives if$
+{   { #0 }
+    { #1 }
+  if$
+}
+
+FUNCTION {and}  % ( a b -- bool ): b drives if$; when b is true the answer is a, which is left in place
+{   'skip$
+    { pop$ #0 }
+  if$
+}
+
+FUNCTION {or}  % ( a b -- bool ): b drives if$; when b is true a is replaced by 1, otherwise a is the answer
+{   { pop$ #1 }
+    'skip$
+  if$
+}
+
+FUNCTION {non.stop}  % ( str -- str bool ): true when add.period$ would append a period to str
+{ duplicate$
+   "}" * add.period$  % test a copy with "}" appended, so the copy ends in "." only if one was just added
+   #-1 #1 substring$ "." =  % compare the last character of that copy with "."
+}
+
+FUNCTION {new.block.checkb}  % ( a b -- ): call new.block unless both a and b are empty
+{ empty$
+  swap$ empty$
+  and
+    'skip$
+    'new.block
+  if$
+}
+
+FUNCTION {field.or.null}  % ( field -- str ): replace an empty field by the empty string ""
+{ duplicate$ empty$
+    { pop$ "" }
+    'skip$
+  if$
+}
+
+FUNCTION {emphasize}  % ( str -- str ): wrap a non-empty string in {\em ...}; empty input gives ""
+{ duplicate$ empty$
+    { pop$ "" }
+    { "{\em " swap$ * non.stop
+        { "\/}" * }  % non.stop was true: close with \/ before the brace
+        { "}" * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {bolden}  % ( str -- str ): wrap a non-empty string in {\bf ...}; empty input gives ""
+{ duplicate$ empty$
+    { pop$ "" }
+    { "{\bf " swap$ * "}" * }
+  if$
+}
+
+INTEGERS { nameptr namesleft numnames }  % loop state used by the name-list functions below
+
+FUNCTION {format.names}  % ( names -- str ): join all names, each formatted with "{vv~}{ll}{, jj}{, f.}"
+{ 's :=
+  #1 'nameptr :=
+  s num.names$ 'numnames :=
+  numnames 'namesleft :=
+    { namesleft #0 > }
+    { s nameptr
+      "{vv~}{ll}{, jj}{, f.}" format.name$ 't :=
+      nameptr #1 >
+        {
+          namesleft #1 >
+            { ", " * t * }  % not the last name: comma-separated
+            {
+              numnames #2 >
+                { "," * }  % more than two names: comma before the closing "and" / "et al."
+                'skip$
+              if$
+              t "others" =
+                { " " * "et~al." emphasize * }  % a trailing "others" is rendered as emphasized "et al."
+                { " and " * t * }
+              if$
+            }
+          if$
+        }
+        't  % the first name starts the result
+      if$
+      nameptr #1 + 'nameptr :=
+      namesleft #1 - 'namesleft :=
+    }
+  while$
+}
+
+FUNCTION {format.names.ed}  % ( names -- str ): same joining rules as format.names, but initials come first
+{ 's :=
+  #1 'nameptr :=
+  s num.names$ 'numnames :=
+  numnames 'namesleft :=
+    { namesleft #0 > }
+    { s nameptr
+      "{f.~}{vv~}{ll}{, jj}"
+      format.name$ 't :=
+      nameptr #1 >
+        {
+          namesleft #1 >
+            { ", " * t * }  % not the last name: comma-separated
+            {
+              numnames #2 >
+                { "," * }  % more than two names: comma before the closing "and" / "et al."
+                'skip$
+              if$
+              t "others" =
+                { " " * "et~al." emphasize * }
+                { " and " * t * }
+              if$
+            }
+          if$
+        }
+        't  % the first name starts the result
+      if$
+      nameptr #1 + 'nameptr :=
+      namesleft #1 - 'namesleft :=
+    }
+  while$
+}
+
+FUNCTION {format.key}  % ( field -- str ): the key field (or "") when the given field is empty, otherwise ""
+{ empty$
+    { key field.or.null }
+    { "" }
+  if$
+}
+
+FUNCTION {format.authors}  % ( -- str ): author list via format.names, or "" when author is empty
+{ author empty$
+    { "" }
+    { author format.names }
+  if$
+}
+
+FUNCTION {format.editors}  % ( -- str ): editor list via format.names plus ", editor(s)"; "" when editor is empty
+{ editor empty$
+    { "" }
+    { editor format.names
+      editor num.names$ #1 >
+        { ", editors" * }  % plural for more than one name
+        { ", editor" * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.in.editors}  % ( -- str ): like format.editors but uses format.names.ed (initials first)
+{ editor empty$
+    { "" }
+    { editor format.names.ed
+      editor num.names$ #1 >
+        { ", editors" * }  % plural for more than one name
+        { ", editor" * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.title}  % ( -- str ): title passed through change.case$ with mode "t"; "" when title is empty
+{ title empty$
+    { "" }
+    { title "t" change.case$
+    }
+  if$
+}
+
+FUNCTION {format.full.names}  % ( names -- str ): same joining rules as format.names, but each name is only "{vv~}{ll}"
+{'s :=
+  #1 'nameptr :=
+  s num.names$ 'numnames :=
+  numnames 'namesleft :=
+    { namesleft #0 > }
+    { s nameptr
+      "{vv~}{ll}" format.name$ 't :=
+      nameptr #1 >
+        {
+          namesleft #1 >
+            { ", " * t * }  % not the last name: comma-separated
+            {
+              numnames #2 >
+                { "," * }  % more than two names: comma before the closing "and" / "et al."
+                'skip$
+              if$
+              t "others" =
+                { " " * "et~al." emphasize * }
+                { " and " * t * }
+              if$
+            }
+          if$
+        }
+        't  % the first name starts the result
+      if$
+      nameptr #1 + 'nameptr :=
+      namesleft #1 - 'namesleft :=
+    }
+  while$
+}
+
+FUNCTION {author.editor.key.full}  % ( -- str ): full name list from author, else editor, else key, else a cite-key prefix
+{ author empty$
+    { editor empty$
+        { key empty$
+            { cite$ #1 #3 substring$ }  % last resort: first three characters of the cite key
+            'key
+          if$
+        }
+        { editor format.full.names }
+      if$
+    }
+    { author format.full.names }
+  if$
+}
+
+FUNCTION {author.key.full}  % ( -- str ): full name list from author, else key, else a cite-key prefix
+{ author empty$
+    { key empty$
+         { cite$ #1 #3 substring$ }
+          'key
+      if$
+    }
+    { author format.full.names }
+  if$
+}
+
+FUNCTION {editor.key.full}  % ( -- str ): full name list from editor, else key, else a cite-key prefix
+{ editor empty$
+    { key empty$
+         { cite$ #1 #3 substring$ }
+          'key
+      if$
+    }
+    { editor format.full.names }
+  if$
+}
+
+FUNCTION {make.full.names}  % ( -- str ): choose the full-name source according to the entry type
+{ type$ "book" =
+  type$ "inbook" =
+  or
+    'author.editor.key.full  % book / inbook: author, falling back to editor
+    { type$ "proceedings" =
+        'editor.key.full
+        'author.key.full
+      if$
+    }
+  if$
+}
+
+FUNCTION {output.bibitem}  % ( -- "" ): write the \bibitem[...]{cite-key} header and reset the output state
+{ newline$
+  "\bibitem[" write$
+  label write$  % label as built by calc.label already carries the opening "(" and the year
+  ")" make.full.names * "]{" * write$
+  cite$ write$
+  "}" write$
+  newline$
+  ""  % empty pending item for the first output call to write
+  before.all 'output.state :=
+}
+
+FUNCTION {n.dashify}  % ( str -- str ): turn each lone "-" into "--"; runs of two or more hyphens are copied as they are
+{ 't :=
+  ""
+    { t empty$ not }
+    { t #1 #1 substring$ "-" =
+        { t #1 #2 substring$ "--" = not
+            { "--" *  % a single hyphen becomes an en-dash
+              t #2 global.max$ substring$ 't :=
+            }
+            {   { t #1 #1 substring$ "-" = }  % copy the whole run of hyphens unchanged
+                { "-" *
+                  t #2 global.max$ substring$ 't :=
+                }
+              while$
+            }
+          if$
+        }
+        { t #1 #1 substring$ *  % any other character is copied through
+          t #2 global.max$ substring$ 't :=
+        }
+      if$
+    }
+  while$
+}
+
+FUNCTION {word.in}  % ( -- str ): the literal prefix used before a booktitle or crossref citation
+{ "In " }
+
+FUNCTION {format.date}  % ( -- str ): " (" year extra.label ")"; an empty year triggers a warning and becomes "????"
+{ year duplicate$ empty$
+    { "empty year in " cite$ * "; set to ????" * warning$
+       pop$ "????" }
+    'skip$
+  if$
+  before.all 'output.state :=  % makes the next output call write the pending item with no separator before the date
+  " (" swap$ * extra.label * ")" *
+}
+
+FUNCTION {format.btitle}  % ( -- str ): the title passed through emphasize
+{ title emphasize
+}
+
+FUNCTION {tie.or.space.connect}  % ( a b -- str ): a and b joined by "~" when b is shorter than 3 text characters, else by " "
+{ duplicate$ text.length$ #3 <
+    { "~" }
+    { " " }
+  if$
+  swap$ * *  % reorder to a, connector, b and concatenate
+}
+
+FUNCTION {either.or.check}  % ( names field -- ): warn "can't use both <names> fields in <cite key>" when field is non-empty
+{ empty$
+    'pop$
+    { "can't use both " swap$ * " fields in " * cite$ * warning$ }
+  if$
+}
+
+FUNCTION {format.bvolume}  % ( -- str ): "volume" joined to the volume field, plus " of " and the emphasized series; "" without a volume
+{ volume empty$
+    { "" }
+    { "volume" volume tie.or.space.connect
+      series empty$
+        'skip$
+        { " of " * series emphasize * }
+      if$
+      "volume and number" number either.or.check  % warns when number is set as well
+    }
+  if$
+}
+
+FUNCTION {format.number.series}  % ( -- str ): number-in-series text, produced only when volume is empty
+{ volume empty$
+    { number empty$
+        { series field.or.null }  % no number: just the series (or "")
+        { output.state mid.sentence =
+            { "number" }
+            { "Number" }  % capitalised when not in mid-sentence state
+          if$
+          number tie.or.space.connect
+          series empty$
+            { "there's a number but no series in " cite$ * warning$ }
+            { " in " * series * }
+          if$
+        }
+      if$
+    }
+    { "" }  % a volume is present, so this function contributes nothing
+  if$
+}
+
+FUNCTION {format.edition}  % ( -- str ): edition field followed by " edition"; "" when edition is empty
+{ edition empty$
+    { "" }
+    { output.state mid.sentence =
+        { edition "l" change.case$ " edition" * }  % mid-sentence: change.case$ mode "l"
+        { edition "t" change.case$ " edition" * }  % otherwise: change.case$ mode "t"
+      if$
+    }
+  if$
+}
+
+INTEGERS { multiresult }  % result flag of multi.page.check
+
+FUNCTION {multi.page.check}  % ( pages -- bool ): true when the string contains "-", "," or "+"
+{ 't :=
+  #0 'multiresult :=
+    { multiresult not
+      t empty$ not
+      and
+    }
+    { t #1 #1 substring$  % examine the first remaining character
+      duplicate$ "-" =
+      swap$ duplicate$ "," =
+      swap$ "+" =
+      or or
+        { #1 'multiresult := }
+        { t #2 global.max$ substring$ 't := }  % no match: drop that character and continue
+      if$
+    }
+  while$
+  multiresult
+}
+
+FUNCTION {format.pages}  % ( -- str ): "pages" or "page" joined to the pages field; "" when pages is empty
+{ pages empty$
+    { "" }
+    { pages multi.page.check
+        { "pages" pages n.dashify tie.or.space.connect }  % several pages: plural, with n.dashify applied
+        { "page" pages tie.or.space.connect }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.vol.num.pages}  % ( -- str ): bold volume, "(number)", then the pages
+{ volume field.or.null
+  bolden
+  number empty$
+    'skip$
+    { "(" number * ")" * *
+      volume empty$
+        { "there's a number but no volume in " cite$ * warning$ }
+        'skip$
+      if$
+    }
+  if$
+  pages empty$
+    'skip$
+    { duplicate$ empty$
+        { pop$ format.pages }  % nothing built so far: use the "page(s) ..." form instead
+        { ", " * pages n.dashify * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.chapter.pages}  % ( -- str ): chapter designation plus pages; just format.pages when chapter is empty
+{ chapter empty$
+    'format.pages
+    { type empty$
+        { "chapter" }
+        { type "l" change.case$ }  % a type field replaces the word "chapter"
+      if$
+      chapter tie.or.space.connect
+      pages empty$
+        'skip$
+        { ", " * format.pages * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.in.ed.booktitle}  % ( -- str ): "In " plus optional editors plus the emphasized booktitle; "" when booktitle is empty
+{ booktitle empty$
+    { "" }
+    { editor empty$
+        { word.in booktitle emphasize * }
+        { word.in format.in.editors * ", " * booktitle emphasize * }
+      if$
+    }
+  if$
+}
+
+FUNCTION {format.thesis.type}  % ( default -- str ): keep the default designation unless the type field is set
+{ type empty$
+    'skip$
+    { pop$  % discard the default pushed by the caller
+      type "t" change.case$
+    }
+  if$
+}
+
+FUNCTION {format.tr.number}  % ( -- str ): type field (default "Technical Report"), joined to number when present
+{ type empty$
+    { "Technical Report" }
+    'type
+  if$
+  number empty$
+    { "t" change.case$ }  % no number: the designation alone, in change.case$ mode "t"
+    { number tie.or.space.connect }
+  if$
+}
+
+FUNCTION {format.article.crossref}  % ( -- str ): "In \cite{<crossref>}"
+{
+  word.in
+  "\cite{" * crossref * "}" *
+}
+
+FUNCTION {format.book.crossref}  % ( -- str ): "Volume N of \cite{<crossref>}", or "In \cite{...}" plus a warning without a volume
+{ volume empty$
+    { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
+      word.in
+    }
+    { "Volume" volume tie.or.space.connect
+      " of " *
+    }
+  if$
+  "\cite{" * crossref * "}" *
+}
+
+FUNCTION {format.incoll.inproc.crossref}  % ( -- str ): "In \cite{<crossref>}"
+{
+  word.in
+  "\cite{" * crossref * "}" *
+}
+
+FUNCTION {article}  % entry-type function for article: authors, date, title, journal data or crossref, note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output  % contributes the key field only when author is empty
+  format.date "year" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  crossref missing$
+    { journal emphasize "journal" output.check
+      format.vol.num.pages output
+    }
+    { format.article.crossref output.nonnull
+      format.pages output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {book}  % entry-type function for book: authors or editors, date, title, publication data or crossref, edition, note
+{ output.bibitem
+  author empty$
+    { format.editors "author and editor" output.check  % no author: editors take the author position
+      editor format.key output
+    }
+    { format.authors output.nonnull
+      crossref missing$
+        { "author and editor" editor either.or.check }  % warn when both author and editor are given
+        'skip$
+      if$
+    }
+  if$
+  format.date "year" output.check
+  new.block
+  format.btitle "title" output.check
+  crossref missing$
+    { format.bvolume output
+      new.block
+      format.number.series output
+      new.sentence
+      publisher "publisher" output.check
+      address output
+    }
+    {
+      new.block
+      format.book.crossref output.nonnull
+    }
+  if$
+  format.edition output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {booklet}  % entry-type function for booklet: authors, date, title, howpublished, address, note
+{ output.bibitem
+  format.authors output  % author is optional here: no warning when empty
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  howpublished output
+  address output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {inbook}  % entry-type function for inbook: like book, with a chapter/pages item added
+{ output.bibitem
+  author empty$
+    { format.editors "author and editor" output.check  % no author: editors take the author position
+      editor format.key output
+    }
+    { format.authors output.nonnull
+      crossref missing$
+        { "author and editor" editor either.or.check }  % warn when both author and editor are given
+        'skip$
+      if$
+    }
+  if$
+  format.date "year" output.check
+  new.block
+  format.btitle "title" output.check
+  crossref missing$
+    { format.bvolume output
+      format.chapter.pages "chapter and pages" output.check
+      new.block
+      format.number.series output
+      new.sentence
+      publisher "publisher" output.check
+      address output
+    }
+    { format.chapter.pages "chapter and pages" output.check
+      new.block
+      format.book.crossref output.nonnull
+    }
+  if$
+  format.edition output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {incollection}  % entry-type function for incollection: authors, date, title, containing book data or crossref, note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  crossref missing$
+    { format.in.ed.booktitle "booktitle" output.check
+      format.bvolume output
+      format.number.series output
+      format.chapter.pages output
+      new.sentence
+      publisher "publisher" output.check
+      address output
+      format.edition output
+    }
+    { format.incoll.inproc.crossref output.nonnull
+      format.chapter.pages output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {inproceedings}  % entry-type function for inproceedings: authors, date, title, proceedings data or crossref, note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  crossref missing$
+    { format.in.ed.booktitle "booktitle" output.check
+      format.bvolume output
+      format.number.series output
+      format.pages output
+      address output
+      new.sentence
+      organization output
+      publisher output
+    }
+    { format.incoll.inproc.crossref output.nonnull
+      format.pages output
+    }
+  if$
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {conference} { inproceedings }  % conference entries are formatted by inproceedings
+
+FUNCTION {manual}  % entry-type function for manual: authors, date, title, organization, address, edition, note
+{ output.bibitem
+  format.authors output  % author is optional here: no warning when empty
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.btitle "title" output.check
+  organization address new.block.checkb  % new block only when organization or address is present
+  organization output
+  address output
+  format.edition output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {mastersthesis}  % entry-type function for mastersthesis: authors, date, title, thesis type, school, address, note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.btitle "title" output.check
+  new.block
+  "Master's thesis" format.thesis.type output.nonnull  % default wording, replaced by the type field when set
+  school "school" output.check
+  address output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {misc}  % entry-type function for misc: authors, date, title, howpublished, note (only year is checked)
+{ output.bibitem
+  format.authors output
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.title output
+  new.block
+  howpublished output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {phdthesis}  % entry-type function for phdthesis: authors, date, title, thesis type, school, address, note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.btitle "title" output.check
+  new.block
+  "Ph.D. thesis" format.thesis.type output.nonnull  % default wording, replaced by the type field when set
+  school "school" output.check
+  address output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {proceedings}  % entry-type function for proceedings: editors, date, title, volume/series, address, organization, publisher, note
+{ output.bibitem
+  format.editors output
+  editor format.key output  % contributes the key field only when editor is empty
+  format.date "year" output.check
+  new.block
+  format.btitle "title" output.check
+  format.bvolume output
+  format.number.series output
+  address output
+  new.sentence
+  organization output
+  publisher output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {techreport}  % entry-type function for techreport: authors, date, title, report type/number, institution, address, note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  format.tr.number output.nonnull
+  institution "institution" output.check
+  address output
+  new.block
+  note output
+  fin.entry
+}
+
+FUNCTION {unpublished}  % entry-type function for unpublished: authors, date, title and a checked note
+{ output.bibitem
+  format.authors "author" output.check
+  author format.key output
+  format.date "year" output.check
+  new.block
+  format.title "title" output.check
+  new.block
+  note "note" output.check  % unlike the other entry types, an empty note produces a warning
+  fin.entry
+}
+
+FUNCTION {default.type} { misc }  % fallback entry-type function: delegates to misc
+
+MACRO {jan} {"January"}  % month-name macros: jan ... dec
+
+MACRO {feb} {"February"}
+
+MACRO {mar} {"March"}
+
+MACRO {apr} {"April"}
+
+MACRO {may} {"May"}
+
+MACRO {jun} {"June"}
+
+MACRO {jul} {"July"}
+
+MACRO {aug} {"August"}
+
+MACRO {sep} {"September"}
+
+MACRO {oct} {"October"}
+
+MACRO {nov} {"November"}
+
+MACRO {dec} {"December"}
+
+MACRO {acmcs} {"ACM Computing Surveys"}  % journal-name macros from here on
+
+MACRO {acta} {"Acta Informatica"}
+
+MACRO {cacm} {"Communications of the ACM"}
+
+MACRO {ibmjrd} {"IBM Journal of Research and Development"}
+
+MACRO {ibmsj} {"IBM Systems Journal"}
+
+MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
+
+MACRO {ieeetc} {"IEEE Transactions on Computers"}
+
+MACRO {ieeetcad}
+ {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
+
+MACRO {ipl} {"Information Processing Letters"}
+
+MACRO {jacm} {"Journal of the ACM"}
+
+MACRO {jcss} {"Journal of Computer and System Sciences"}
+
+MACRO {scp} {"Science of Computer Programming"}
+
+MACRO {sicomp} {"SIAM Journal on Computing"}
+
+MACRO {tocs} {"ACM Transactions on Computer Systems"}
+
+MACRO {tods} {"ACM Transactions on Database Systems"}
+
+MACRO {tog} {"ACM Transactions on Graphics"}
+
+MACRO {toms} {"ACM Transactions on Mathematical Software"}
+
+MACRO {toois} {"ACM Transactions on Office Information Systems"}
+
+MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
+
+MACRO {tcs} {"Theoretical Computer Science"}
+
+READ  % load the database entries; the functions defined after this point work on their fields
+
+FUNCTION {sortify}  % ( str -- str ): purify$ followed by change.case$ mode "l", used when building sort keys
+{ purify$
+  "l" change.case$
+}
+
+INTEGERS { len }  % length argument of chop.word
+
+FUNCTION {chop.word}  % ( word len str -- str ): drop the first len characters of str when they equal word
+{ 's :=
+  'len :=
+  s #1 len substring$ =
+    { s len #1 + global.max$ substring$ }
+    's
+  if$
+}
+
+FUNCTION {format.lab.names}  % ( names -- str ): short citation form: one name, "A and B", or "A et al."
+{ 's :=
+  s #1 "{vv~}{ll}" format.name$
+  s num.names$ duplicate$
+  #2 >
+    { pop$ " " * "et~al." emphasize * }  % more than two names
+    { #2 <
+        'skip$  % a single name is used as it is
+        { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
+            { " " * "et~al." emphasize * }  % second name is the literal "others"
+            { " and " * s #2 "{vv~}{ll}" format.name$ * }
+          if$
+        }
+      if$
+    }
+  if$
+}
+
+FUNCTION {author.key.label}  % ( -- str ): label names from author, else key, else the first three characters of the cite key
+{ author empty$
+    { key empty$
+        { cite$ #1 #3 substring$ }
+        'key
+      if$
+    }
+    { author format.lab.names }
+  if$
+}
+
+FUNCTION {author.editor.key.label}  % ( -- str ): label names from author, else editor, else key, else a cite-key prefix
+{ author empty$
+    { editor empty$
+        { key empty$
+            { cite$ #1 #3 substring$ }  % last resort: first three characters of the cite key
+            'key
+          if$
+        }
+        { editor format.lab.names }
+      if$
+    }
+    { author format.lab.names }
+  if$
+}
+
+FUNCTION {editor.key.label}  % ( -- str ): label names from editor, else key, else the first three characters of the cite key
+{ editor empty$
+    { key empty$
+        { cite$ #1 #3 substring$ }
+        'key
+      if$
+    }
+    { editor format.lab.names }
+  if$
+}
+
+FUNCTION {calc.label}  % ( -- ): set label to the short names, "(", and the year (the closing ")" is not included)
+{ type$ "book" =
+  type$ "inbook" =
+  or
+    'author.editor.key.label
+    { type$ "proceedings" =
+        'editor.key.label
+        'author.key.label
+      if$
+    }
+  if$
+  "("
+  *
+  year duplicate$ empty$
+     { pop$ "????" }
+     { purify$ #-1 #4 substring$ }  % last four characters of the purified year
+  if$
+  *
+  'label :=
+}
+
+FUNCTION {sort.format.names}  % ( names -- str ): sort-key form of a name list
+{ 's :=
+  #1 'nameptr :=
+  ""
+  s num.names$ 'numnames :=
+  numnames 'namesleft :=
+    { namesleft #0 > }
+    { nameptr #1 >
+        { "   " * }  % three spaces separate consecutive names
+        'skip$
+      if$
+      s nameptr
+      "{vv{ } }{ll{ }}{  f{ }}{  jj{ }}"
+      format.name$ 't :=
+      nameptr numnames = t "others" = and
+        { "et al" * }  % a final "others" entry
+        { numnames #2 > nameptr #2 = and
+          { "zzzzzz" * #1 'namesleft := }  % more than two names: second slot becomes "zzzzzz" and the loop ends after this pass
+          { t sortify * }
+        if$
+        }
+      if$
+      nameptr #1 + 'nameptr :=
+      namesleft #1 - 'namesleft :=
+    }
+  while$
+}
+
+FUNCTION {sort.format.title}  % ( title -- str ): sort-key form of a title, with a leading "The ", "An " or "A " removed
+{ 't :=
+  "A " #2
+    "An " #3
+      "The " #4 t chop.word  % innermost call runs first; each result feeds the next chop.word
+    chop.word
+  chop.word
+  sortify
+  #1 global.max$ substring$
+}
+
+FUNCTION {author.sort}  % ( -- str ): sort names from author, else the sortified key, else "" with a warning
+{ author empty$
+    { key empty$
+        { "to sort, need author or key in " cite$ * warning$
+          ""
+        }
+        { key sortify }
+      if$
+    }
+    { author sort.format.names }
+  if$
+}
+
+FUNCTION {author.editor.sort}  % ( -- str ): sort names from author, else editor, else the sortified key, else "" with a warning
+{ author empty$
+    { editor empty$
+        { key empty$
+            { "to sort, need author, editor, or key in " cite$ * warning$
+              ""
+            }
+            { key sortify }
+          if$
+        }
+        { editor sort.format.names }
+      if$
+    }
+    { author sort.format.names }
+  if$
+}
+
+FUNCTION {editor.sort}  % ( -- str ): sort names from editor, else the sortified key, else "" with a warning
+{ editor empty$
+    { key empty$
+        { "to sort, need editor or key in " cite$ * warning$
+          ""
+        }
+        { key sortify }
+      if$
+    }
+    { editor sort.format.names }
+  if$
+}
+
+FUNCTION {presort}  % ( -- ): compute label and a first sort key: sortified label, sort names, sort title
+{ calc.label
+  label sortify
+  "    "
+  *
+  type$ "book" =
+  type$ "inbook" =
+  or
+    'author.editor.sort
+    { type$ "proceedings" =
+        'editor.sort
+        'author.sort
+      if$
+    }
+  if$
+  #1 entry.max$ substring$
+  'sort.label :=  % keep the truncated sort names for reuse in bib.sort.order
+  sort.label
+  *
+  "    "
+  *
+  title field.or.null
+  sort.format.title
+  *
+  #1 entry.max$ substring$
+  'sort.key$ :=
+}
+
+ITERATE {presort}  % run presort on every entry
+
+SORT  % order the entries by the sort.key$ just computed
+
+STRINGS { last.label next.extra }  % state carried between entries by forward.pass and reverse.pass
+
+INTEGERS { last.extra.num }  % character code of the most recently assigned suffix letter
+
+FUNCTION {initialize.extra.label.stuff}  % reset the state used by the two label passes
+{ #0 int.to.chr$ 'last.label :=
+  "" 'next.extra :=
+  #0 'last.extra.num :=
+}
+
+FUNCTION {forward.pass}  % ( -- ): give entries that repeat the previous entry's label the suffixes "b", "c", ...
+{ last.label label =
+    { last.extra.num #1 + 'last.extra.num :=  % same label as the previous entry: next letter
+      last.extra.num int.to.chr$ 'extra.label :=
+    }
+    { "a" chr.to.int$ 'last.extra.num :=  % new label: counter restarts at "a", suffix stays empty for now
+      "" 'extra.label :=
+      label 'last.label :=
+    }
+  if$
+}
+
+FUNCTION {reverse.pass}  % ( -- ): assign the missing "a" suffix and append extra.label to label
+{ next.extra "b" =
+    { "a" 'extra.label := }  % the entry processed just before this one (REVERSE order) got "b", so this one becomes "a"
+    'skip$
+  if$
+  extra.label 'next.extra :=
+  label extra.label * 'label :=
+}
+
+EXECUTE {initialize.extra.label.stuff}  % reset the state before the two label passes
+
+ITERATE {forward.pass}  % first pass, in sorted order
+
+REVERSE {reverse.pass}  % second pass, in reverse order
+
+FUNCTION {bib.sort.order}  % ( -- ): final sort key: sort.label, sortified year, sort title
+{ sort.label
+  "    "
+  *
+  year field.or.null sortify
+  *
+  "    "
+  *
+  title field.or.null
+  sort.format.title
+  *
+  #1 entry.max$ substring$
+  'sort.key$ :=
+}
+
+ITERATE {bib.sort.order}  % compute the final key for every entry
+
+SORT  % order the entries by the final key
+
+FUNCTION {begin.bib}  % ( -- ): write the preamble, if any, and open the thebibliography environment
+{ preamble$ empty$
+    'skip$
+    { preamble$ write$ newline$ }
+  if$
+  "\begin{thebibliography}{}" write$ newline$
+}
+
+EXECUTE {begin.bib}
+
+EXECUTE {init.state.consts}  % the state constants must be set before any entry is formatted
+
+ITERATE {call.type$}  % format each entry with the function named after its entry type
+
+FUNCTION {end.bib}  % ( -- ): close the thebibliography environment
+{ newline$
+  "\end{thebibliography}" write$ newline$
+}
+
+EXECUTE {end.bib}
+%% End of customized bst file 
+
diff --git a/tex/natbib.sty b/tex/natbib.sty
new file mode 100644
index 0000000..4c8c948
--- /dev/null
+++ b/tex/natbib.sty
@@ -0,0 +1,803 @@
+%%
+%% This is file `natbib.sty',
+%% generated with the docstrip utility.
+%%
+%% The original source files were:
+%%
+%% natbib.dtx  (with options: `package,all')
+%% =============================================
+%% IMPORTANT NOTICE:
+%% 
+%% This program can be redistributed and/or modified under the terms
+%% of the LaTeX Project Public License Distributed from CTAN
+%% archives in directory macros/latex/base/lppl.txt; either
+%% version 1 of the License, or any later version.
+%% 
+%% This is a generated file.
+%% It may not be distributed without the original source file natbib.dtx.
+%% 
+%% Full documentation can be obtained by LaTeXing that original file.
+%% Only a few abbreviated comments remain here to describe the usage.
+%% =============================================
+%% Copyright 1993-2000 Patrick W Daly
+%% Max-Planck-Institut f\"ur Aeronomie
+%% Max-Planck-Str. 2
+%% D-37191 Katlenburg-Lindau
+%% Germany
+%% E-mail: daly at linmpi.mpg.de
+\NeedsTeXFormat{LaTeX2e}[1995/06/01]
+\ProvidesPackage{natbib}
+        [2000/07/24 7.0a (PWD)]
+ % This package reimplements the LaTeX \cite command to be used for various
+ % citation styles, both author-year and numerical. It accepts BibTeX
+ % output intended for many other packages, and therefore acts as a
+ % general, all-purpose citation-style interface.
+ %
+ % With standard numerical .bst files, only numerical citations are
+ % possible. With an author-year .bst file, both numerical and
+ % author-year citations are possible.
+ %
+ % If author-year citations are selected, \bibitem must have one of the
+ %   following forms:
+ %   \bibitem[Jones et al.(1990)]{key}...
+ %   \bibitem[Jones et al.(1990)Jones, Baker, and Williams]{key}...
+ %   \bibitem[Jones et al., 1990]{key}...
+ %   \bibitem[\protect\citeauthoryear{Jones, Baker, and Williams}{Jones
+ %       et al.}{1990}]{key}...
+ %   \bibitem[\protect\citeauthoryear{Jones et al.}{1990}]{key}...
+ %   \bibitem[\protect\astroncite{Jones et al.}{1990}]{key}...
+ %   \bibitem[\protect\citename{Jones et al., }1990]{key}...
+ %   \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}...
+ %
+ % This is either to be made up manually, or to be generated by an
+ % appropriate .bst file with BibTeX.
+ %                            Author-year mode     ||   Numerical mode
+ % Then, \citet{key}  ==>>  Jones et al. (1990)    ||   Jones et al. [21]
+ %       \citep{key}  ==>> (Jones et al., 1990)    ||   [21]
+ % Multiple citations as normal:
+ % \citep{key1,key2}  ==>> (Jones et al., 1990; Smith, 1989) || [21,24]
+ %                           or  (Jones et al., 1990, 1991)  || [21,24]
+ %                           or  (Jones et al., 1990a,b)     || [21,24]
+ % \cite{key} is the equivalent of \citet{key} in author-year mode
+ %                         and  of \citep{key} in numerical mode
+ % Full author lists may be forced with \citet* or \citep*, e.g.
+ %       \citep*{key}      ==>> (Jones, Baker, and Williams, 1990)
+ % Optional notes as:
+ %   \citep[chap. 2]{key}    ==>> (Jones et al., 1990, chap. 2)
+ %   \citep[e.g.,][]{key}    ==>> (e.g., Jones et al., 1990)
+ %   \citep[see][pg. 34]{key}==>> (see Jones et al., 1990, pg. 34)
+ %  (Note: in standard LaTeX, only one note is allowed, after the ref.
+ %   Here, one note is like the standard, two make pre- and post-notes.)
+ %   \citealt{key}          ==>> Jones et al. 1990
+ %   \citealt*{key}         ==>> Jones, Baker, and Williams 1990
+ %   \citealp{key}          ==>> Jones et al., 1990
+ %   \citealp*{key}         ==>> Jones, Baker, and Williams, 1990
+ % Additional citation possibilities (both author-year and numerical modes)
+ %   \citeauthor{key}       ==>> Jones et al.
+ %   \citeauthor*{key}      ==>> Jones, Baker, and Williams
+ %   \citeyear{key}         ==>> 1990
+ %   \citeyearpar{key}      ==>> (1990)
+ %   \citetext{priv. comm.} ==>> (priv. comm.)
+ % Note: full author lists depend on whether the bib style supports them;
+ %       if not, the abbreviated list is printed even when full requested.
+ %
+ % For names like della Robbia at the start of a sentence, use
+ %   \Citet{dRob98}         ==>> Della Robbia (1998)
+ %   \Citep{dRob98}         ==>> (Della Robbia, 1998)
+ %   \Citeauthor{dRob98}    ==>> Della Robbia
+ %
+ %
+ % Citation aliasing is achieved with
+ %   \defcitealias{key}{text}
+ %   \citetalias{key}  ==>> text
+ %   \citepalias{key}  ==>> (text)
+ %
+ % Defining the citation style of a given bib style:
+ % Use \bibpunct (in the preamble only) with 6 mandatory arguments:
+ %    1. opening bracket for citation
+ %    2. closing bracket
+ %    3. citation separator (for multiple citations in one \cite)
+ %    4. the letter n for numerical styles, s for superscripts
+ %        else anything for author-year
+ %    5. punctuation between authors and date
+ %    6. punctuation between years (or numbers) when common authors missing
+ % One optional argument is the character coming before post-notes. It
+ %   appears in square brackets before all other arguments. May be left off.
+ % Example (and default) \bibpunct[, ]{(}{)}{;}{a}{,}{,}
+ %
+ % To make this automatic for a given bib style, named newbib, say, make
+ % a local configuration file, natbib.cfg, with the definition
+ %   \newcommand{\bibstyle@newbib}{\bibpunct...}
+ % Then the \bibliographystyle{newbib} will cause \bibstyle@newbib to
+ % be called on THE NEXT LATEX RUN (via the aux file).
+ %
+ % Such preprogrammed definitions may be invoked in the text (preamble only)
+ %  by calling \citestyle{newbib}. This is only useful if the style specified
+ %  differs from that in \bibliographystyle.
+ %
+ % With \citeindextrue and \citeindexfalse, one can control whether the
+ % \cite commands make an automatic entry of the citation in the .idx
+ % indexing file. For this, \makeindex must also be given in the preamble.
+ %
+ % LaTeX2e Options: (for selecting punctuation)
+ %   round  -  round parentheses are used (default)
+ %   square -  square brackets are used   [option]
+ %   curly  -  curly braces are used      {option}
+ %   angle  -  angle brackets are used    <option>
+ %   colon  -  multiple citations separated by semicolon (default)
+ %   comma  -  separated by comma
+ %   authoryear - selects author-year citations (default)
+ %   numbers-  selects numerical citations
+ %   super  -  numerical citations as superscripts
+ %   sort   -  sorts multiple citations according to order in ref. list
+ %   sort&compress   -  like sort, but also compresses numerical citations
+ %   longnamesfirst  -  makes first citation full author list
+ %   sectionbib - puts bibliography in a \section* instead of \chapter*
+ % Punctuation so selected dominates over any predefined ones.
+ % LaTeX2e options are called as, e.g.
+ %        \usepackage[square,comma]{natbib}
+ % LaTeX the source file natbib.dtx to obtain more details
+ % or the file natnotes.tex for a brief reference sheet.
+ %-----------------------------------------------------------
+ % Guard: for each of the classes aguplus, nlinproc and egs, raise a
+ % package error and stop reading this file (\endinput) if the class
+ % is already loaded; the error text says the class includes natbib.
+\@ifclassloaded{aguplus}{\PackageError{natbib}
+  {The aguplus class already includes natbib coding,\MessageBreak
+   so you should not add it explicitly}
+  {Type <Return> for now, but then later remove\MessageBreak
+   the command \protect\usepackage{natbib} from the document}
+  \endinput}{}
+\@ifclassloaded{nlinproc}{\PackageError{natbib}
+  {The nlinproc class already includes natbib coding,\MessageBreak
+   so you should not add it explicitly}
+  {Type <Return> for now, but then later remove\MessageBreak
+   the command \protect\usepackage{natbib} from the document}
+  \endinput}{}
+\@ifclassloaded{egs}{\PackageError{natbib}
+  {The egs class already includes natbib coding,\MessageBreak
+   so you should not add it explicitly}
+  {Type <Return> for now, but then later remove\MessageBreak
+   the command \protect\usepackage{natbib} from the document}
+  \endinput}{}
+ % Define citation punctuation for some author-year styles
+ % One may add and delete at this point
+ % Or put additions into local configuration file natbib.cfg
+ % Each preset is a macro \bibstyle@<name> that calls \bibpunct; it is
+ % looked up by name through \bibstyle{<name>}.
+\newcommand\bibstyle@chicago{\bibpunct{(}{)}{;}{a}{,}{,}}
+\newcommand\bibstyle@named{\bibpunct{[}{]}{;}{a}{,}{,}}
+\newcommand\bibstyle@agu{\bibpunct{[}{]}{;}{a}{,}{,~}}%Amer. Geophys. Union
+\newcommand\bibstyle@egs{\bibpunct{(}{)}{;}{a}{,}{,}}%Eur. Geophys. Soc.
+\newcommand\bibstyle@agsm{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
+\newcommand\bibstyle@kluwer{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
+\newcommand\bibstyle@dcu{\bibpunct{(}{)}{;}{a}{;}{,}\gdef\harvardand{and}}
+\newcommand\bibstyle@aa{\bibpunct{(}{)}{;}{a}{}{,}} %Astronomy & Astrophysics
+\newcommand\bibstyle@pass{\bibpunct{(}{)}{;}{a}{,}{,}}%Planet. & Space Sci
+\newcommand\bibstyle@anngeo{\bibpunct{(}{)}{;}{a}{,}{,}}%Annales Geophysicae
+\newcommand\bibstyle@nlinproc{\bibpunct{(}{)}{;}{a}{,}{,}}%Nonlin.Proc.Geophys.
+ % Define citation punctuation for some numerical styles
+\newcommand\bibstyle@cospar{\bibpunct{/}{/}{,}{n}{}{}%
+     \gdef\NAT@biblabelnum##1{##1.}}
+\newcommand\bibstyle@esa{\bibpunct{(Ref.~}{)}{,}{n}{}{}%
+     \gdef\NAT@biblabelnum##1{##1.\hspace{1em}}}
+\newcommand\bibstyle@nature{\bibpunct{}{}{,}{s}{}{\textsuperscript{,}}%
+     \gdef\NAT@biblabelnum##1{##1.}}
+ % The standard LaTeX styles
+\newcommand\bibstyle@plain{\bibpunct{[}{]}{,}{n}{}{,}}
+\let\bibstyle@alpha=\bibstyle@plain
+\let\bibstyle@abbrv=\bibstyle@plain
+\let\bibstyle@unsrt=\bibstyle@plain
+ % The author-year modifications of the standard styles
+\newcommand\bibstyle@plainnat{\bibpunct{[}{]}{,}{a}{,}{,}}
+\let\bibstyle@abbrvnat=\bibstyle@plainnat
+\let\bibstyle@unsrtnat=\bibstyle@plainnat
+ % Package options. Bracket and separator options redefine \NAT@open,
+ % \NAT@close and \NAT@sep and then execute `nobibstyle', which makes
+ % \bibstyle discard its argument so that presets do not override them.
+\newif\ifNAT@numbers \NAT@numbersfalse
+\newif\ifNAT@super \NAT@superfalse
+\DeclareOption{numbers}{\NAT@numberstrue
+   \ExecuteOptions{square,comma,nobibstyle}}
+\DeclareOption{super}{\NAT@supertrue\NAT@numberstrue
+   \renewcommand\NAT@open{}\renewcommand\NAT@close{}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{authoryear}{\NAT@numbersfalse
+   \ExecuteOptions{round,colon,bibstyle}}
+\DeclareOption{round}{%
+      \renewcommand\NAT@open{(} \renewcommand\NAT@close{)}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{square}{%
+      \renewcommand\NAT@open{[} \renewcommand\NAT@close{]}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{angle}{%
+      \renewcommand\NAT@open{$<$} \renewcommand\NAT@close{$>$}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{curly}{%
+      \renewcommand\NAT@open{\{} \renewcommand\NAT@close{\}}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{comma}{\renewcommand\NAT@sep{,}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{colon}{\renewcommand\NAT@sep{;}
+   \ExecuteOptions{nobibstyle}}
+\DeclareOption{nobibstyle}{\let\bibstyle=\@gobble}
+\DeclareOption{bibstyle}{\let\bibstyle=\@citestyle}
+\newif\ifNAT@openbib \NAT@openbibfalse
+\DeclareOption{openbib}{\NAT@openbibtrue}
+\DeclareOption{sectionbib}{\def\NAT@sectionbib{on}}
+ % \NAT@sort: 0 = no sorting, 1 = sort, 2 = sort&compress
+\def\NAT@sort{0}
+\DeclareOption{sort}{\def\NAT@sort{1}}
+\DeclareOption{sort&compress}{\def\NAT@sort{2}}
+\@ifpackageloaded{cite}{\PackageWarningNoLine{natbib}
+  {The `cite' package should not be used\MessageBreak
+   with natbib. Use option `sort' instead}\ExecuteOptions{sort}}{}
+\newif\ifNAT@longnames\NAT@longnamesfalse
+\DeclareOption{longnamesfirst}{\NAT@longnamestrue}
+\DeclareOption{nonamebreak}{\def\NAT@nmfmt#1{\mbox{\NAT@up#1}}}
+\def\NAT@nmfmt#1{{\NAT@up#1}}
+ % \bibstyle{<name>} runs \bibstyle@<name> when that macro exists.
+\renewcommand\bibstyle[1]{\@ifundefined{bibstyle@#1}{\relax}
+     {\csname bibstyle@#1\endcsname}}
+\AtBeginDocument{\global\let\bibstyle=\@gobble}
+\let\@citestyle\bibstyle
+\newcommand\citestyle[1]{\@citestyle{#1}\let\bibstyle\@gobble}
+\@onlypreamble{\citestyle}\@onlypreamble{\@citestyle}
+ % \bibpunct[<post-note sep>]{open}{close}{cite sep}{mode}{author-year sep}
+ %          {year sep}: globally set the citation punctuation. Mode `n'
+ % selects numbers, `s' numbers as superscripts, anything else
+ % author-year. Afterwards \bibstyle is disabled. Preamble only.
+\newcommand\bibpunct[7][, ]%
+  {\gdef\NAT@open{#2}\gdef\NAT@close{#3}\gdef
+   \NAT@sep{#4}\global\NAT@numbersfalse\ifx #5n\global\NAT@numberstrue
+   \else
+   \ifx #5s\global\NAT@numberstrue\global\NAT@supertrue
+   \fi\fi
+   \gdef\NAT@aysep{#6}\gdef\NAT@yrsep{#7}%
+   \gdef\NAT@cmt{#1}%
+   \global\let\bibstyle\@gobble
+  }
+\@onlypreamble{\bibpunct}
+ % Default punctuation. The bracket and separator macros must exist
+ % before \ProcessOptions because the options \renewcommand them.
+\newcommand\NAT@open{(} \newcommand\NAT@close{)}
+\newcommand\NAT@sep{;}
+\ProcessOptions
+\newcommand\NAT@aysep{,} \newcommand\NAT@yrsep{,}
+\newcommand\NAT@cmt{, }
+ % Outer wrappers used as \@cite: #1 = formatted citation list,
+ % #2 = pre-note, #3 = post-note. With \ifNAT@swa true the brackets and
+ % notes are added here; otherwise only #1 is output. Each wrapper
+ % closes the group opened by the user-level citation command.
+\newcommand\NAT@cite%
+    [3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\ \fi
+        #1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
+\newcommand\NAT@citenum%
+    [3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\ \fi
+        #1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
+\newcommand\NAT@citesuper[3]{\ifNAT@swa
+\unskip\hspace{1\p@}\textsuperscript{#1}%
+   \if*#3*\else\ (#3)\fi\else #1\fi\endgroup}
+\providecommand
+  \textsuperscript[1]{\mbox{$^{\mbox{\scriptsize#1}}$}}
+\providecommand\@firstofone[1]{#1}
+ % \NAT@citexnum[<pre>][<post>]{<keys>}: numerical-mode replacement for
+ % \@citex. It loops over \NAT@cite@list, writes each key to the aux
+ % file, warns on undefined keys, and typesets numbers (collecting
+ % consecutive numbers into ranges when \NAT@sort > 1) or, depending on
+ % \NAT@ctype, the author / year / alias text.
+\newcommand\NAT@citexnum{}
+\def\NAT@citexnum[#1][#2]#3{%
+ \NAT@sort@cites{#3}%
+ \let\@citea\@empty
+  \@cite{\def\NAT@num{-1}\let\NAT@last@yr\relax\let\NAT@nm\@empty
+    \@for\@citeb:=\NAT@cite@list\do
+    {\edef\@citeb{\expandafter\@firstofone\@citeb}%
+     \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
+     \@ifundefined{b@\@citeb\@extra@b@citeb}{%
+       {\reset@font\bfseries?}
+        \NAT@citeundefined\PackageWarning{natbib}%
+       {Citation `\@citeb' on page \thepage \space undefined}}%
+     {\let\NAT@last@num\NAT@num\let\NAT@last@nm\NAT@nm
+      \NAT@parse{\@citeb}%
+      \ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
+        \let\NAT@name=\NAT@all@names
+        \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
+      \fi
+      \ifNAT@full\let\NAT@nm\NAT@all@names\else
+        \let\NAT@nm\NAT@name\fi
+      \ifNAT@swa
+       \ifnum\NAT@ctype>1\relax\@citea
+        \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+            \ifnum\NAT@ctype=2\relax\NAT@test{\NAT@ctype}%
+            \else\NAT@alias
+            \fi\hyper@natlinkend\else
+       \ifnum\NAT@sort>1
+         \begingroup\catcode`\_=8
+            \ifcat _\ifnum\z@<0\NAT@num _\else A\fi
+              \global\let\NAT@nm=\NAT@num \else \gdef\NAT@nm{-2}\fi
+            \ifcat _\ifnum\z@<0\NAT@last@num _\else A\fi
+              \global\@tempcnta=\NAT@last@num \global\advance\@tempcnta by\@ne
+              \else \global\@tempcnta\m@ne\fi
+         \endgroup
+         \ifnum\NAT@nm=\@tempcnta
+           \ifx\NAT@last@yr\relax
+             \edef\NAT@last@yr{\@citea \mbox{\noexpand\citenumfont{\NAT@num}}}%
+           \else
+             \edef\NAT@last@yr{--\penalty\@m\mbox{\noexpand\citenumfont{\NAT@num}}}%
+           \fi
+         \else
+           \NAT@last@yr \@citea \mbox{\citenumfont{\NAT@num}}%
+           \let\NAT@last@yr\relax
+         \fi
+       \else
+         \@citea \mbox{\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+           {\citenumfont{\NAT@num}}\hyper@natlinkend}%
+       \fi
+       \fi
+       \def\@citea{\NAT@sep\penalty\@m\NAT@space}%
+      \else
+        \ifcase\NAT@ctype\relax
+          \ifx\NAT@last@nm\NAT@nm \NAT@yrsep\penalty\@m\NAT@space\else
+          \@citea \NAT@test{1}\ \NAT@@open
+          \if*#1*\else#1\ \fi\fi \NAT@mbox{%
+          \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+          {\citenumfont{\NAT@num}}\hyper@natlinkend}%
+          \def\@citea{\NAT@@close\NAT@sep\penalty\@m\ }%
+        \or\@citea
+          \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+           \NAT@test{\NAT@ctype}\hyper@natlinkend
+          \def\@citea{\NAT@sep\penalty\@m\ }%
+        \or\@citea
+          \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+           \NAT@test{\NAT@ctype}\hyper@natlinkend
+          \def\@citea{\NAT@sep\penalty\@m\ }%
+        \or\@citea
+          \hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+           \NAT@alias\hyper@natlinkend
+          \def\@citea{\NAT@sep\penalty\@m\ }%
+        \fi
+      \fi
+      }}%
+      \ifnum\NAT@sort>1\relax\NAT@last@yr\fi
+      \ifNAT@swa\else\ifnum\NAT@ctype=0\if*#2*\else
+      \NAT@cmt#2\fi \NAT@@close\fi\fi}{#1}{#2}}
+ % \NAT@test{<n>}: for n=1 print the author name \NAT@nm, otherwise the
+ % date \NAT@date. If the requested item is missing, print a bold
+ % "(author?)" / "(year?)" marker and issue a package warning.
+\newcommand\NAT@test[1]{\ifnum#1=1 \ifx\NAT@nm\NAT@noname
+  {\reset@font\bfseries(author?)}\PackageWarning{natbib}
+  {Author undefined for citation`\@citeb'
+   \MessageBreak
+   on page \thepage}\else \NAT@nm \fi
+  \else \if\relax\NAT@date\relax
+  {\reset@font\bfseries(year?)}\PackageWarning{natbib}
+  {Year undefined for citation`\@citeb'
+   \MessageBreak
+   on page \thepage}\else \NAT@date \fi \fi}
+\let\citenumfont=\relax
+ % \NAT@citex[<pre>][<post>]{<keys>}: author-year replacement for
+ % \@citex. It loops over \NAT@cite@list, writes each key to the aux
+ % file, warns on undefined keys, and typesets name and date according
+ % to \ifNAT@swa and \NAT@ctype. When consecutive keys share the same
+ % name only the year (or, for equal years, the extra label) is added.
+\newcommand\NAT@citex{}
+\def\NAT@citex%
+  [#1][#2]#3{%
+  \NAT@sort@cites{#3}%
+  \let\@citea\@empty
+  \@cite{\let\NAT@nm\@empty\let\NAT@year\@empty
+    \@for\@citeb:=\NAT@cite@list\do
+    {\edef\@citeb{\expandafter\@firstofone\@citeb}%
+     \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
+     \@ifundefined{b@\@citeb\@extra@b@citeb}{\@citea%
+       {\reset@font\bfseries ?}\NAT@citeundefined
+                 \PackageWarning{natbib}%
+       {Citation `\@citeb' on page \thepage \space undefined}\def\NAT@date{}}%
+     {\let\NAT@last@nm=\NAT@nm\let\NAT@last@yr=\NAT@year
+     \NAT@parse{\@citeb}%
+      \ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
+        \let\NAT@name=\NAT@all@names
+        \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
+      \fi
+     \ifNAT@full\let\NAT@nm\NAT@all@names\else
+       \let\NAT@nm\NAT@name\fi
+     \ifNAT@swa\ifcase\NAT@ctype
+       \if\relax\NAT@date\relax
+         \@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@nmfmt{\NAT@nm}\NAT@date\hyper@natlinkend
+       \else
+         \ifx\NAT@last@nm\NAT@nm\NAT@yrsep
+            \ifx\NAT@last@yr\NAT@year
+              \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@exlab
+              \hyper@natlinkend
+            \else\unskip\
+              \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@date
+              \hyper@natlinkend
+            \fi
+         \else\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+           \NAT@nmfmt{\NAT@nm}%
+           \hyper@natlinkbreak{\NAT@aysep\ }{\@citeb\@extra@b@citeb}%
+           \NAT@date\hyper@natlinkend
+         \fi
+       \fi
+     \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@nmfmt{\NAT@nm}\hyper@natlinkend
+     \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@date\hyper@natlinkend
+     \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@alias\hyper@natlinkend
+     \fi \def\@citea{\NAT@sep\ }%
+     \else\ifcase\NAT@ctype
+        \if\relax\NAT@date\relax
+          \@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+          \NAT@nmfmt{\NAT@nm}\hyper@natlinkend
+        \else
+         \ifx\NAT@last@nm\NAT@nm\NAT@yrsep
+            \ifx\NAT@last@yr\NAT@year
+              \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@exlab
+              \hyper@natlinkend
+            \else\unskip\
+              \hyper@natlinkstart{\@citeb\@extra@b@citeb}\NAT@date
+              \hyper@natlinkend
+            \fi
+         \else\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+           \NAT@nmfmt{\NAT@nm}%
+           \hyper@natlinkbreak{\ \NAT@@open\if*#1*\else#1\ \fi}%
+              {\@citeb\@extra@b@citeb}%
+           \NAT@date\hyper@natlinkend\fi
+        \fi
+       \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@nmfmt{\NAT@nm}\hyper@natlinkend
+       \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@date\hyper@natlinkend
+       \or\@citea\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
+         \NAT@alias\hyper@natlinkend
+       \fi \if\relax\NAT@date\relax\def\@citea{\NAT@sep\ }%
+           \else\def\@citea{\NAT@@close\NAT@sep\ }\fi
+     \fi
+     }}\ifNAT@swa\else\if*#2*\else\NAT@cmt#2\fi
+     \if\relax\NAT@date\relax\else\NAT@@close\fi\fi}{#1}{#2}}
+ % \NAT@@open / \NAT@@close print the brackets only while \ifNAT@par is
+ % true. \NAT@alias prints the alias stored for the current key
+ % \@citeb, or a bold "(alias?)" marker plus a warning if none exists.
+\newif\ifNAT@par \NAT@partrue
+\newcommand\NAT@@open{\ifNAT@par\NAT@open\fi}
+\newcommand\NAT@@close{\ifNAT@par\NAT@close\fi}
+\newcommand\NAT@alias{\@ifundefined{al@\@citeb\@extra@b@citeb}{%
+  {\reset@font\bfseries(alias?)}\PackageWarning{natbib}
+  {Alias undefined for citation `\@citeb'
+  \MessageBreak on page \thepage}}{\@nameuse{al@\@citeb\@extra@b@citeb}}}
+ % \NAT@up is a no-op by default; the capitalised commands (\Citet
+ % etc.) set it to \NAT@Up, which expands its argument into \NAT@temp
+ % and lets \NAT@UP uppercase the first letter-category token.
+ % \shortcites{<keys>} defines bv@<key> for each key, which is the
+ % marker the longnamesfirst code tests for.
+\let\NAT@up\relax
+\newcommand\NAT@Up[1]{{\let\protect\@unexpandable@protect\let~\relax
+  \expandafter\NAT@deftemp#1}\expandafter\NAT@UP\NAT@temp}
+\newcommand\NAT@deftemp[1]{\xdef\NAT@temp{#1}}
+\newcommand\NAT@UP[1]{\let\@tempa\NAT@UP\ifcat a#1\MakeUppercase{#1}%
+  \let\@tempa\relax\else#1\fi\@tempa}
+\newcommand\shortcites[1]{%
+  \@bsphack\@for\@citeb:=#1\do
+  {\edef\@citeb{\expandafter\@firstofone\@citeb}%
+   \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}\@esphack}
+ % Bibliography labels and list geometry. \NAT@biblabel prints no
+ % label text (only \hfill); \NAT@biblabelnum formats the number with
+ % \bibnumfmt. \NAT@bibsetnum derives the list margins from the width
+ % of the widest label #1. \bibsep is initialised to the \itemsep plus
+ % \parsep of a first-level list.
+\newcommand\NAT@biblabel[1]{\hfill}
+\newcommand\NAT@biblabelnum[1]{\bibnumfmt{#1}}
+\newcommand\bibnumfmt[1]{[#1]}
+\def\@tempa#1{[#1]}
+\ifx\@tempa\@biblabel\let\@biblabel\@empty\fi
+\newcommand\NAT@bibsetnum[1]{\settowidth\labelwidth{\@biblabel{#1}}%
+   \setlength{\leftmargin}{\labelwidth}\addtolength{\leftmargin}{\labelsep}%
+   \setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}%
+   \ifNAT@openbib
+     \addtolength{\leftmargin}{4mm}%
+     \setlength{\itemindent}{-4mm}%
+     \setlength{\listparindent}{\itemindent}%
+     \setlength{\parsep}{0pt}%
+   \fi
+}
+\newlength{\bibhang}
+\setlength{\bibhang}{1em}
+\newlength{\bibsep}
+{\@listi \global\bibsep\itemsep \global\advance\bibsep by\parsep}
+
+ % \NAT@bibsetup: list geometry for author-year bibliographies (hanging
+ % indent of \bibhang). \NAT@set@cites, run at \begin{document}, points
+ % \@cite, \@citex, \@biblabel and \@bibsetup at the numerical or the
+ % author-year implementations according to \ifNAT@numbers.
+\newcommand\NAT@bibsetup%
+   [1]{\setlength{\leftmargin}{\bibhang}\setlength{\itemindent}{-\leftmargin}%
+       \setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}}
+\newcommand\NAT@set@cites{\ifNAT@numbers
+  \ifNAT@super \let\@cite\NAT@citesuper
+     \def\NAT@mbox##1{\unskip\nobreak\hspace{1\p@}\textsuperscript{##1}}%
+     \let\citeyearpar=\citeyear
+     \let\NAT@space\relax\else
+     \let\NAT@mbox=\mbox
+     \let\@cite\NAT@citenum \def\NAT@space{ }\fi
+  \let\@citex\NAT@citexnum
+  \ifx\@biblabel\@empty\let\@biblabel\NAT@biblabelnum\fi
+  \let\@bibsetup\NAT@bibsetnum
+  \def\natexlab##1{}%
+ \else
+  \let\@cite\NAT@cite
+  \let\@citex\NAT@citex
+  \let\@biblabel\NAT@biblabel
+  \let\@bibsetup\NAT@bibsetup
+  \def\natexlab##1{##1}%
+ \fi}
+\AtBeginDocument{\NAT@set@cites}
+ % Hooks executed only when the \SK@... macros are defined.
+\AtBeginDocument{\ifx\SK@def\@undefined\else
+\ifx\SK@cite\@empty\else
+  \SK@def\@citex[#1][#2]#3{\SK@\SK@@ref{#3}\SK@@citex[#1][#2]{#3}}\fi
+\ifx\SK@citeauthor\@undefined\def\HAR@checkdef{}\else
+  \let\citeauthor\SK@citeauthor
+  \let\citefullauthor\SK@citefullauthor
+  \let\citeyear\SK@citeyear\fi
+\fi}
+ % With hyperref loaded, sort&compress (2) is reduced to plain sort (1).
+\AtBeginDocument{\@ifpackageloaded{hyperref}{%
+  \ifnum\NAT@sort=2\def\NAT@sort{1}\fi}{}}
+ % User-level citation commands. Each opens a group (closed again by
+ % the \@cite wrapper) and sets: \ifNAT@swa (parenthetical form),
+ % \NAT@ctype (0 = citation, 1 = author, 2 = year, 3 = alias),
+ % \ifNAT@par (print brackets) and, via the starred form, \ifNAT@full
+ % (full author list). Capitalised variants additionally set \NAT@up.
+\newif\ifNAT@full\NAT@fullfalse
+\newif\ifNAT@swa
+\DeclareRobustCommand\citet
+   {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@partrue
+     \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\newcommand\NAT@citetp{\@ifnextchar[{\NAT@@citetp}{\NAT@@citetp[]}}
+\newcommand\NAT@@citetp{}
+\def\NAT@@citetp[#1]{\@ifnextchar[{\@citex[#1]}{\@citex[][#1]}}
+\DeclareRobustCommand\citep
+   {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@partrue
+         \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\cite
+    {\begingroup\def\NAT@ctype{0}\NAT@partrue\NAT@swatrue
+      \@ifstar{\NAT@fulltrue\NAT@cites}{\NAT@fullfalse\NAT@cites}}
+\newcommand\NAT@cites{\@ifnextchar [{\NAT@@citetp}{%
+     \ifNAT@numbers\else
+     \NAT@swafalse
+     \fi
+    \NAT@@citetp[]}}
+\DeclareRobustCommand\citealt
+   {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@parfalse
+         \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\citealp
+   {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@parfalse
+         \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\citeauthor
+   {\begingroup\NAT@swafalse\def\NAT@ctype{1}\NAT@parfalse
+    \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\Citet
+   {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@partrue
+     \let\NAT@up\NAT@Up
+     \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\Citep
+   {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@partrue
+     \let\NAT@up\NAT@Up
+         \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\Citealt
+   {\begingroup\NAT@swafalse\def\NAT@ctype{0}\NAT@parfalse
+     \let\NAT@up\NAT@Up
+         \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\Citealp
+   {\begingroup\NAT@swatrue\def\NAT@ctype{0}\NAT@parfalse
+     \let\NAT@up\NAT@Up
+         \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\Citeauthor
+   {\begingroup\NAT@swafalse\def\NAT@ctype{1}\NAT@parfalse
+     \let\NAT@up\NAT@Up
+    \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
+\DeclareRobustCommand\citeyear
+   {\begingroup\NAT@swafalse\def\NAT@ctype{2}\NAT@parfalse\NAT@citetp}
+\DeclareRobustCommand\citeyearpar
+   {\begingroup\NAT@swatrue\def\NAT@ctype{2}\NAT@partrue\NAT@citetp}
+\newcommand\citetext[1]{\NAT@open#1\NAT@close}
+\DeclareRobustCommand\citefullauthor
+   {\citeauthor*}
+ % \defcitealias{<key>}{<text>}: store <text> as the alias of <key>,
+ % warning when an alias for that key already exists.
+\newcommand\defcitealias[2]{%
+   \@ifundefined{al@#1\@extra@b@citeb}{}
+   {\PackageWarning{natbib}{Overwriting existing alias for citation #1}}
+   \@namedef{al@#1\@extra@b@citeb}{#2}}
+\DeclareRobustCommand\citetalias{\begingroup
+   \NAT@swafalse\def\NAT@ctype{3}\NAT@parfalse\NAT@citetp}
+\DeclareRobustCommand\citepalias{\begingroup
+   \NAT@swatrue\def\NAT@ctype{3}\NAT@partrue\NAT@citetp}
+ % \nocite{<keys>}: write each key to the aux file without printing
+ % anything; warn about undefined keys, except for the key `*'.
+\renewcommand\nocite[1]{\@bsphack
+  \@for\@citeb:=#1\do{%
+    \edef\@citeb{\expandafter\@firstofone\@citeb}%
+    \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
+    \if*\@citeb\else
+    \@ifundefined{b@\@citeb\@extra@b@citeb}{%
+       \NAT@citeundefined \PackageWarning{natbib}%
+       {Citation `\@citeb' undefined}}{}\fi}%
+  \@esphack}
+ % \NAT@parse{<key>}: expand the stored b@<key> data and split it into
+ % \NAT@num, \NAT@date, \NAT@name and \NAT@all@names (\NAT@split), then
+ % split the date into \NAT@year and the extra label \NAT@exlab
+ % (\NAT@parse@date takes the first character of category 11, i.e. a
+ % letter, among the first four as the extra label).
+\newcommand\NAT@parse[1]{{%
+     \let\protect=\@unexpandable@protect\let~\relax
+     \let\active@prefix=\@gobble
+     \xdef\NAT@temp{\csname b@#1\@extra@b@citeb\endcsname}}%
+     \expandafter\NAT@split\NAT@temp
+     \expandafter\NAT@parse@date\NAT@date??????@@%
+     \ifciteindex\NAT@index\fi
+}
+\newcommand\NAT@split[4]{%
+  \gdef\NAT@num{#1}\gdef\NAT@name{#3}\gdef\NAT@date{#2}%
+  \gdef\NAT@all@names{#4}%
+  \ifx\NAT@noname\NAT@all@names \gdef\NAT@all@names{#3}\fi}
+\newcommand\NAT@parse@date{}
+\def\NAT@parse@date#1#2#3#4#5#6@@{%
+  \ifnum\the\catcode`#1=11\def\NAT@year{}\def\NAT@exlab{#1}\else
+  \ifnum\the\catcode`#2=11\def\NAT@year{#1}\def\NAT@exlab{#2}\else
+  \ifnum\the\catcode`#3=11\def\NAT@year{#1#2}\def\NAT@exlab{#3}\else
+  \ifnum\the\catcode`#4=11\def\NAT@year{#1#2#3}\def\NAT@exlab{#4}\else
+    \def\NAT@year{#1#2#3#4}\def\NAT@exlab{{#5}}\fi\fi\fi\fi}
+ % Citation indexing. \NAT@index does nothing until \makeindex has been
+ % issued, after which it writes \NAT@idxtxt ("name (date)") to the
+ % index. \NAT@index@alt is the variant that calls
+ % \index[\citeindextype]{...} instead.
+\newcommand\NAT@index{}
+\let\NAT@makeindex=\makeindex
+\renewcommand\makeindex{\NAT@makeindex
+  \renewcommand\NAT@index{\@bsphack\begingroup
+     \def~{\string~}\@wrindex{\NAT@idxtxt}}}
+\newcommand\NAT@idxtxt{\NAT@name\ \NAT@open\NAT@date\NAT@close}
+\@ifundefined{@indexfile}{}{\let\NAT@makeindex\relax\makeindex}
+\newif\ifciteindex \citeindexfalse
+\newcommand\citeindextype{default}
+\newcommand\NAT@index@alt{{\let\protect=\noexpand\let~\relax
+  \xdef\NAT@temp{\NAT@idxtxt}}\expandafter\NAT@exp\NAT@temp\@nil}
+\newcommand\NAT@exp{}
+\def\NAT@exp#1\@nil{\mbox{}\index[\citeindextype]{#1}}
+
+ % If the `index' package is loaded, switch to the \index[...] variant.
+\AtBeginDocument{%
+\@ifpackageloaded{index}{\let\NAT@index=\NAT@index@alt}{}}
+ % Parsing of the optional \bibitem label. \NAT@ifcmd looks at the
+ % first token of the label: if it is not \relax (as an unexpandable
+ % control sequence appears under \ifx here) the label is handed to
+ % \NAT@bare, which expects "name(year)full name". If no parenthesised
+ % year is found the label is passed on to \NAT@apalk. \NAT@wrout
+ % writes the resulting \bibcite record to the aux file.
+\newcommand\NAT@ifcmd{\futurelet\NAT@temp\NAT@ifxcmd}
+\newcommand\NAT@ifxcmd{\ifx\NAT@temp\relax\else\expandafter\NAT@bare\fi}
+\def\NAT@bare#1(#2)#3(@)#4\@nil#5{%
+  \if @#2
+  \expandafter\NAT@apalk#1, , \@nil{#5}\else
+  \stepcounter{NAT@ctr}%
+  \NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{#3}{#5}
+\fi
+}
+\newcommand\NAT@wrout[5]{%
+\if@filesw
+      {\let\protect\noexpand\let~\relax
+       \immediate
+       \write\@auxout{\string\bibcite{#5}{{#1}{#2}{{#3}}{{#4}}}}}\fi
+\ignorespaces}
+\def\NAT@noname{{}}
+ % \bibitem without an optional label flags a standard (numerical)
+ % .bst via \ifNAT@stdbst and uses the running counter NAT@ctr as the
+ % label. \@lbibitem[<label>]{<key>} typesets the item label with a
+ % hyperlink anchor and then parses <label> via \NAT@ifcmd.
+\renewcommand\bibitem{%
+  \@ifnextchar[{\@lbibitem}{%
+    \global\NAT@stdbsttrue
+    \stepcounter{NAT@ctr}\@lbibitem[\arabic{NAT@ctr}]}}
+\def\@lbibitem[#1]#2{%
+  \if\relax\@extra@b@citeb\relax\else
+    \@ifundefined{br@#2\@extra@b@citeb}{}{%
+     \@namedef{br@#2}{\@nameuse{br@#2\@extra@b@citeb}}}\fi
+   \@ifundefined{b@#2\@extra@b@citeb}{\def\NAT@num{}}{\NAT@parse{#2}}%
+   \item[\hfil\hyper@natanchorstart{#2\@extra@b@citeb}\@biblabel{\NAT@num}%
+    \hyper@natanchorend]%
+    \NAT@ifcmd#1(@)(@)\@nil{#2}}
+\ifx\SK@lbibitem\@undefined\else
+   \let\SK@lbibitem\@lbibitem
+   \def\@lbibitem[#1]#2{%
+     \SK@lbibitem[#1]{#2}\SK@\SK@@label{#2}\ignorespaces}\fi
+\newif\ifNAT@stdbst \NAT@stdbstfalse
+
+ % If a standard .bst was detected, write \global\NAT@numberstrue to
+ % the aux file so that the next run starts in numerical mode.
+\AtEndDocument
+  {\ifNAT@stdbst\if@filesw\immediate\write\@auxout{\string
+   \global\string\NAT@numberstrue}\fi\fi
+  }
+ % \bibcite{<key>}{<data>}: store the aux-file data as b@<key>, warning
+ % on duplicate keys. At \end{document} \bibcite becomes \NAT@testdef,
+ % which compares the newly written data with the stored data and
+ % issues one "Rerun" warning if anything changed (\ifNAT@swa is
+ % reused as the "not yet warned" flag).
+\providecommand\bibcite{}
+\renewcommand\bibcite[2]{\@ifundefined{b@#1\@extra@binfo}\relax
+     {\NAT@citemultiple
+      \PackageWarningNoLine{natbib}{Citation `#1' multiply defined}}%
+  \global\@namedef{b@#1\@extra@binfo}{#2}}
+\AtEndDocument{\NAT@swatrue\let\bibcite\NAT@testdef}
+\newcommand\NAT@testdef[2]{%
+  \def\NAT@temp{#2}\expandafter \ifx \csname b@#1\@extra@binfo\endcsname
+    \NAT@temp \else \ifNAT@swa \NAT@swafalse
+       \PackageWarningNoLine{natbib}{Citation(s) may have
+          changed.\MessageBreak
+          Rerun to get citations correct}\fi\fi}
+ % Parsers for the other supported \bibitem label formats; each ends
+ % in \NAT@wrout. \NAT@apalk handles "name, year" labels (a label
+ % without a comma is taken as a plain number and flags a standard
+ % .bst). \citeauthoryear, \astroncite, \citename and \harvarditem
+ % accept the label forms listed in the usage notes at the top of the
+ % file.
+\newcommand\NAT@apalk{}
+\def\NAT@apalk#1, #2, #3\@nil#4{\if\relax#2\relax
+  \global\NAT@stdbsttrue
+  \NAT@wrout{#1}{}{}{}{#4}\else
+  \stepcounter{NAT@ctr}%
+  \NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{}{#4}\fi}
+\newcommand\citeauthoryear{}
+\def\citeauthoryear#1#2#3(@)(@)\@nil#4{\stepcounter{NAT@ctr}\if\relax#3\relax
+   \NAT@wrout{\arabic {NAT@ctr}}{#2}{#1}{}{#4}\else
+   \NAT@wrout{\arabic {NAT@ctr}}{#3}{#2}{#1}{#4}\fi}
+\newcommand\citestarts{\NAT@open}
+\newcommand\citeends{\NAT@close}
+\newcommand\betweenauthors{and}
+\newcommand\astroncite{}
+\def\astroncite#1#2(@)(@)\@nil#3{\stepcounter{NAT@ctr}\NAT@wrout{\arabic
+{NAT@ctr}}{#2}{#1}{}{#3}}
+\newcommand\citename{}
+\def\citename#1#2(@)(@)\@nil#3{\expandafter\NAT@apalk#1#2, \@nil{#3}}
+\newcommand\harvarditem[4][]%
+    {\if\relax#1\relax\bibitem[#2(#3)]{#4}\else
+        \bibitem[#1(#3)#2]{#4}\fi }
+\newcommand\harvardleft{\NAT@open}
+\newcommand\harvardright{\NAT@close}
+\newcommand\harvardyearleft{\NAT@open}
+\newcommand\harvardyearright{\NAT@close}
+\AtBeginDocument{\providecommand{\harvardand}{and}}
+\newcommand\harvardurl[1]{\textbf{URL:} \textit{#1}}
+\providecommand\bibsection{}
+\@ifundefined{chapter}%
+  {\renewcommand\bibsection{\section*{\refname
+    \@mkboth{\MakeUppercase{\refname}}{\MakeUppercase{\refname}}}}}
+  {\@ifundefined{NAT at sectionbib}%
+    {\renewcommand\bibsection{\chapter*{\bibname
+     \@mkboth{\MakeUppercase{\bibname}}{\MakeUppercase{\bibname}}}}}
+    {\renewcommand\bibsection{\section*{\bibname
+     \ifx\@mkboth\@gobbletwo\else\markright{\MakeUppercase{\bibname}}\fi}}}}
+\@ifclassloaded{amsart}%
+  {\renewcommand\bibsection{\section*{\refname}}}{}
+\@ifclassloaded{amsbook}%
+  {\renewcommand\bibsection{\chapter*{\bibname}}}{}
+\@ifundefined{bib at heading}{}{\let\bibsection\bib at heading}
+\newcounter{NAT at ctr}
+\renewenvironment{thebibliography}[1]{%
+ \bibsection
+ \vspace{1\p@}\parindent \z@\bibpreamble\bibfont\list
+   {\@biblabel{\arabic{NAT at ctr}}}{\@bibsetup{#1}%
+    \setcounter{NAT at ctr}{0}}%
+    \ifNAT at openbib
+      \renewcommand\newblock{\par}
+    \else
+      \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}%
+    \fi
+    \sloppy\clubpenalty4000\widowpenalty4000
+    \sfcode`\.=1000\relax
+    \let\citeN\cite \let\shortcite\cite
+    \let\citeasnoun\cite\fontsize{7}{9}\selectfont
+ }{\def\@noitemerr{%
+  \PackageWarning{natbib}
+     {Empty `thebibliography' environment}}%
+  \endlist\vskip-\lastskip}
+\let\bibfont\relax
+\let\bibpreamble\relax
+\providecommand\reset at font{\relax}
+\providecommand\bibname{Bibliography}
+\providecommand\refname{References}
+\newcommand\NAT at citeundefined{\gdef \NAT at undefined {%
+    \PackageWarningNoLine{natbib}{There were undefined citations}}}
+\let \NAT at undefined \relax
+\newcommand\NAT at citemultiple{\gdef \NAT at multiple {%
+    \PackageWarningNoLine{natbib}{There were multiply defined citations}}}
+\let \NAT at multiple \relax
+\AtEndDocument{\NAT at undefined\NAT at multiple}
+\providecommand\@mkboth[2]{}
+\providecommand\MakeUppercase{\uppercase}
+\providecommand{\@extra at b@citeb}{}
+\gdef\@extra at binfo{}
+\providecommand\hyper at natanchorstart[1]{}
+\providecommand\hyper at natanchorend{}
+\providecommand\hyper at natlinkstart[1]{}
+\providecommand\hyper at natlinkend{}
+\providecommand\hyper at natlinkbreak[2]{#1}
+\@ifundefined{bbl at redefine}{}{%
+ \bbl at redefine\nocite#1{%
+  \@safe at activestrue\org at nocite{#1}\@safe at activesfalse}%
+\bbl at redefine\@lbibitem[#1]#2{%
+  \@safe at activestrue\org@@lbibitem[#1]{#2}\@safe at activesfalse}%
+}
+\AtBeginDocument{\@ifundefined{bbl at redefine}{}{%
+\bbl at redefine\@citex[#1][#2]#3{%
+  \@safe at activestrue\org@@citex[#1][#2]{#3}\@safe at activesfalse}%
+\bbl at redefine\NAT at testdef#1#2{%
+  \@safe at activestrue\org at NAT@testdef{#1}{#2}\@safe at activesfalse}%
+\@ifundefined{org@@lbibitem}{%
+\bbl at redefine\@lbibitem[#1]#2{%
+  \@safe at activestrue\org@@lbibitem[#1]{#2}\@safe at activesfalse}}{}%
+}}
+\ifnum\NAT at sort>0
+\newcommand\NAT at sort@cites[1]{%
+\@tempcntb\m at ne
+\let\@celt\delimiter
+\def\NAT at num@list{}%
+\def\NAT at cite@list{}%
+\def\NAT at nonsort@list{}%
+\@for \@citeb:=#1\do{\NAT at make@cite at list}%
+\edef\NAT at cite@list{\NAT at cite@list\NAT at nonsort@list}%
+\edef\NAT at cite@list{\expandafter\NAT at xcom\NAT at cite@list @@}}
+\begingroup \catcode`\_=8
+\gdef\NAT at make@cite at list{%
+     \edef\@citeb{\expandafter\@firstofone\@citeb}%
+    \@ifundefined{b@\@citeb\@extra at b@citeb}{\def\NAT at num{A}}%
+    {\NAT at parse{\@citeb}}%
+      \ifcat _\ifnum\z@<0\NAT at num _\else A\fi
+       \@tempcnta\NAT at num \relax
+       \ifnum \@tempcnta>\@tempcntb
+          \edef\NAT at num@list{\NAT at num@list \@celt{\NAT at num}}%
+          \edef\NAT at cite@list{\NAT at cite@list\@citeb,}%
+          \@tempcntb\@tempcnta
+       \else
+          \let\NAT@@cite at list=\NAT at cite@list \def\NAT at cite@list{}%
+          \edef\NAT at num@list{\expandafter\NAT at num@celt \NAT at num@list \@gobble @}%
+          {\let\@celt=\NAT at celt\NAT at num@list}%
+       \fi
+    \else
+       \edef\NAT at nonsort@list{\NAT at nonsort@list\@citeb,}%
+ \fi}
+\endgroup
+\def\NAT at celt#1{\ifnum #1<\@tempcnta
+  \xdef\NAT at cite@list{\NAT at cite@list\expandafter\NAT at nextc\NAT@@cite at list @@}%
+  \xdef\NAT@@cite at list{\expandafter\NAT at restc\NAT@@cite at list}%
+ \else
+  \xdef\NAT at cite@list{\NAT at cite@list\@citeb,\NAT@@cite at list}\let\@celt\@gobble%
+ \fi}
+\def\NAT at num@celt#1#2{\ifx \@celt #1%
+     \ifnum #2<\@tempcnta
+        \@celt{#2}%
+        \expandafter\expandafter\expandafter\NAT at num@celt
+     \else
+        \@celt{\number\@tempcnta}\@celt{#2}%
+  \fi\fi}
+\def\NAT at nextc#1,#2@@{#1,}
+\def\NAT at restc#1,#2{#2}
+\def\NAT at xcom#1,@@{#1}
+\else
+ \newcommand\NAT at sort@cites[1]{\edef\NAT at cite@list{#1}}\fi
+\InputIfFileExists{natbib.cfg}
+       {\typeout{Local config file natbib.cfg used}}{}
+%% 
+%% <<<<< End of generated file <<<<<<
+%%
+%% End of file `natbib.sty'.
diff --git a/tex/overhang.pdf b/tex/overhang.pdf
new file mode 100644
index 0000000..d3bfbd0
Binary files /dev/null and b/tex/overhang.pdf differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/miniasm.git