[med-svn] [bio-rainbow] 01/11: Imported Upstream version 2.0.4

Olivier Sallou osallou at debian.org
Mon Aug 17 15:09:15 UTC 2015


This is an automated email from the git hooks/post-receive script.

osallou pushed a commit to branch master
in repository bio-rainbow.

commit 226c6a4fff7ecb03aa91e6f93e65c605f7ffb169
Author: Olivier Sallou <osallou at debian.org>
Date:   Mon Aug 17 13:00:29 2015 +0000

    Imported Upstream version 2.0.4
---
 ._Makefile                         | Bin 0 -> 212 bytes
 ._tags                             | Bin 0 -> 212 bytes
 Makefile                           |  48 +++
 README.txt                         | 119 ++++++
 aln_cigar.h                        | 440 ++++++++++++++++++++
 asm_R2.c                           | 407 ++++++++++++++++++
 asm_R2.h                           | 107 +++++
 bitvec.h                           | 169 ++++++++
 bloom_filter.h                     |  79 ++++
 cluster.c                          | 434 ++++++++++++++++++++
 divide.c                           | 375 +++++++++++++++++
 dna.h                              | 193 +++++++++
 ezmsim.c                           | 770 ++++++++++++++++++++++++++++++++++
 file_reader.c                      | 416 +++++++++++++++++++
 file_reader.h                      | 246 +++++++++++
 hashset.h                          | 513 +++++++++++++++++++++++
 heap.h                             |  95 +++++
 list.h                             | 234 +++++++++++
 main.c                             | 250 +++++++++++
 mergecontig.c                      | 630 ++++++++++++++++++++++++++++
 mergecontig.h                      | 162 ++++++++
 mergectg.c                         | 654 +++++++++++++++++++++++++++++
 mergectg.h                         | 139 +++++++
 mergetag.c                         | 203 +++++++++
 rainbow.h                          | 127 ++++++
 rbasm_main.c                       |  41 ++
 select_all_rbcontig.pl             |  28 ++
 select_best_rbcontig.pl            |  34 ++
 select_best_rbcontig_plus_read1.pl |  88 ++++
 select_sec_rbcontig.pl             |  49 +++
 simp_asm.h                         | 287 +++++++++++++
 sort.h                             | 259 ++++++++++++
 stdaln.c                           | 797 +++++++++++++++++++++++++++++++++++
 stdaln.h                           | 105 +++++
 string.h                           | 217 ++++++++++
 tags                               | 821 +++++++++++++++++++++++++++++++++++++
 vector.h                           | 234 +++++++++++
 37 files changed, 9770 insertions(+)

diff --git a/._Makefile b/._Makefile
new file mode 100644
index 0000000..1a9e2f3
Binary files /dev/null and b/._Makefile differ
diff --git a/._tags b/._tags
new file mode 100644
index 0000000..1a9e2f3
Binary files /dev/null and b/._tags differ
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..187191b
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,48 @@
+# Toolchain configuration: compiler, warning/optimisation flags, preprocessor
+# defines and libraries appended to every link line.
+CC=gcc
+CFLAGS= -W -O2 -Wall -Wno-self-assign -Wno-unused-function
+DFLAGS= -D_FILE_OFFSET_BITS=64
+GLIBS=-lm
+# Generic header list; no rule in this Makefile references it.
+GENERIC_SRC= string.h bitvec.h file_reader.h hashset.h sort.h list.h dna.h heap.h stdaln.h vector.h
+
+# Old-style suffix rule: build each object from the C file of the same name.
+.SUFFIXES:.c .o
+
+.c.o:
+	$(CC) -c $(CFLAGS) $(DFLAGS) $< -o $@
+
+# Default target; rbmergetag is left out of the build (see the disabled line).
+#all: rainbow rbasm rbmergetag ezmsim 
+all: rainbow ezmsim rbasm
+
+# Main driver: cluster / div / merge sub-commands.
+rainbow: main.o  divide.o file_reader.o asm_R2.o mergectg.o cluster.o
+	$(CC) $(CFLAGS) -o $@ $^ $(GLIBS) 
+
+# Stand-alone local assembler.
+rbasm: asm_R2.o rbasm_main.o file_reader.o
+	$(CC) $(CFLAGS) -o $@ $^ $(GLIBS)
+
+# Built from ezmsim.c alone.
+ezmsim: ezmsim.o
+	$(CC) $(CFLAGS) -o $@ $^ $(GLIBS)
+
+# Header dependencies of each object file, so that editing a header rebuilds
+# the objects that include it.
+# NOTE(review): stdaln.o has a dependency line but is not listed as a
+# prerequisite of any link target above - confirm that is intended.
+asm_R2.o: asm_R2.c asm_R2.h string.h vector.h hashset.h file_reader.h \
+  dna.h
+cluster.o: cluster.c rainbow.h bitvec.h hashset.h list.h sort.h dna.h \
+  file_reader.h string.h vector.h mergectg.h stdaln.h asm_R2.h \
+  bloom_filter.h
+divide.o: divide.c rainbow.h bitvec.h hashset.h list.h sort.h dna.h \
+  file_reader.h string.h vector.h mergectg.h stdaln.h asm_R2.h \
+  bloom_filter.h
+ezmsim.o: ezmsim.c
+file_reader.o: file_reader.c file_reader.h string.h vector.h
+main.o: main.c rainbow.h bitvec.h hashset.h list.h sort.h dna.h \
+  file_reader.h string.h vector.h mergectg.h stdaln.h asm_R2.h \
+  bloom_filter.h
+mergectg.o: mergectg.c mergectg.h list.h sort.h file_reader.h string.h \
+  vector.h hashset.h stdaln.h asm_R2.h dna.h bloom_filter.h bitvec.h \
+  rainbow.h
+rbasm_main.o: rbasm_main.c asm_R2.h string.h vector.h hashset.h \
+  file_reader.h dna.h
+stdaln.o: stdaln.c stdaln.h
+
+# None of these targets produce a file of the same name; declaring them phony
+# keeps a stray file called "all", "clean" or "clear" from disabling them.
+.PHONY: all clean clear
+
+# Remove objects, profiling/coverage output and the built executables.
+clean:
+	rm -f *.o *.gcda *.gcno *.gcov gmon.out rainbow rbasm ezmsim rbmergetag *.exe
+
+# Remove objects and profiling/coverage output but keep the executables.
+clear:
+	rm -f *.o *.gcda *.gcno *.gcov gmon.out
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..d4fd3dd
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,119 @@
+Rainbow v2.0.4
+
+Description
+===========
+Rainbow package consists of several programs used for RAD-seq related 
+clustering and de novo assembly.
+
+Installation
+============
+Type 'make' to compile the Rainbow package. You can then copy the executables
+and scripts to a location of your choice (e.g. a directory in your $PATH), or
+add this directory to your PATH environment variable.
+
+
+Usage of Rainbow package
+========================
+EXAMPLE: a typical use of Rainbow step by step
+
+	rainbow cluster -1 1.fq  -2 2.fq > rbcluster.out 2> log
+	rainbow div -i rbcluster.out -o rbdiv.out
+	rainbow merge -o rbasm.out -a -i rbdiv.out -N500
+
+The final output file of 'rainbow merge -a' is based on the final merged
+clusters. Each cluster has been locally assembled by 'rainbow merge -a'. For
+each cluster, rainbow outputs all assembled contigs separated by '//' for each
+record:
+E clusterID
+C contigID1
+L length
+S sequence
+N #reads
+R readIDs
+//
+C contigID2
+L length
+S sequence
+N #reads
+R readIDs
+.
+.
+.
+
+We have also provided four simple perl scripts that can be used to extract the assembly
+information: select_all_rbcontig.pl, select_best_rbcontig.pl, select_sec_rbcontig.pl, select_best_rbcontig_plus_read1.pl
+
+select_all_rbcontig.pl extracts all the assembled contigs, i.e., all the
+records
+
+select_best_rbcontig.pl and select_sec_rbcontig.pl extract the longest and
+the longest plus the second longest contigs for the final clusters,
+respectively
+
+select_best_rbcontig_plus_read1.pl, like select_best_rbcontig.pl, extracts the longest contig for each cluster. In addition, it outputs read1: if read1 overlaps the contig, the two are joined into a single sequence; if read1 does not overlap the contig, read1 and the contig are joined with a padding of 10 'X' characters, thus generating one long contig. 
+
+----------------------------------------------------------------------------------
+rainbow 2.0.3 -- <ruanjue at gmail.com, chongzechen at gmail.com>
+Usage: rainbow <cmd> [options]
+
+ cluster
+  Input  File Format: paired fasta/fastq file(s)
+  Output File Format: <seqid:int>\t<cluster_id:int>\t<read1:string>\t<read2:string>
+  -1 <string> Input fasta/fastq file, supports multiple '-1'
+  -2 <string> Input fasta/fastq file, supports multiple '-2' [null]
+  -l <int>    Read length, default: 0 variable
+  -m <int>    Maximum mismatches [4]
+  -e <int>    Exactly matching threshold [2000]
+  -L          Low level of polymorphism
+ div
+  Input File Format: <seqid:int>\t<cluster_id:int>\t<read1:string>\t<read2:string>
+  Output File Format: <seqid:int>\t<cluster_id:int>\t<read1:string>\t<read2:string>[\t<pre_cluster_id:int>]
+  -i <string> Input rainbow cluster output file [stdin]
+  -o <string> Output file [stdout]
+  -k <int>    K_allele, min variants to create a new group [2]
+  -K <int>    K_allele, divide regardless of frequency when num of variants exceed this value [50]
+  -f <float>  Frequency, min variant frequency to create a new group [0.2]
+ merge
+  Input File Format: <seqid:int>\t<cluster_id:int>\t<read1:string>\t<read2:string>[\t<pre_cluster_id:int>]
+  -i <string> Input rainbow div output file [stdin]
+  -a          output assembly 
+  -o <string> Output file [stdout]
+  -N <int>    Maximum number of divided clusters to merge [300]
+  -l <int>    Minimum overlap when assemble two reads (valid only when '-a' is opened) [5]
+  -f <float>  Minimum fraction of similarity when assembly (valid only when '-a' is opened) [0.90]
+  -r <int>    Minimum number of reads to assemble (valid only when '-a' is opened) [5]
+  -R <int>    Maximum number of reads to assemble (valid only when '-a' is opened) [300]
+
+----------------------------------------------------------------------------------
+rbasm: a greedy assembler that locally assembles each cluster produced by rainbow or other
+tools. It has been integrated into the merge module, so please always enable the '-a' option
+when running 'rainbow merge'.
+Local assemble fragments around restriction sites
+NOTE: the input file format should be: <seqid:int>\t<cluster_id:int>\t<read1:string>\t<read2:string>[\t<pre_cluster_id:int>]
+Usage: rbasm [options]
+ -i <string> Input file [STDIN] 
+ -o <string> Output file [STDOUT]
+ -l <int>    Minium length of overlap [5]
+ -s <float>  Minium similiarity of overlap [0.90]
+ -r <int>    Minium reads to execute assembly [5]
+ -R <int>    Maxium reads to execute assembly [200]
+
+----------------------------------------------------------------------------------
+<Obsoleted> rbmergetag: a program merges divided results to evaluate clustering performance.
+                  Users should omit this program when de novo assembling RAD-seq reads.
+Usage: rbmergetag [options]
+Options:
+ -i <string>    Input file name [stdin]
+ -o <string>    Output file name [stdout]
+ -j <cns|merge> Job type, cns: consensus, merge: merging, [merge]
+ -m <int>       Maximum mismatches to merge two groups [1]
+ -h             Show this document
+
+----------------------------------------------------------------------------------
+
+Change log:
+===========
+v2.0.1: README and usage information updated
+v2.0.2: 'merge' options were enriched. The 'merge' assembly work can now be customized like rbasm. Thanks to Ross Whetten at NCSU for suggesting this.
+v2.0.3: changed the name of script 'select_best_rbcontig2.pl' to 'select_best_rbcontig_plus_read1.pl', and documented it. 
+v2.0.4: fixed a bug where rainbow could not be compiled on Mac OS
diff --git a/aln_cigar.h b/aln_cigar.h
new file mode 100644
index 0000000..ec9574e
--- /dev/null
+++ b/aln_cigar.h
@@ -0,0 +1,440 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __ALN_CIGAR_RJ_H
+#define __ALN_CIGAR_RJ_H
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Largest run length a single AlnCigar can hold (its len field is 13 bits). */
+#define ALN_CIGAR_MAX_LEN	8191
+
+/*
+ * Operation codes stored in the 3-bit type field.  The helpers in this header
+ * read the code as a bit mask: bit 0x01 makes the run count toward seq1,
+ * bit 0x02 toward seq2, and bit 0x04 keeps the run out of the alignment
+ * length (see cigars_lengths).
+ */
+#define ALN_CIGAR_TYPE_NULL	0
+#define ALN_CIGAR_TYPE_MAT	3
+#define ALN_CIGAR_TYPE_INS	1
+#define ALN_CIGAR_TYPE_DEL	2
+#define ALN_CIGAR_TYPE_SKIP	7
+#define ALN_CIGAR_TYPE_CLIP1	5
+#define ALN_CIGAR_TYPE_CLIP2	6
+
+/* Letter printed for each type code, indexed by the code.  The array has
+ * exactly 8 elements, so no terminating NUL is stored. */
+static const char aln_cigar_string[8] = "?IDM?SHN";
+
+/* One CIGAR run packed into 16 bits: run length plus operation code. */
+typedef struct {
+	uint16_t len:13, type:3;
+} AlnCigar;
+
+/*
+ * Sum the run lengths of a CIGAR array into up to three totals.  Each output
+ * pointer may be NULL, in which case that total is not computed.
+ *   aln_size  - runs whose type has bit 0x04 clear
+ *   seq1_size - runs whose type has bit 0x01 set
+ *   seq2_size - runs whose type has bit 0x02 set
+ */
+static inline void cigars_lengths(AlnCigar *cigars, int n_cigar, int *aln_size, int *seq1_size, int *seq2_size){
+	int k;
+	if(aln_size) *aln_size = 0;
+	if(seq1_size) *seq1_size = 0;
+	if(seq2_size) *seq2_size = 0;
+	for(k = 0; k < n_cigar; k++){
+		const AlnCigar *run = cigars + k;
+		if(seq1_size && (run->type & 0x01) != 0) *seq1_size += run->len;
+		if(seq2_size && (run->type & 0x02) != 0) *seq2_size += run->len;
+		if(aln_size  && (run->type & 0x04) == 0) *aln_size += run->len;
+	}
+}
+
+/*
+ * Number of decimal digits in n.  Returns 0 for n == 0; a sign is not
+ * counted for negative values.
+ */
+static inline int _aln_cigar_h_num_str_len(int n){
+	int digits;
+	for(digits = 0; n != 0; n /= 10) digits++;
+	return digits;
+}
+
+/*
+ * Append a run of `len` units of `type` at cs[n_cigar], splitting it into
+ * several entries when it exceeds ALN_CIGAR_MAX_LEN.  A zero length appends
+ * nothing.  Returns the new entry count; the caller must provide the space.
+ */
+static inline int _aln_cigar_add_cigar(AlnCigar *cs, int n_cigar, int len, int type){
+	while(len){
+		const int chunk = (len > ALN_CIGAR_MAX_LEN)? ALN_CIGAR_MAX_LEN : len;
+		cs[n_cigar].len  = chunk;
+		cs[n_cigar].type = type;
+		n_cigar ++;
+		len -= chunk;
+	}
+	return n_cigar;
+}
+
+/*
+ * Render a CIGAR array as text ("12M3I..."), merging adjacent runs of the
+ * same type into one number.
+ *
+ * cigars / n_cigar - runs to print
+ * str              - destination buffer, or NULL to have one malloc'ed
+ *                    (the caller then owns it and must free() it)
+ *
+ * Returns the NUL-terminated string, or NULL if the allocation failed.
+ * Zero-length runs produce no output, including a trailing one (which used
+ * to print a bare type letter without a count).
+ */
+static inline char* cigars2string(AlnCigar *cigars, int n_cigar, char *str){
+	int i, n, str_len, type;
+	char *p;
+	if(str == NULL){
+		/* Upper bound: digits of every run plus one letter each; merging
+		 * runs can only make the text shorter. */
+		str_len = 0;
+		for(i=0;i<n_cigar;i++) str_len += _aln_cigar_h_num_str_len(cigars[i].len) + 1;
+		str = malloc(str_len + 1);
+		if(str == NULL) return NULL;
+	}
+	p = str;
+	if(n_cigar){
+		n = cigars[0].len;
+		type = cigars[0].type;
+		for(i=1;i<=n_cigar;i++){
+			/* Flush the pending run at a type change or at the end. */
+			if(n && (i == n_cigar || type != cigars[i].type)){
+				p += sprintf(p, "%d%c", n, aln_cigar_string[type]);
+				n = 0;
+			}
+			if(i == n_cigar) break;
+			n += cigars[i].len;
+			type = cigars[i].type;
+		}
+	}
+	p[0] = 0;
+	return str;
+}
+
+/*
+ * Parse the first `len` characters of a textual CIGAR into `cigars`.
+ * Digits accumulate a run length; every other character closes the run, with
+ * M/I/D/S/H/N (either case) selecting the type and anything else mapping to
+ * ALN_CIGAR_TYPE_NULL.  Returns the number of entries written.
+ */
+static inline int string2cigars(AlnCigar *cigars, char *str, int len){
+	int pos, op;
+	int count = 0;
+	int run = 0;
+	for(pos = 0; pos < len; pos++){
+		const char ch = str[pos];
+		if(ch >= '0' && ch <= '9'){
+			run = run * 10 + (ch - '0');
+			continue;
+		}
+		switch(ch){
+			case 'M': case 'm': op = ALN_CIGAR_TYPE_MAT;   break;
+			case 'I': case 'i': op = ALN_CIGAR_TYPE_INS;   break;
+			case 'D': case 'd': op = ALN_CIGAR_TYPE_DEL;   break;
+			case 'S': case 's': op = ALN_CIGAR_TYPE_CLIP1; break;
+			case 'H': case 'h': op = ALN_CIGAR_TYPE_CLIP2; break;
+			case 'N': case 'n': op = ALN_CIGAR_TYPE_SKIP;  break;
+			default:            op = ALN_CIGAR_TYPE_NULL;  break;
+		}
+		count = _aln_cigar_add_cigar(cigars, count, run, op);
+		run = 0;
+	}
+	return count;
+}
+
+/*
+ * Walk the runs from the front for up to `len` run units (all runs count
+ * toward that budget) and return how many of those units belong to runs with
+ * bit `seq_idx` set in their type.  A negative `len` means "no limit".
+ */
+static inline int rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
+	int idx;
+	int total = 0;
+	if(len < 0) len = 0x7FFFFFFF;
+	for(idx = 0; idx < n_cigar && len >= 0; idx++){
+		const int run = (int)cigars[idx].len;
+		if((cigars[idx].type >> seq_idx) & 0x01){
+			total += (len > run)? run : len;
+		}
+		len -= run;
+	}
+	return total;
+}
+
+/*
+ * Same as rank_cigars_seqlen, but the runs are walked from the last entry
+ * toward the first.
+ */
+static inline int rev_rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
+	int idx;
+	int total = 0;
+	if(len < 0) len = 0x7FFFFFFF;
+	for(idx = n_cigar - 1; idx >= 0 && len >= 0; idx--){
+		const int run = (int)cigars[idx].len;
+		if((cigars[idx].type >> seq_idx) & 0x01){
+			total += (len > run)? run : len;
+		}
+		len -= run;
+	}
+	return total;
+}
+
+/*
+ * Walk the runs from the front until `len` units of runs with bit `seq_idx`
+ * set have been consumed, and return the total number of run units passed
+ * (runs without the bit are counted in full).  If the runs are exhausted
+ * first, the sum of everything seen is returned.
+ */
+static inline int select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
+	int idx;
+	int total = 0;
+	for(idx = 0; idx < n_cigar; idx++){
+		const int run = (int)cigars[idx].len;
+		if(((cigars[idx].type >> seq_idx) & 0x01) == 0){
+			total += run;
+			continue;
+		}
+		if(len <= run) return total + len;
+		total += run;
+		len -= run;
+	}
+	return total;
+}
+
+/*
+ * Same as select_cigars_seqlen, but the runs are walked from the last entry
+ * toward the first.
+ */
+static inline int rev_select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){
+	int idx;
+	int total = 0;
+	for(idx = n_cigar - 1; idx >= 0; idx--){
+		const int run = (int)cigars[idx].len;
+		if(((cigars[idx].type >> seq_idx) & 0x01) == 0){
+			total += run;
+			continue;
+		}
+		if(len <= run) return total + len;
+		total += run;
+		len -= run;
+	}
+	return total;
+}
+
+/*
+ * Swap the two low bits (0x01 and 0x02) of every run's type in place; bit
+ * 0x04 is left untouched.
+ */
+static inline void flip_cigars(AlnCigar *cigars, int n_cigar){
+	int k;
+	for(k = 0; k < n_cigar; k++){
+		const unsigned op  = cigars[k].type;
+		const unsigned lo  = op & 0x01;
+		const unsigned hi  = (op & 0x02) >> 1;
+		cigars[k].type = (op & 0x04) | (lo << 1) | hi;
+	}
+}
+
+/*
+ * Copy into dst the part of a CIGAR that falls in the window [off, off+len),
+ * where positions are counted in run units over ALL runs regardless of type.
+ * Runs straddling a window edge are trimmed.  A negative len means "to the
+ * end"; len == 0 yields an empty result.  Returns the number of entries
+ * written to dst.
+ */
+static inline int sub_cigars(AlnCigar *dst, AlnCigar *cigars, int n_cigar, int off, int len){
+	int i, x, y, n_sub;
+	n_sub = 0;
+	/* 0x3FFFFFFF rather than INT_MAX so that off + len below stays in range */
+	if(len < 0) len = 0x3FFFFFFF;
+	else if(len == 0) return 0;
+	/* x / y are the start / end position of run i */
+	for(i=0,x=0;i<n_cigar;i++){
+		y = x + cigars[i].len;
+		if(x < off){
+			if(y < off){
+				/* run lies entirely before the window */
+			} else if(y < off + len){
+				/* run starts before the window and ends inside it */
+				n_sub = _aln_cigar_add_cigar(dst, n_sub, y - off, cigars[i].type);
+			} else {
+				n_sub = _aln_cigar_add_cigar(dst, n_sub, off + len - x, cigars[i].type);
+				break;
+			}
+		} else if(x >= off + len){
+			/* run starts at or after the window end */
+			break;
+		} else {
+			if(y < off + len){
+				/* run lies entirely inside the window */
+				n_sub = _aln_cigar_add_cigar(dst, n_sub, cigars[i].len, cigars[i].type);
+			} else {
+				n_sub = _aln_cigar_add_cigar(dst, n_sub, off + len - x, cigars[i].type);
+				break;
+			}
+		}
+		x = y;
+	}
+	return n_sub;
+}
+
+/*
+ * Copy into dst the part of a CIGAR that falls in the window [off, off+len),
+ * where positions advance only over runs whose type has bit `seq_idx` set.
+ * A negative len means "to the end".  Returns the number of entries written
+ * to dst.
+ *
+ * Two fixes relative to the previous version:
+ *  - the position is now advanced past runs that end before `off`; it used
+ *    to stay at 0, so any window not starting in the first run was wrong;
+ *  - the "unbounded" sentinel is 0x3FFFFFFF (as in sub_cigars) so that
+ *    off + len does not overflow a signed int.
+ */
+static inline int sub_seq_cigars(AlnCigar *dst, AlnCigar *c, int n, int seq_idx, int off, int len){
+	int d, i, x, y;
+	if(len < 0) len = 0x3FFFFFFF;
+	d = 0;
+	x = y = 0;
+	/* x / y are the start / end position of run i on the selected sequence */
+	for(i=0;i<n;i++){
+		if(x > off + len) break;
+		if((c[i].type >> seq_idx) & 0x01){
+			y = x + c[i].len;
+		} else {
+			y = x;
+		}
+		if(y < off){
+			/* run ends before the window: skip it but keep counting */
+			x = y;
+			continue;
+		}
+		if(x < off){
+			if(y < off + len){
+				d = _aln_cigar_add_cigar(dst, d, y - off, c[i].type);
+			} else {
+				d = _aln_cigar_add_cigar(dst, d, off + len - x, c[i].type);
+				break;
+			}
+		} else if(y < off + len){
+			d = _aln_cigar_add_cigar(dst, d, c[i].len, c[i].type);
+		} else {
+			d = _aln_cigar_add_cigar(dst, d, off + len - x, c[i].type);
+			break;
+		}
+		x = y;
+	}
+	return d;
+}
+
+/*
+ * Append cigars2 to cigars1 in place.  When the last run of cigars1 and the
+ * first run of cigars2 share a type they are fused into one run (re-split if
+ * the sum exceeds the per-entry maximum).  Returns the new length of
+ * cigars1; the caller must provide the space.
+ */
+static inline int cat_cigars(AlnCigar *cigars1, int n_cigar1, AlnCigar *cigars2, int n_cigar2){
+	int src = 0;
+	if(n_cigar2 == 0) return n_cigar1;
+	if(n_cigar1 && cigars1[n_cigar1-1].type == cigars2[0].type){
+		const int fused = cigars1[n_cigar1 - 1].len + cigars2[0].len;
+		n_cigar1 = _aln_cigar_add_cigar(cigars1, n_cigar1 - 1, fused, cigars2[0].type);
+		src = 1;
+	}
+	for(; src < n_cigar2; src++){
+		cigars1[n_cigar1].type = cigars2[src].type;
+		cigars1[n_cigar1].len  = cigars2[src].len;
+		n_cigar1 ++;
+	}
+	return n_cigar1;
+}
+
+/*
+ * Append one run of `len` units of `type` to cs, extending the last run
+ * instead when it already has that type.  Returns the new entry count.
+ */
+static inline int append_cigars(AlnCigar *cs, int n, int type, int len){
+	if(n != 0 && cs[n-1].type == type){
+		/* reopen the last entry and re-add it with the combined length */
+		n --;
+		len += cs[n].len;
+	}
+	return _aln_cigar_add_cigar(cs, n, len, type);
+}
+
+/* Reverse the order of the n runs in cs, in place. */
+static inline void reverse_cigars(AlnCigar *cs, int n){
+	int lo, hi;
+	for(lo = 0, hi = n - 1; lo < hi; lo++, hi--){
+		const AlnCigar tmp = cs[lo];
+		cs[lo] = cs[hi];
+		cs[hi] = tmp;
+	}
+}
+
+// seq1_size of c1 must be no less than c2
+// seq1_size of dst will equal length of c1
+//
+// Walks c1 and c2 in parallel.  Each run is reduced to one flag, bit
+// `seq_idx` of its type (f1 for c1, f2 for c2), and x1 / x2 hold the units
+// still unconsumed in the current run of each input.  Output is accumulated
+// in (x3, f3) and flushed to dst whenever the output type changes.
+// Returns the number of entries written to dst.
+// NOTE(review): three of the four flag combinations emit ALN_CIGAR_TYPE_INS,
+// including the one where both flags are set; only (f1 set, f2 clear) emits
+// ALN_CIGAR_TYPE_DEL.  Confirm this is the intended encoding.
+static inline int compile_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2, int seq_idx){
+	int i, j, n3, x1, x2, x3, f1, f2, f3;
+	n3 = 0;
+	x2 = 0;
+	x3 = 0;
+	f2 = ALN_CIGAR_TYPE_MAT;
+	f3 = ALN_CIGAR_TYPE_INS;
+	for(i=j=0;i<n1;i++){
+		x1 = c1[i].len;
+		f1 = (c1[i].type >> seq_idx) & 0x01;
+		while(x1){
+			if(x2 == 0){
+				// refill from c2; once c2 is exhausted, mirror c1's current run
+				if(j < n2){
+					x2 = c2[j].len;
+					f2 = (c2[j].type >> seq_idx) & 0x01;
+					j ++;
+				} else {
+					x2 = x1;
+					f2 = f1;
+				}
+			}
+			if(f1){
+				if(f2){
+					// both flags set: consume the shorter remainder from both
+					if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
+					if(x1 < x2){ x2 -= x1; x3 += x1; x1 = 0; }
+					else { x1 -= x2; x3 += x2; x2 = 0; }
+					f3 = ALN_CIGAR_TYPE_INS;
+				} else {
+					// only c1 flagged: the whole c2 remainder is emitted, x1 is untouched
+					if(x3 && f3 != ALN_CIGAR_TYPE_DEL){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
+					f3 = ALN_CIGAR_TYPE_DEL;
+					x3 += x2; x2 = 0;
+				}
+			} else {
+				if(f2){
+					// only c2 flagged: the whole c1 remainder is emitted, x2 is untouched
+					if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
+					f3 = ALN_CIGAR_TYPE_INS;
+					x3 += x1; x1 = 0;
+				} else {
+					// neither flagged: consume the shorter remainder from both
+					if(x3 && f3 != ALN_CIGAR_TYPE_INS){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); x3 = 0; }
+					f3 = ALN_CIGAR_TYPE_INS;
+					if(x1 < x2){ x2 -= x1; x3 += x1; x1 = 0; }
+					else { x1 -= x2; x3 += x2; x2 = 0; }
+				}
+			}
+		}
+	}
+	// flush whatever is still pending
+	if(x3){ n3 = _aln_cigar_add_cigar(dst, n3, x3, f3); }
+	return n3;
+}
+
+// c2 is the dst in align_cigars
+//
+// Re-emit the runs of c1 into dst while stepping through c2 alongside:
+// every ALN_CIGAR_TYPE_DEL run of c2 is copied to dst as-is without
+// consuming c1, and any other c2 run consumes the same number of units of c1,
+// which are emitted with c1's type.  Once c2 runs out, the rest of c1 is
+// copied through.  Returns the number of entries written to dst.
+static inline int apply_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2){
+	int a;
+	int b = 0;
+	int n_out = 0;
+	int left2 = 0;
+	int op2 = ALN_CIGAR_TYPE_MAT;
+	for(a = 0; a < n1; a++){
+		int left1 = c1[a].len;
+		const int op1 = c1[a].type;
+		while(left1){
+			if(left2 == 0){
+				if(b < n2){
+					left2 = c2[b].len;
+					op2 = c2[b].type;
+					b ++;
+				} else {
+					left2 = left1;
+					op2 = ALN_CIGAR_TYPE_MAT;
+				}
+			}
+			if(op2 == ALN_CIGAR_TYPE_DEL){
+				n_out = _aln_cigar_add_cigar(dst, n_out, left2, ALN_CIGAR_TYPE_DEL);
+				left2 = 0;
+				continue;
+			}
+			{
+				const int step = (left1 < left2)? left1 : left2;
+				n_out = _aln_cigar_add_cigar(dst, n_out, step, op1);
+				left1 -= step;
+				left2 -= step;
+			}
+		}
+	}
+	return n_out;
+}
+
+/*
+ * Compact a CIGAR in place: consecutive runs of the same type are fused and
+ * zero-length runs disappear.  Returns the new entry count.
+ */
+static inline int refine_cigars(AlnCigar *c, int n){
+	int rd;
+	int wr = 0;
+	int run_len = 0;
+	int run_type = ALN_CIGAR_TYPE_NULL;
+	for(rd = 0; rd < n; rd++){
+		if(c[rd].type != run_type){
+			/* type change: write out the finished run, start a new one */
+			if(run_len) wr = _aln_cigar_add_cigar(c, wr, run_len, run_type);
+			run_len  = 0;
+			run_type = c[rd].type;
+		}
+		run_len += c[rd].len;
+	}
+	if(run_len) wr = _aln_cigar_add_cigar(c, wr, run_len, run_type);
+	return wr;
+}
+
+/*
+ * Expand `seq` into an aligned, gapped string in dst according to the CIGAR.
+ * Runs with bit `seq_idx` set copy characters from seq, unless bit 0x04 is
+ * also set, in which case the characters are skipped.  Runs without the bit
+ * write '-' (again unless bit 0x04 is set, in which case nothing is written).
+ * dst is NUL-terminated; the number of characters written is returned.
+ */
+static inline int cigars_seq2aln(char *dst, AlnCigar *c, int n, int seq_idx, char *seq){
+	int i, k;
+	int out = 0;
+	int src = 0;
+	for(i = 0; i < n; i++){
+		const int run      = c[i].len;
+		const int hidden   = c[i].type & 0x04;
+		const int from_seq = (c[i].type >> seq_idx) & 0x01;
+		if(from_seq && hidden){
+			src += run;
+		} else if(from_seq){
+			for(k = 0; k < run; k++) dst[out++] = seq[src++];
+		} else if(!hidden){
+			for(k = 0; k < run; k++) dst[out++] = '-';
+		}
+	}
+	dst[out] = '\0';
+	return out;
+}
+
+#endif
diff --git a/asm_R2.c b/asm_R2.c
new file mode 100644
index 0000000..546b6ac
--- /dev/null
+++ b/asm_R2.c
@@ -0,0 +1,407 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#include "asm_R2.h"
+
+/*
+ * Obtain a vector of rp_t entries: reuse one from ef->pool_vec when the pool
+ * is non-empty, otherwise create a new one with an initial capacity of 64.
+ */
+Vector* get_pool_vec(EF *ef){
+	Vector *recycled = NULL;
+	if(vec_size(ef->pool_vec) == 0){
+		return init_vec(sizeof(rp_t), 64);
+	}
+	gpop_vec(ef->pool_vec, recycled, Vector*);
+	return recycled;
+}
+
+/* Empty `vec` and push it onto ef->pool_vec so get_pool_vec can reuse it. */
+void put_pool_vec(EF *ef, Vector *vec){
+	clear_vec(vec);
+	gpush_vec(ef->pool_vec, vec, Vector*);
+}
+
+/*
+ * Obtain a contig record: reuse one from ef->pool_ctg when available,
+ * otherwise allocate one together with its read-id vector and sequence
+ * string.  The len and closed fields are not set here.
+ */
+FContig* get_pool_ctg(EF *ef){
+	FContig *fresh = NULL;
+	if(vec_size(ef->pool_ctg) != 0){ // czc modified here to hold all ctgs
+		gpop_vec(ef->pool_ctg, fresh, FContig*);
+		return fresh;
+	}
+	fresh = malloc(sizeof(FContig));
+	fresh->rids = init_vec(sizeof(uint32_t), 6);
+	fresh->seq  = init_string(1024);
+	return fresh;
+}
+
+/*
+ * qsort-style comparator for Overlap records: orders by l_ol, largest first.
+ */
+int cmp_ol_func(const void *e1, const void *e2){
+	const Overlap *lhs = (const Overlap*)e1;
+	const Overlap *rhs = (const Overlap*)e2;
+	return (lhs->l_ol < rhs->l_ol) - (lhs->l_ol > rhs->l_ol);
+}
+
+/*
+ * Empty a contig's read-id list and sequence and push it onto ef->pool_ctg
+ * so get_pool_ctg can reuse it.
+ */
+void put_pool_ctg(EF *ef, FContig *ctg){
+	clear_vec(ctg->rids);
+	clear_string(ctg->seq);
+	gpush_vec(ef->pool_ctg, ctg, FContig*);
+}
+
+/*
+ * Register one read with the assembly unit.
+ *
+ * The read is truncated to MAX_RD_LEN bases (empty reads are ignored), stored
+ * in ef->rds, and given its own single-read contig in ef->ctgs.  Every
+ * ASM_KMER_SIZE-mer of the read is then entered into the k-mer index: for
+ * each k-mer, ef->rps holds a vector of (read id, offset of the k-mer's last
+ * base) pairs.
+ *
+ * seq    - read bases; rd_len bytes are read
+ * seq_id - caller's identifier for the read
+ * rank   - rank stored with the read (used by asm_ef_ctgs)
+ */
+void add_read2ef_core(EF *ef, char *seq, uint32_t seq_id, uint32_t rd_len, uint32_t rank){
+	FRead *rd;
+	FContig *ctg;
+	rp_t *p;
+	rhash_t RH, *rh;
+	Vector *vs;
+	uint32_t rid, cid, i, kmer;
+	int exists;
+	if(rd_len > MAX_RD_LEN) rd_len = MAX_RD_LEN;
+	if(rd_len == 0) return;
+	rid = vec_size(ef->rds);
+	rd  = get_next_vec_ref(ef->rds);
+	cid = vec_size(ef->ctgs);
+	ctg = get_pool_ctg(ef);
+	gpush_vec(ef->ctgs, ctg, FContig*);
+	rd->seq_id  = seq_id;
+	rd->rank    = rank;
+	rd->rd_len  = rd_len;
+	rd->ctg_id  = cid;
+	rd->ctg_off = 0;
+	rd->used    = 0;
+	ctg->len    = rd_len;
+	ctg->closed = 0;
+	gpush_vec(ctg->rids, rid, uint32_t);
+	append_string(ctg->seq, seq, rd_len);
+	memcpy(rd->seq, seq, rd_len);
+	rd->seq[rd_len] = 0;
+	kmer = 0;
+	RH.rps_idx = 0;
+	for(i=0;i<rd_len;i++){
+		/* Index the table with an unsigned byte: a plain char is negative
+		 * for bytes >= 0x80 where char is signed, which would read before
+		 * the start of the table. */
+		kmer = (((kmer << 2) | base_bit_table[(unsigned char)rd->seq[i]])) & ASM_KMER_MASK;
+		/* no full k-mer until ASM_KMER_SIZE bases have been shifted in */
+		if(i + 1 < ASM_KMER_SIZE) continue;
+		RH.kmer = kmer;
+		rh = prepare_rhash(ef->index, RH, &exists);
+		if(exists){
+			vs = gget_vec(ef->rps, rh->rps_idx, Vector*);
+		} else {
+			/* first occurrence of this k-mer: give it a position list */
+			rh->kmer  = kmer;
+			rh->rps_idx = vec_size(ef->rps);
+			vs = get_pool_vec(ef);
+			gpush_vec(ef->rps, vs, Vector*);
+		}
+		p = get_next_vec_ref(vs);
+		p->rid  = rid;
+		p->roff = i;
+	}
+}
+
+/*
+ * Allocate and initialise an assembly unit.
+ *
+ * ef_id  - identifier stored in the unit and used as the seq_id of read 0
+ * eseq   - tag sequence; kept in ef->eseq and added as read 0 with rank 0
+ * rd_len - number of bases in eseq
+ * min_ol - minimum overlap length accepted by the aligner/assembler
+ * min_sm - minimum similarity (fraction) accepted
+ *
+ * The copy into ef->eseq is clamped to the size of that buffer; previously
+ * an rd_len at or above the buffer size wrote past its end.
+ * Returns the new unit; release it with free_ef.
+ */
+EF* init_ef(uint32_t ef_id, char *eseq, uint32_t rd_len, uint32_t min_ol, float min_sm){
+	EF *ef;
+	uint32_t n_copy;
+	ef = malloc(sizeof(EF));
+	ef->ef_id  = ef_id;
+	ef->min_ol = min_ol;
+	ef->min_sm = min_sm;
+	ef->inc_tag = 1;
+	ef->rds    = init_vec(sizeof(FRead), 64);
+	ef->ols    = init_vec(sizeof(Overlap), 64);
+	ef->rps    = init_vec(sizeof(Vector*), 64);
+	ef->ctgs   = init_vec(sizeof(FContig*), 6);
+	ef->index  = init_rhash(1023);
+	ef->uniq   = init_u64hash(1023);
+	ef->pool_vec = init_vec(sizeof(Vector*), 64);
+	ef->pool_ctg = init_vec(sizeof(FContig*), 64);
+	/* leave room for the terminating NUL whatever the buffer size is */
+	n_copy = rd_len;
+	if(n_copy >= sizeof(ef->eseq)) n_copy = sizeof(ef->eseq) - 1;
+	memcpy(ef->eseq, eseq, n_copy);
+	ef->eseq[n_copy] = 0;
+	add_read2ef_core(ef, eseq, ef_id, rd_len, 0);
+	return ef;
+}
+
+/*
+ * Set ef->inc_tag.  When it is 0, asm_ef_ctgs skips every overlap that
+ * involves read 0.
+ */
+void set_inc_tag_ef(EF *ef, uint32_t inc){
+	ef->inc_tag = inc;
+}
+
+/*
+ * Add an ordinary read to the unit.  A rank of 0 is replaced by 1 before the
+ * read is handed to add_read2ef_core (init_ef and reset_ef use rank 0 for
+ * the read they add themselves).
+ */
+void add_read2ef(EF *ef, char *seq, uint32_t seq_id, uint32_t rd_len, uint32_t rank){
+	if(rank == 0) rank = 1;
+	add_read2ef_core(ef, seq, seq_id, rd_len, rank);
+}
+
+
+/*
+ * Measure the ungapped overlap of two sequences anchored so that position
+ * off1 of seq1 lines up with position off2 of seq2.
+ *
+ * The overlap extends left by min(off1, off2) and right by
+ * min(len1 - off1, len2 - off2).  Both *l_ol and *r_ol receive that total
+ * length; *n_mm receives the number of mismatching positions inside it.
+ */
+void find_overlap(char *seq1, uint32_t len1, uint32_t off1, char *seq2, uint32_t len2, uint32_t off2, uint32_t *l_ol, uint32_t *r_ol, uint32_t *n_mm){
+	const uint32_t tail1 = len1 - off1;
+	const uint32_t tail2 = len2 - off2;
+	const uint32_t left  = (off1 <= off2)? off1 : off2;
+	const uint32_t right = (tail1 <= tail2)? tail1 : tail2;
+	const uint32_t span  = left + right;
+	const char *a = seq1 + (off1 - left);
+	const char *b = seq2 + (off2 - left);
+	uint32_t k, diff = 0;
+	*r_ol = *l_ol = span;
+	for(k = 0; k < span; k++){
+		if(a[k] != b[k]) diff ++;
+	}
+	*n_mm = diff;
+}
+
+/*
+ * Find candidate overlaps between reads that share a k-mer.
+ *
+ * For every k-mer position list in ef->rps, each pair of entries is aligned
+ * (ungapped) around the shared k-mer.  A pair is tried once per relative
+ * offset, tracked via a 64-bit key in ef->uniq.  Alignments passing the
+ * min_ol / min_sm thresholds are appended to ef->ols, which is finally
+ * sorted with cmp_ol_func.
+ *
+ * Key layout: bits 0-15 larger read id, bits 16-31 smaller read id,
+ * bits 32+ or 48+ the offset difference depending on its sign.  The read id
+ * is widened to 64 bits before shifting: the bit-field promotes to a signed
+ * int, so for ids >= 32768 the old `rid << 16` set the sign bit and the
+ * conversion to uint64_t filled the upper half with ones, wiping the offset.
+ */
+void align_reads_ef(EF *ef){
+	FRead *rd1, *rd2;
+	Vector *rp;
+	Overlap *ol;
+	rp_t   *p1, *p2;
+	uint32_t i, j, k, r_ol, l_ol, n_mm;
+	uint64_t *uniq, aln_id;
+	int exists;
+	for(i=0;i<vec_size(ef->rps);i++){
+		rp = gget_vec(ef->rps, i, Vector*);
+		for(j=0;j<vec_size(rp);j++){
+			p1  = get_vec_ref(rp, j);
+			rd1 = get_vec_ref(ef->rds, p1->rid);
+			for(k=j+1;k<vec_size(rp);k++){
+				p2  = get_vec_ref(rp, k);
+				if(p2->rid > p1->rid){
+					aln_id = ((uint64_t)p1->rid << 16) | p2->rid;
+				} else {
+					aln_id = ((uint64_t)p2->rid << 16) | p1->rid;
+				}
+				if(p1->roff >= p2->roff){
+					aln_id |= (((uint64_t)p1->roff - p2->roff) << 32);
+				} else {
+					aln_id |= (((uint64_t)p2->roff - p1->roff) << 48);
+				}
+				uniq = prepare_u64hash(ef->uniq, aln_id, &exists);
+				if(exists) continue;
+				*uniq = aln_id;
+				rd2 = get_vec_ref(ef->rds, p2->rid);
+				find_overlap(rd1->seq, rd1->rd_len, p1->roff, rd2->seq, rd2->rd_len, p2->roff, &l_ol, &r_ol, &n_mm);
+				if(r_ol < ef->min_ol && l_ol < ef->min_ol) continue;
+				if(n_mm > (uint32_t)((1 - ef->min_sm) * r_ol) || n_mm > (uint32_t)((1 - ef->min_sm) * l_ol)) continue;
+				ol = get_next_vec_ref(ef->ols);
+				ol->used = 0;
+				/* the read whose k-mer sits further right is stored as the left read */
+				if(p1->roff >= p2->roff){
+					ol->l_rid = p1->rid;
+					ol->r_rid = p2->rid;
+					ol->l_ol  = l_ol;
+					ol->r_ol  = r_ol;
+					ol->n_mm  = n_mm;
+				} else {
+					ol->l_rid = p2->rid;
+					ol->r_rid = p1->rid;
+					ol->l_ol  = r_ol;
+					ol->r_ol  = l_ol;
+					ol->n_mm  = n_mm;
+				}
+			}
+		}
+	}
+	qsort_vec(ef->ols, cmp_ol_func);
+}
+
+/*
+ * Debug helper: print every overlap in ef->ols to stdout as a header line
+ * followed by the left read and the right read, the latter indented by
+ * (left read length - overlap length) spaces.
+ */
+void print_alignments(EF *ef){
+	uint32_t idx;
+	for(idx = 0; idx < vec_size(ef->ols); idx++){
+		Overlap *ol = get_vec_ref(ef->ols, idx);
+		FRead *left  = get_vec_ref(ef->rds, ol->l_rid);
+		FRead *right = get_vec_ref(ef->rds, ol->r_rid);
+		int pad = left->rd_len - ol->l_ol;
+		if(pad < 0) pad = 0;
+		printf("%u <-> %u = %u:%u\n", ol->l_rid, ol->r_rid, ol->l_ol, ol->n_mm);
+		printf("%s\n", left->seq);
+		printf("%*s%s\n", pad, "", right->seq);
+	}
+}
+
+/*
+ * Greedily merge contigs along the overlaps in ef->ols (already sorted by
+ * align_reads_ef).  Four passes are made over the list, each admitting a
+ * wider set of read-rank combinations:
+ *   0: both reads have the same rank
+ *   1: right read's rank is the left read's rank + 1
+ *   2: left read's rank does not exceed the right read's
+ *   3: no rank restriction
+ * An overlap is marked used once it has been applied, or once both reads are
+ * found to be in the same contig already.
+ */
+void asm_ef_ctgs(EF *ef){
+	Overlap *ol;
+	FRead *rd1, *rd2, *rd;
+	FContig *ctg1, *ctg2;
+	uint32_t i, off1, off2, l_ol, r_ol, n_mm, offset;
+	uint32_t j, rid, rank_type;
+	for(rank_type=0;rank_type<4;rank_type++){
+		for(i=0;i<vec_size(ef->ols);i++){
+			ol = get_vec_ref(ef->ols, i);
+			if(ol->used) continue;
+			/* with inc_tag off, overlaps involving read 0 are ignored */
+			if(ef->inc_tag == 0 && (ol->l_rid == 0 || ol->r_rid == 0)) continue;
+			rd1 = get_vec_ref(ef->rds, ol->l_rid);
+			rd2 = get_vec_ref(ef->rds, ol->r_rid);
+			if(rank_type == 0){
+				if(rd1->rank != rd2->rank) continue;
+			} else if(rank_type == 1){
+				if(rd1->rank + 1 != rd2->rank) continue;
+			} else if(rank_type == 2){
+				if(rd1->rank > rd2->rank) continue;
+			}
+			ctg1 = gget_vec(ef->ctgs, rd1->ctg_id, FContig*);
+			ctg2 = gget_vec(ef->ctgs, rd2->ctg_id, FContig*);
+			if(ctg1 == ctg2){ ol->used = 1; continue; }
+			/* anchor points of the read overlap, in contig coordinates */
+			off1 = rd1->ctg_off + rd1->rd_len - ol->l_ol;
+			off2 = rd2->ctg_off;
+			/* re-check the overlap on the full contig sequences */
+			find_overlap(ctg1->seq->string, ctg1->len, off1, ctg2->seq->string, ctg2->len, off2, &l_ol, &r_ol, &n_mm);
+			if(l_ol < ef->min_ol && r_ol < ef->min_ol){ continue; }
+			if(n_mm > (uint32_t)(l_ol * (1 - ef->min_sm)) || n_mm > (uint32_t)(r_ol * (1 - ef->min_sm))){ continue; }
+			ol->used = 1;
+			if(off1 >= off2){
+				/* ctg2 is absorbed into ctg1, placed `offset` bases in */
+				ctg2->closed = 1;
+				offset = off1 - off2;
+				for(j=0;j<vec_size(ctg2->rids);j++){
+					rid = gget_vec(ctg2->rids, j, uint32_t);
+					rd  = get_vec_ref(ef->rds, rid);
+					gpush_vec(ctg1->rids, rid, uint32_t);
+					rd->ctg_id  = rd1->ctg_id;
+					rd->ctg_off = rd->ctg_off + offset;
+				}
+				/* extend ctg1 with the part of ctg2 that sticks out past its end */
+				if(offset + ctg2->len > ctg1->len){
+					append_string(ctg1->seq, ctg2->seq->string + (ctg1->len - offset), ctg2->len - (ctg1->len - offset));
+					ctg1->len = offset + ctg2->len;
+				}
+			} else {
+				// ABCDEG
+				//continue;
+				/* mirror case: ctg1 is absorbed into ctg2 */
+				ctg1->closed = 1;
+				offset = off2 - off1;
+				for(j=0;j<vec_size(ctg1->rids);j++){
+					rid = gget_vec(ctg1->rids, j, uint32_t);
+					rd  = get_vec_ref(ef->rds, rid);
+					gpush_vec(ctg2->rids, rid, uint32_t);
+					rd->ctg_id  = rd2->ctg_id;
+					rd->ctg_off = rd->ctg_off + offset;
+				}
+				if(offset + ctg1->len > ctg2->len){
+					append_string(ctg2->seq, ctg1->seq->string + ctg2->len - offset, ctg1->len - (ctg2->len - offset));
+					ctg2->len = offset + ctg1->len;
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Write the unit's open (not absorbed) contigs to `out`.
+ * Output is one "E <ef_id>" line, then for each contig the lines
+ * C (running index), L (length), S (sequence), N (read count) and
+ * R (seq_id:offset:rank triples), closed by "//".  The stream is flushed
+ * after each contig.
+ */
+void output_ef_ctgs(EF *ef, FILE *out){
+	uint32_t idx, k;
+	uint32_t n_out = 0;
+	fprintf(out, "E %u\n", ef->ef_id);
+	for(idx = 0; idx < vec_size(ef->ctgs); idx++){
+		FContig *ctg = gget_vec(ef->ctgs, idx, FContig*);
+		if(ctg->closed) continue;
+		fprintf(out, "C %u\n", n_out);
+		n_out ++;
+		fprintf(out, "L %d\n", ctg->seq->size);
+		fprintf(out, "S %s\n", ctg->seq->string);
+		fprintf(out, "N %u\n", (uint32_t)vec_size(ctg->rids));
+		fprintf(out, "R");
+		for(k = 0; k < vec_size(ctg->rids); k++){
+			FRead *member = get_vec_ref(ef->rds, gget_vec(ctg->rids, k, uint32_t));
+			fprintf(out, " %u:%u:%u", member->seq_id, member->ctg_off, member->rank);
+		}
+		fprintf(out, "\n//\n");
+		fflush(out);
+	}
+}
+
+/*
+ * Re-initialise an existing unit for a new cluster, recycling its buffers.
+ *
+ * Reads, overlaps and both hashes are cleared; k-mer position lists and
+ * contigs go back to their pools.  The new tag sequence is then stored in
+ * ef->eseq and added as read 0 with rank 0.  inc_tag is left unchanged.
+ *
+ * Previously only a terminator was written at ef->eseq[rd_len]: the tag was
+ * never copied (leaving the old cluster's text in place) and an rd_len at or
+ * above the buffer size wrote out of bounds.  The copy is now done, clamped
+ * to the buffer.
+ */
+void reset_ef(EF *ef, uint32_t ef_id, char *eseq, uint32_t rd_len, uint32_t min_ol, float min_sm){
+	uint32_t i, n_copy;
+	ef->ef_id = ef_id;
+	ef->min_ol = min_ol;
+	ef->min_sm = min_sm;
+	clear_vec(ef->rds);
+	clear_vec(ef->ols);
+	for(i=0;i<vec_size(ef->rps);i++){ put_pool_vec(ef, gget_vec(ef->rps, i, Vector*)); }
+	clear_vec(ef->rps);
+	for(i=0;i<vec_size(ef->ctgs);i++){ put_pool_ctg(ef, gget_vec(ef->ctgs, i, FContig*)); }
+	clear_vec(ef->ctgs);
+	clear_rhash(ef->index);
+	clear_u64hash(ef->uniq);
+	n_copy = rd_len;
+	if(n_copy >= sizeof(ef->eseq)) n_copy = sizeof(ef->eseq) - 1;
+	memcpy(ef->eseq, eseq, n_copy);
+	ef->eseq[n_copy] = 0;
+	add_read2ef_core(ef, eseq, ef_id, rd_len, 0);
+}
+
+/*
+ * Destroy an assembly unit and everything it owns.
+ * Live contigs and k-mer position lists are first returned to their pools so
+ * that a single sweep over each pool releases all of them.
+ */
+void free_ef(EF *ef){
+	uint32_t k;
+	for(k = 0; k < vec_size(ef->ctgs); k++){
+		put_pool_ctg(ef, gget_vec(ef->ctgs, k, FContig*));
+	}
+	for(k = 0; k < vec_size(ef->rps); k++){
+		put_pool_vec(ef, gget_vec(ef->rps, k, Vector*));
+	}
+	free_vec(ef->rds);
+	free_vec(ef->ctgs);
+	free_vec(ef->rps);
+	free_vec(ef->ols);
+	free_rhash(ef->index);
+	free_u64hash(ef->uniq);
+	for(k = 0; k < vec_size(ef->pool_vec); k++){
+		free_vec(gget_vec(ef->pool_vec, k, Vector*));
+	}
+	for(k = 0; k < vec_size(ef->pool_ctg); k++){
+		FContig *pooled = gget_vec(ef->pool_ctg, k, FContig*);
+		free_vec(pooled->rids);
+		free_string(pooled->seq);
+		free(pooled);
+	}
+	free_vec(ef->pool_vec);
+	free_vec(ef->pool_ctg);
+	free(ef);
+}
+
+/*
+ * Read a cluster table from `in` and assemble each cluster in turn.
+ *
+ * Column 0 is the read id, column 1 the cluster id, column 2 the sequence
+ * used as the cluster's tag (reverse-complemented in place via reverse_dna
+ * on the cluster's first row) and column 3 the read to assemble.  Rows of one
+ * cluster must be consecutive.  A cluster is assembled and written to `out`
+ * only if it holds at least min_read reads; once a cluster holds more than
+ * max_read reads further rows are ignored.
+ *
+ * Returns the number of clusters encountered.
+ *
+ * Fixes relative to the previous version:
+ *  - a first row whose cluster id is 0 no longer leaves `ef` NULL (it used
+ *    to be dereferenced a few lines later);
+ *  - the unit is freed at the end even when the last cluster is too small
+ *    to be assembled.
+ */
+uint32_t asm_ef(FileReader *in, FILE *out, uint32_t min_ol, float min_sm, uint32_t min_read, uint32_t max_read){
+	EF *ef;
+	uint32_t ret, ef_id, eid, rank, seqid;
+	int n_col;
+	ef = NULL;
+	ret = 0;
+	ef_id = 0;
+	while((n_col = fread_table(in)) != -1){
+		if(n_col == 0) continue;
+		eid = atoi(get_col_str(in, 1));
+		/* ef == NULL catches a first cluster whose id equals the initial ef_id */
+		if(ef == NULL || eid != ef_id){
+			ef_id = eid;
+			ret ++;
+			reverse_dna(get_col_str(in, 2), get_col_len(in, 2));
+			if(ef){
+				/* finish the previous cluster before recycling the unit */
+				if(vec_size(ef->rds) >= min_read){  //magic number 5
+					align_reads_ef(ef);
+					//print_alignments(ef);
+					asm_ef_ctgs(ef);
+					output_ef_ctgs(ef, out);
+				}
+				reset_ef(ef, ef_id, get_col_str(in, 2), get_col_len(in, 2), min_ol, min_sm);
+			} else {
+				ef = init_ef(ef_id, get_col_str(in, 2), get_col_len(in, 2), min_ol, min_sm);
+			}
+		}
+		//rank  = atoi(get_col_str(in, 1));
+		rank  = 1;
+		seqid = atol(get_col_str(in, 0));
+		if (vec_size(ef->rds) <= max_read) {  //magic number 200
+			add_read2ef(ef, get_col_str(in, 3), seqid, get_col_len(in, 3), rank);
+		}
+	}
+	if(ef){
+		if(vec_size(ef->rds) >= min_read){
+			align_reads_ef(ef);
+			//print_alignments(ef);
+			asm_ef_ctgs(ef);
+			output_ef_ctgs(ef, out);
+		}
+		free_ef(ef);
+	}
+	return ret;
+}
+
+/* Print the command-line help to stdout.  Always returns 1. */
+int ef_usage(){
+	static const char help_text[] =
+"Local assemble fragments around restriction sites\n"
+"Usage: ef [options]\n"
+" -i <string> Input file [STDIN]\n"
+" -o <string> Output file [STDOUT]\n"
+" -l <int>    Minium length of overlap [5]\n"
+" -s <float>  Minium similiarity of overlap [0.90]\n"
+" -r <int>    Minium reads to execute assembly [5]\n"
+" -R <int>    Maxium reads to execute assembly [200]\n"
+"\n";
+	fputs(help_text, stdout);
+	return 1;
+}
diff --git a/asm_R2.h b/asm_R2.h
new file mode 100644
index 0000000..ef3135c
--- /dev/null
+++ b/asm_R2.h
@@ -0,0 +1,107 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef ASM_R2_H
+#define ASM_R2_H
+
+#include "string.h"
+#include "vector.h"
+#include "hashset.h"
+#include "file_reader.h"
+#include "dna.h"
+#include <stdint.h>
+#include <unistd.h>
+
+#define MAX_RD_LEN	255	/* FRead.seq is sized MAX_RD_LEN+1 chars */
+#define ASM_KMER_SIZE	5
+#define ASM_KMER_MASK	0x3FFu	/* low 10 bits set; presumably 2 bits per base x ASM_KMER_SIZE -- TODO confirm in asm_R2.c */
+
+typedef struct { uint32_t rid:16, roff:16; } rp_t;	/* presumably a (read id, offset in read) pair -- TODO confirm in asm_R2.c */
+
+typedef struct {
+	uint32_t kmer:10, rps_idx:22;	/* kmer is the hash key (see rhash_code/rhash_eq); rps_idx presumably indexes EF.rps -- TODO confirm */
+} rhash_t;
+
+#define rhash_code(r) (r).kmer	/* the hash code is the k-mer value itself */
+#define rhash_eq(r1, r2) ((r1).kmer == (r2).kmer)	/* equality looks at kmer only; rps_idx is ignored */
+define_hashset(rhash, rhash_t, rhash_code, rhash_eq);
+
+typedef struct {	/* one read held by an EF */
+	char     seq[MAX_RD_LEN+1];	/* room for MAX_RD_LEN characters plus a terminator */
+	uint32_t seq_id;
+	uint32_t rd_len:10, rank:10;	/* 10-bit fields: values up to 1023 */
+	uint32_t ctg_id:24, ctg_off:19, used:1;	/* presumably the contig this read was placed in and its offset there -- TODO confirm */
+} FRead;
+
+typedef struct {	/* one contig; instances are recycled through EF.pool_ctg (see get_pool_ctg/put_pool_ctg) */
+	uint32_t len:31, closed:1;
+	String   *seq;	/* released with free_string() in free_ef() */
+	Vector   *rids;	/* released with free_vec() in free_ef(); presumably ids of member reads -- TODO confirm */
+} FContig;
+
+typedef struct {	/* presumably one pairwise overlap between two reads as reported by find_overlap() -- TODO confirm */
+	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;	/* bit widths add up to 64 */
+} Overlap;
+
+typedef struct {	/* working state for assembling one cluster; created by init_ef(), recycled by reset_ef() */
+	uint32_t ef_id;
+	char     eseq[MAX_RD_LEN];	/* NOTE(review): one byte shorter than FRead.seq -- confirm init_ef() never stores MAX_RD_LEN chars plus a terminator */
+	Vector   *rds;	/* reads added with add_read2ef(); asm_ef() tests vec_size(rds) against min_read/max_read */
+	Vector   *ctgs;
+	Vector   *rps;
+	Vector   *ols;
+	rhash    *index;
+	u64hash  *uniq;
+	uint32_t min_ol;
+	float    min_sm;
+	uint32_t inc_tag;	/* set through set_inc_tag_ef() */
+
+	Vector   *pool_vec;	/* spare Vector objects, each freed with free_vec() in free_ef() */
+	Vector   *pool_ctg;	/* spare FContig objects, each freed in free_ef() */
+} EF;
+
+
+/*
+ * Public API of the local assembler. The guard must test the standard
+ * predefined macro __cplusplus and use the linkage string "C"; the previous
+ * spelling (__CPLUSPLUS / "c") never took effect, so C++ callers saw mangled names.
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+Vector* get_pool_vec(EF *ef);
+void put_pool_vec(EF *ef, Vector *vec);
+FContig* get_pool_ctg(EF *ef);
+int cmp_ol_func(const void *e1, const void *e2);
+void put_pool_ctg(EF *ef, FContig *ctg);
+void add_read2ef_core(EF *ef, char *seq, uint32_t seq_id, uint32_t rd_len, uint32_t rank);
+EF* init_ef(uint32_t ef_id, char *eseq, uint32_t rd_len, uint32_t min_ol, float min_sm); 
+void set_inc_tag_ef(EF *ef, uint32_t inc_tag);
+void add_read2ef(EF *ef, char *seq, uint32_t seq_id, uint32_t rd_len, uint32_t rank); 
+void find_overlap(char *seq1, uint32_t len1, uint32_t off1, char *seq2, uint32_t len2, uint32_t off2, uint32_t *l_ol, uint32_t *r_ol, uint32_t *n_mm);
+void align_reads_ef(EF *ef);
+void print_alignments(EF *ef);
+void asm_ef_ctgs(EF *ef);
+void output_ef_ctgs(EF *ef, FILE *out);
+void reset_ef(EF *ef, uint32_t ef_id, char *eseq, uint32_t rd_len, uint32_t min_ol, float min_sm);
+void free_ef(EF *ef);
+int ef_usage(void );
+uint32_t asm_ef(FileReader *in, FILE *out, uint32_t min_ol, float min_sm, uint32_t min_read, uint32_t max_read);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/bitvec.h b/bitvec.h
new file mode 100644
index 0000000..b2910fd
--- /dev/null
+++ b/bitvec.h
@@ -0,0 +1,169 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __BIT_VEC_RJ_H
+#define __BIT_VEC_RJ_H
+
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define get_bit8(bits, idx) ((((bits)[(idx) >> 3]) >> ((idx) & 0x07)) & 0x01)	/* bit idx of an array indexed in groups of 8 bits */
+#define get_bit16(bits, idx) ((((bits)[(idx) >> 4]) >> ((idx) & 0x0F)) & 0x01)	/* same, 16 bits per element */
+#define get_bit32(bits, idx) ((((bits)[(idx) >> 5]) >> ((idx) & 0x1F)) & 0x01)	/* same, 32 bits per element */
+#define get_bit64(bits, idx) ((((bits)[(idx) >> 6]) >> ((idx) & 0x3F)) & 0x01)	/* same, 64 bits per element */
+
+#define get_2bit8(bits, idx) ((((bits)[(idx) >> 2]) >> (((idx) & 0x03) << 1)) & 0x03)	/* idx-th 2-bit field, 4 fields per element */
+#define get_2bit16(bits, idx) ((((bits)[(idx) >> 3]) >> (((idx) & 0x07) << 1)) & 0x03)	/* same, 8 fields per element */
+#define get_2bit32(bits, idx) ((((bits)[(idx) >> 4]) >> (((idx) & 0x0F) << 1)) & 0x03)	/* same, 16 fields per element */
+#define get_2bit64(bits, idx) ((((bits)[(idx) >> 5]) >> (((idx) & 0x1F) << 1)) & 0x03)	/* same, 32 fields per element; idx is evaluated twice in every macro here */
+
+typedef struct {	/* growable bit vector with an optional rank index */
+	uint64_t *bits;	/* bit storage, 64 bits per word; n_cap / 8 bytes are allocated */
+	uint64_t n_bit;	/* number of bits appended so far (advanced by one2bitvec/zero2bitvec, reset by clear_bitvec) */
+	uint64_t n_cap;	/* capacity in bits */
+	uint64_t *sums;	/* rank index built by index_bitvec(); NULL until then */
+	uint64_t iter_idx;	/* cursor used by begin_iter_bitvec()/iter_bitvec() */
+} BitVec;
+
+/*
+ * Allocate a zero-filled bit vector with room for at least n_bit bits
+ * (n_bit == 0 selects 512). The capacity is rounded up to a multiple of
+ * 512 bits. Returns NULL when memory cannot be obtained; release the result
+ * with free_bitvec().
+ */
+static inline BitVec* init_bitvec(uint64_t n_bit){
+	BitVec *bitv;
+	if(n_bit == 0) n_bit = 64 * 8;
+	bitv = (BitVec*)malloc(sizeof(BitVec));
+	if(bitv == NULL) return NULL;
+	bitv->n_bit = 0;
+	bitv->n_cap = (((n_bit + 63) / 64) + 7) / 8 * 64 * 8;
+	/* calloc replaces the former malloc + memset pair */
+	bitv->bits  = (uint64_t*)calloc(bitv->n_cap / 8, 1);
+	if(bitv->bits == NULL){
+		free(bitv);
+		return NULL;
+	}
+	bitv->sums = NULL;
+	bitv->iter_idx = 0;	/* previously left uninitialised */
+	return bitv;
+}
+
+/* Reset the append position to 0; the stored bits are not wiped. */
+static inline void clear_bitvec(BitVec *bitv){
+	bitv->n_bit = 0;
+}
+
+/* Clear every bit of the allocated capacity. */
+static inline void zeros_bitvec(BitVec *bitv){
+	const size_t n_byte = bitv->n_cap / 8;
+	memset(bitv->bits, 0, n_byte);
+}
+
+/* Set every bit of the allocated capacity. */
+static inline void ones_bitvec(BitVec *bitv){
+	const size_t n_byte = bitv->n_cap / 8;
+	memset(bitv->bits, 0xFFU, n_byte);
+}
+
+/* Invert bit idx. No bounds check is performed by any accessor below. */
+static inline void flip_bitvec(BitVec *bitv, uint64_t idx){
+	const uint64_t mask = 1LLU << (idx & 0x3FU);
+	bitv->bits[idx >> 6] ^= mask;
+}
+
+/* Set bit idx to 1. */
+static inline void one_bitvec(BitVec *bitv, uint64_t idx){
+	const uint64_t mask = 1LLU << (idx & 0x3FU);
+	bitv->bits[idx >> 6] |= mask;
+}
+
+/* Set bit idx to 0. */
+static inline void zero_bitvec(BitVec *bitv, uint64_t idx){
+	const uint64_t mask = 1LLU << (idx & 0x3FU);
+	bitv->bits[idx >> 6] &= ~mask;
+}
+
+/* Return bit idx as 0 or 1. */
+static inline uint64_t get_bitvec(BitVec *bitv, uint64_t idx){
+	const uint64_t word = bitv->bits[idx >> 6];
+	return (word >> (idx & 0x3FU)) & 0x01LLU;
+}
+
+/*
+ * Make sure more than n_bit + num bits of capacity are available.
+ * Capacity doubles while below 8 Mi bits and then grows by 8 Mi bits at a time.
+ * After growing, every bit at position >= n_bit is zero and every bit below
+ * n_bit is preserved. Aborts when the reallocation fails.
+ *
+ * The earlier version started its memset at byte n_bit / 8, which erased live
+ * bits whenever n_bit was not a multiple of 8 and could leave the last new
+ * byte unzeroed; it also relied on arithmetic on void*.
+ */
+static inline void encap_bitvec(BitVec *bitv, uint64_t num){
+	uint64_t *grown;
+	uint64_t word, n_word;
+	if(bitv->n_bit + num < bitv->n_cap) return;
+	while(bitv->n_bit + num >= bitv->n_cap){
+		if(bitv->n_cap < 1024 * 1024 * 8){
+			bitv->n_cap <<= 1;
+		} else bitv->n_cap += 1024 * 1024 * 8;
+	}
+	/* keep the old pointer until realloc is known to have succeeded */
+	grown = (uint64_t*)realloc(bitv->bits, bitv->n_cap / 8);
+	if(grown == NULL) abort();
+	bitv->bits = grown;
+	word   = bitv->n_bit >> 6;
+	n_word = bitv->n_cap >> 6;
+	if(bitv->n_bit & 0x3FU){
+		/* partially used word: keep the low (live) bits, clear the rest */
+		bitv->bits[word] &= (1LLU << (bitv->n_bit & 0x3FU)) - 1;
+		word ++;
+	}
+	memset(bitv->bits + word, 0, (n_word - word) * sizeof(uint64_t));
+}
+
+/* Append a 1 bit, growing the storage when needed. */
+static inline void one2bitvec(BitVec *bitv){
+	encap_bitvec(bitv, 1);
+	one_bitvec(bitv, bitv->n_bit);
+	bitv->n_bit ++;
+}
+
+/* Append a 0 bit, growing the storage when needed. */
+static inline void zero2bitvec(BitVec *bitv){
+	encap_bitvec(bitv, 1);
+	zero_bitvec(bitv, bitv->n_bit);
+	bitv->n_bit ++;
+}
+
+/* Return the number of set bits in v (population count). */
+static inline uint32_t count_ones_bit32(uint32_t v){
+	v = (v & 0x55555555U) + ((v >> 1) & 0x55555555U);	/* sums of bit pairs */
+	v = (v & 0x33333333U) + ((v >> 2) & 0x33333333U);	/* sums of nibbles */
+	v = (v + (v >> 4)) & 0x0F0F0F0FU;	/* sums of bytes */
+	v += v >> 8;
+	v += v >> 16;
+	return v & 0x3FU;	/* the total is at most 32 */
+}
+
+#define ONES_STEP_4 0x1111111111111111ULL
+#define ONES_STEP_8 0x0101010101010101ULL
+
+/* Return the number of set bits in the 64-bit word x. */
+static inline int count_ones_bit64(const uint64_t x){
+	uint64_t acc;
+	acc = x - ((x >> 1) & (0x5 * ONES_STEP_4));	/* per-pair counts */
+	acc = (acc & (0x3 * ONES_STEP_4)) + ((acc >> 2) & (0x3 * ONES_STEP_4));	/* per-nibble counts */
+	acc = (acc + (acc >> 4)) & (0x0f * ONES_STEP_8);	/* per-byte counts */
+	return (int)((acc * ONES_STEP_8) >> 56);	/* the top byte gathers the sum of all bytes */
+}
+
+/*
+ * (Re)build the rank index over the whole capacity.
+ * For every block of 8 words (512 bits) two entries are written:
+ *   sums[2*b]     number of set bits before block b
+ *   sums[2*b + 1] seven 9-bit fields; field k is the number of set bits in
+ *                 words 0..k of the block
+ * One extra entry after the last block holds the grand total.
+ */
+static inline void index_bitvec(BitVec *bitv){
+	uint64_t base, first, slot, total, in_block;
+	uint32_t k;
+	if(bitv->sums) free(bitv->sums);
+	bitv->sums = (uint64_t*)malloc((bitv->n_cap / 64 / 8 * 2 + 1) * 8);
+	memset(bitv->sums, 0, bitv->n_cap / 64 / 8 * 2 * 8);
+	total = 0;
+	for(base=0;base<bitv->n_cap;base+=64*8){
+		first = base >> 6;
+		slot  = (first >> 3) << 1;
+		bitv->sums[slot] = total;
+		in_block = 0;
+		for(k=0;k<8;k++){
+			in_block += count_ones_bit64(bitv->bits[first + k]);
+			/* the eighth running sum is not stored, it only feeds the total */
+			if(k < 7) bitv->sums[slot + 1] |= in_block << (9 * k);
+		}
+		total += in_block;
+	}
+	bitv->sums[((base >> 6) >> 3) << 1] = total;
+}
+
+/*
+ * Number of set bits at positions below idx, answered from the table built
+ * by index_bitvec() (which must have been called after the last change).
+ */
+static inline uint64_t rank_bitvec(BitVec *bitv, uint64_t idx){
+	const uint64_t word  = idx >> 6;
+	const uint64_t block = word >> 3;
+	const uint64_t sub   = word & 0x07U;
+	const uint64_t bit   = idx & 0x3FU;
+	uint64_t rank;
+	rank = bitv->sums[block << 1];
+	/* whole words of this block that precede the target word */
+	if(sub != 0) rank += (bitv->sums[(block << 1) + 1] >> (9 * (sub - 1))) & 0x1FFU;
+	/* bits below idx inside the target word */
+	if(bit != 0) rank += count_ones_bit64(bitv->bits[word] << (64 - bit));
+	return rank;
+}
+
+/* Rewind the iteration cursor to bit 0. */
+static inline void begin_iter_bitvec(BitVec *bitv){ bitv->iter_idx = 0; }
+
+/*
+ * Return the position of the next set bit at or after the cursor and advance
+ * the cursor past it; returns 0xFFFFFFFFFFFFFFFF when the capacity is exhausted.
+ * When a rank index exists (index_bitvec()), empty 512-bit blocks are skipped
+ * in one step; without an index the scan still works word by word instead of
+ * dereferencing a NULL sums pointer.
+ */
+static inline uint64_t iter_bitvec(BitVec *bitv){
+	while(bitv->iter_idx < bitv->n_cap){
+		/* block start and both cumulative counts equal: the block holds no set bit */
+		if(bitv->sums != NULL && (bitv->iter_idx & 0x1FFU) == 0 && bitv->sums[(((bitv->iter_idx>>6)>>3)<<1)] == bitv->sums[((((bitv->iter_idx>>6)>>3)+1)<<1)]){
+			bitv->iter_idx += 64 * 8;
+			continue;
+		}
+		/* nothing left in this word at or above the cursor: jump to the next word */
+		if((bitv->bits[bitv->iter_idx>>6] >> (bitv->iter_idx & 0x3FU)) == 0){
+			bitv->iter_idx = ((bitv->iter_idx >> 6) + 1) << 6;
+			continue;
+		}
+		if((bitv->bits[bitv->iter_idx>>6] >> (bitv->iter_idx&0x3FU) & 0x01U)){
+			bitv->iter_idx ++;
+			return bitv->iter_idx - 1;
+		} else {
+			bitv->iter_idx ++;
+		}
+	}
+	return 0xFFFFFFFFFFFFFFFFLLU;
+}
+
+/* Release the bit storage, the rank index (if one was built) and the vector itself. */
+static inline void free_bitvec(BitVec *bitv){
+	free(bitv->sums);	/* free(NULL) is a no-op, so no guard is required */
+	free(bitv->bits);
+	free(bitv);
+}
+
+#endif
diff --git a/bloom_filter.h b/bloom_filter.h
new file mode 100644
index 0000000..b77c70e
--- /dev/null
+++ b/bloom_filter.h
@@ -0,0 +1,79 @@
+
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __BLOOM_FILTER_RJ_H
+#define __BLOOM_FILTER_RJ_H
+
+#include "bitvec.h"
+#include "hashset.h"
+
+static const uint32_t bloom_filter_total_seeds = 20;	/* number of entries in seeds[] */
+
+static const uint32_t seeds[20] = 	/* hash seeds handed to MurmurHash64A by put_bloomfilter/get_bloomfilter */
+{
+  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
+  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
+  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
+  3079ul,       6151ul,       12289ul,      24593ul,      49157ul
+};
+
+typedef struct {	/* Bloom filter backed by a BitVec */
+	BitVec *bits;	/* bit storage created by init_bitvec(size) */
+	size_t size;	/* modulus applied to every hash value */
+	uint32_t n_seed, seed_off;	/* seeds[seed_off .. seed_off+n_seed-1] (wrapping) are the ones in use */
+} BloomFilter;
+
+/*
+ * Create a Bloom filter of about `size` bits that probes n_seed hash seeds
+ * per key. n_seed is clamped to [1, bloom_filter_total_seeds]. `size` is
+ * replaced by the value _rj_hashset_find_prime(size) returns, so callers do
+ * not have to supply a prime themselves. Returns NULL when allocation fails.
+ */
+static inline BloomFilter* init_bloomfilter(size_t size, uint32_t n_seed){
+	BloomFilter *bf;
+	if(n_seed > bloom_filter_total_seeds) n_seed = bloom_filter_total_seeds;
+	if(n_seed == 0) n_seed = 1;
+	size = _rj_hashset_find_prime(size);
+	bf = malloc(sizeof(BloomFilter));
+	if(bf == NULL) return NULL;
+	bf->bits = init_bitvec(size);
+	if(bf->bits == NULL){
+		free(bf);
+		return NULL;
+	}
+	bf->size = size;
+	bf->n_seed = n_seed;
+	bf->seed_off = 0;
+	return bf;
+}
+
+/* Empty the filter by zeroing its bit storage. */
+static inline void clear_bloomfilter(BloomFilter *bf){
+	zeros_bitvec(bf->bits);
+}
+
+/* Move on to the next window of n_seed seeds, wrapping around the seed table. */
+static inline void change_seeds_bloomfilter(BloomFilter *bf){
+	const uint32_t shifted = bf->seed_off + bf->n_seed;
+	bf->seed_off = shifted % bloom_filter_total_seeds;
+}
+
+/* Insert the len-byte key: set one bit per active seed. */
+static inline void put_bloomfilter(BloomFilter *bf, const void *key, uint32_t len){
+	uint32_t k, seed;
+	for(k=0;k<bf->n_seed;k++){
+		seed = seeds[(k + bf->seed_off) % bloom_filter_total_seeds];
+		one_bitvec(bf->bits, MurmurHash64A(key, len, seed) % bf->size);
+	}
+}
+
+/* Return 1 when every bit probed for the len-byte key is set, 0 as soon as one is clear. */
+static inline int  get_bloomfilter(BloomFilter *bf, const void *key, uint32_t len){
+	uint32_t k, seed;
+	k = 0;
+	while(k < bf->n_seed){
+		seed = seeds[(k + bf->seed_off) % bloom_filter_total_seeds];
+		if(get_bitvec(bf->bits, MurmurHash64A(key, len, seed) % bf->size) == 0) return 0;
+		k ++;
+	}
+	return 1;
+}
+
+/* Release the bit storage and then the filter object itself. */
+static inline void free_bloomfilter(BloomFilter *bf){
+	BitVec *storage = bf->bits;
+	free_bitvec(storage);
+	free(bf);
+}
+
+#endif
diff --git a/cluster.c b/cluster.c
new file mode 100644
index 0000000..800c124
--- /dev/null
+++ b/cluster.c
@@ -0,0 +1,434 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#include "rainbow.h"
+
+/*
+ * Read all sequences from `fr` (FASTQ when is_fq is non-zero, FASTA otherwise)
+ * and pack them with seq2bits() into one buffer.
+ *
+ * fix_rd_len != 0: every stored read is exactly fix_rd_len bases; longer reads
+ *   are truncated, shorter reads are dropped. No per-read offset/length lists
+ *   are kept, read i lives at offset i * rd_len.
+ * fix_rd_len == 0: offsets and lengths are recorded per read in seqoffs/seqlens.
+ *
+ * sdb->n_rd is the number of reads actually stored. It used to count dropped
+ * reads as well, which made i * rd_len point past the packed data for every
+ * read following a dropped one.
+ * Returns NULL when the SeqDB itself cannot be allocated.
+ */
+SeqDB* load_seqdb(FileReader *fr, int is_fq, int fix_rd_len){
+	SeqDB *sdb;
+	Sequence *seq;
+	u64list *seqs;
+	uint64_t offset;
+	uint8_t len;
+	sdb = malloc(sizeof(SeqDB));
+	if(sdb == NULL) return NULL;
+	sdb->n_rd = 0;
+	sdb->rd_len = 0;
+	sdb->max_rd_len = 0;
+	if(!fix_rd_len){
+		sdb->seqoffs = init_u64list(1024);
+		sdb->seqlens = init_u8list(1024);
+	} else {
+		sdb->rd_len = fix_rd_len;
+		sdb->max_rd_len = fix_rd_len;
+		sdb->seqoffs = NULL;
+		sdb->seqlens = NULL;
+	}
+	seq = NULL;
+	seqs = init_u64list(1024);
+	offset = 0;
+	while(is_fq? fread_fastq_adv(&seq, fr, 5) : fread_fasta_adv(&seq, fr, 1)){
+		/* lengths live in 8 bits: keep the first 255 bases instead of wrapping modulo 256 */
+		len = (seq->seq.size > 255)? 255 : seq->seq.size;
+		if(fix_rd_len){
+			if(len < sdb->rd_len){
+				continue;
+			} else if(len > sdb->rd_len){
+				len = sdb->rd_len;
+			}
+		} else {
+			if(sdb->max_rd_len < len) sdb->max_rd_len = len;
+			push_u64list(sdb->seqoffs, offset);
+			push_u8list(sdb->seqlens, len);
+		}
+		/* count the read only once it is certain to be stored */
+		sdb->n_rd ++;
+		encap_u64list(seqs, (offset + len + 31) / 32 + 2);
+		seq2bits(seqs->buffer, offset, seq->seq.string, len);
+		offset += len;
+		if((sdb->n_rd & 0xFFF) == 0){ fprintf(stderr, "\r %u k reads  ", (sdb->n_rd >> 10)); fflush(stderr); }
+	}
+	/* the packed buffer is handed over to sdb; only the list wrapper is released */
+	sdb->seqs = seqs->buffer;
+	free(seqs);
+	return sdb;
+}
+
+/*
+ * Copy read `rid` out of the packed database into `seqs`, 32 bases per
+ * 64-bit word. The last, possibly partial, word is shifted right so that the
+ * bases occupy its low bits. Returns the read length in bases.
+ */
+uint8_t prepare_seq_seqdb(SeqDB *sdb, uint32_t rid, uint64_t *seqs){
+	uint64_t off;
+	uint32_t i, j;
+	uint8_t len;
+	if(sdb->rd_len){
+		/* widen before multiplying: rd_len * rid can exceed 32 bits on large inputs */
+		off = (uint64_t)sdb->rd_len * rid;
+		len = sdb->rd_len;
+	} else {
+		off = get_u64list(sdb->seqoffs, rid);
+		len = get_u8list(sdb->seqlens, rid);
+	}
+	j = 0;
+	for(i=0;i+32<len;i+=32){
+		seqs[j++] = sub32seqbits(sdb->seqs, off + i);
+	}
+	if(i < len){
+		seqs[j] = sub32seqbits(sdb->seqs, off + i);
+		/* drop the (32 - remaining) trailing base slots, 2 bits each */
+		seqs[j] >>= (32 - (len - i)) << 1;
+	}
+	return len;
+}
+
+/*
+ * Count mismatching bases between two packed sequences.
+ * NOTE(review): the original loop condition `i<2 && i<len` with a stride of
+ * 32 only ever admits i == 0, so just the first 64-bit word (at most 32
+ * bases) is compared. That behaviour is kept here unchanged -- confirm
+ * whether comparing the full length was intended.
+ */
+uint8_t cal_2seq_mm_core(uint64_t *seq1, uint64_t *seq2, uint8_t len1, uint8_t len2){
+	const uint32_t shorter = (len1 < len2)? len1 : len2;
+	uint32_t n_diff = 0;
+	if(shorter > 0){
+		n_diff += count_ones_bit64(dna_xor2ones(seq1[0] ^ seq2[0]));
+	}
+	return n_diff;
+}
+
+/*
+ * Look read `seqid` up in cluster->index by the pair of k-mers found at
+ * k-mer slots idxs[0] and idxs[1] of its packed sequence.
+ * Returns the id stored for that k-mer pair before this call, or seqid itself
+ * when the pair is new or the read is too short to contain slot idxs[1].
+ * In every indexed case the entry ends up pointing at seqid.
+ */
+uint32_t linking_core(Cluster *cluster, uint32_t seqid, uint64_t *seq, uint32_t seqlen){
+	kmer_t probe, *slot;
+	uint32_t pos, start, code;
+	uint32_t previous;
+	int exists;
+	if(seqlen < (cluster->idxs[1] + 1) * cluster->KMER_SIZE) return seqid;
+	probe.kmer1 = 0;
+	probe.kmer2 = 0;
+	probe.seqid = seqid;
+	/* first k-mer */
+	start = cluster->idxs[0] * cluster->KMER_SIZE;
+	pos = 0;
+	while(pos < cluster->KMER_SIZE){
+		code = bits2bit(seq, start + pos);
+		probe.kmer1 = (probe.kmer1 << 2) | code;
+		pos ++;
+	}
+	/* second k-mer */
+	start = cluster->idxs[1] * cluster->KMER_SIZE;
+	pos = 0;
+	while(pos < cluster->KMER_SIZE){
+		code = bits2bit(seq, start + pos);
+		probe.kmer2 = (probe.kmer2 << 2) | code;
+		pos ++;
+	}
+	slot = prepare_khash(cluster->index, probe, &exists);
+	if(!exists){
+		slot->kmer1 = probe.kmer1;
+		slot->kmer2 = probe.kmer2;
+		previous = seqid;
+	} else {
+		previous = slot->seqid;
+	}
+	slot->seqid = seqid;
+	return previous;
+}
+
+/*
+ * Follow the chain cluster->links starting at read `bt` until an entry links
+ * to itself. Every visited read is appended to cluster->bts (cleared first)
+ * and flagged in cluster->flags.
+ */
+void tracing_core(Cluster *cluster, uint32_t bt){
+	uint32_t cur, succ;
+	clear_u32list(cluster->bts);
+	cur = bt;
+	for(;;){
+		push_u32list(cluster->bts, cur);
+		one_bitvec(cluster->flags, cur);
+		succ = get_u32list(cluster->links, cur);
+		if(succ == cur) return;
+		cur = succ;
+	}
+}
+
+/*
+ * qsort-style comparator for SBT records: compares the packed sequences word
+ * by word over the number of words needed by the longer of the two.
+ */
+static inline int cmp_sbt(const void *e1, const void *e2){
+	SBT *lhs, *rhs;
+	uint32_t w, n_word;
+	lhs = (SBT*)e1;
+	rhs = (SBT*)e2;
+	n_word = (lhs->len > rhs->len)? lhs->len : rhs->len;
+	n_word = (31 + n_word) / 32;
+	for(w=0;w<n_word;w++){
+		if(lhs->seq[w] != rhs->seq[w]){
+			return (lhs->seq[w] < rhs->seq[w])? -1 : 1;
+		}
+	}
+	return 0;
+}
+
+uint32_t sorting_core(Cluster *cluster){	/* sort the reads listed in cluster->bts by packed sequence and give runs that compare equal a common group id; returns how many reads compared equal to their run's reference */
+	SBT *sbt1, *sbt2;
+	uint32_t ret, i, *gid1, *gid2;
+	ret = 0;
+	clear_sbtv(cluster->sbts);
+	for(i=0;i<count_u32list(cluster->bts);i++){	/* one SBT per read: id plus unpacked sequence words */
+		sbt1 = next_ref_sbtv(cluster->sbts);
+		sbt1->bt  = get_u32list(cluster->bts, i);
+		sbt1->len = prepare_seq_seqdb(cluster->sdb, sbt1->bt, sbt1->seq);
+	}
+	qsort(as_array_sbtv(cluster->sbts), count_sbtv(cluster->sbts), sizeof(SBT), cmp_sbt);
+	sbt1 = ref_sbtv(cluster->sbts, 0);	/* sbt1/gid1: reference record of the current run */
+	gid1 = ref_u32list(cluster->gids, sbt1->bt);
+	*gid1 = get_u32list(cluster->gid_map, *gid1);	/* replace the stored id by its current gid_map target */
+	for(i=1;i<count_sbtv(cluster->sbts);i++){
+		sbt2 = ref_sbtv(cluster->sbts, i);
+		gid2 = ref_u32list(cluster->gids, sbt2->bt);
+		*gid2 = get_u32list(cluster->gid_map, *gid2);
+		if(cmp_sbt((const void *)sbt1, (const void *)sbt2) == 0){
+			ret ++;
+			if(*gid1){	/* group id 0 means "not grouped yet" */
+				if(*gid2){
+					if(*gid1 < *gid2){	/* both grouped: the smaller id wins and gid_map redirects the larger one */
+						set_u32list(cluster->gid_map, *gid2, *gid1);
+						*gid2 = *gid1;
+					} else if(*gid1 > *gid2){
+						set_u32list(cluster->gid_map, *gid1, *gid2);
+						*gid1 = *gid2;
+					}
+				} else {
+					*gid2 = *gid1;
+				}
+			} else {
+				if(*gid2){
+					*gid1 = *gid2;
+				} else {
+					push_u32list(cluster->gid_map, ++cluster->gidoff);	/* neither grouped: open a new group id */
+					*gid1 = cluster->gidoff;
+					*gid2 = *gid1;
+				}
+			}
+		} else {
+			sbt1 = sbt2;	/* different sequence: it becomes the reference of the next run */
+			gid1 = gid2;
+		}
+	}
+	return ret;
+}
+
+uint32_t alning_core(Cluster *cluster){	/* compare every pair of reads in cluster->bts and merge the groups of pairs with at most max_mm mismatches; returns the number of pairs merged */
+	uint32_t idx1, idx2, ret, m, n, mm, *gid1, *gid2;
+	uint8_t len1, len2;
+	ret = 0;
+	for(m=0;m+1<count_u32list(cluster->bts);m++){
+		idx1 = get_u32list(cluster->bts, m);
+		len1 = prepare_seq_seqdb(cluster->sdb, idx1, cluster->seq1);
+		gid1 = ref_u32list(cluster->gids, idx1);
+		*gid1 = get_u32list(cluster->gid_map, *gid1);	/* replace the stored id by its current gid_map target */
+		for(n=m+1;n<count_u32list(cluster->bts);n++){
+			idx2 = get_u32list(cluster->bts, n);
+			len2 = prepare_seq_seqdb(cluster->sdb, idx2, cluster->seq2);
+			gid2 = ref_u32list(cluster->gids, idx2);
+			*gid2 = get_u32list(cluster->gid_map, *gid2);
+			if(*gid1 && *gid1 == *gid2) continue;	/* already in the same group */
+			mm = cal_2seq_mm_core(cluster->seq1, cluster->seq2, len1, len2);
+			if(mm > cluster->max_mm) continue;
+			ret ++;
+			if(*gid1){	/* group id 0 means "not grouped yet" */
+				if(*gid2){
+					if(*gid1 < *gid2){	/* both grouped (and different, see the continue above): the smaller id wins */
+						set_u32list(cluster->gid_map, *gid2, *gid1);
+						*gid2 = *gid1;
+					} else {
+						set_u32list(cluster->gid_map, *gid1, *gid2);
+						*gid1 = *gid2;
+					}
+				} else {
+					*gid2 = *gid1;
+				}
+			} else {
+				if(*gid2){
+					*gid1 = *gid2;
+				} else {
+					push_u32list(cluster->gid_map, ++cluster->gidoff);	/* neither grouped: open a new group id */
+					*gid1 = cluster->gidoff;
+					*gid2 = *gid1;
+				}
+			}
+		}
+	}
+	return ret;
+}
+
+void indexing_cluster(Cluster *cluster, FileReader *fr, int is_fq, int fix_rd_len){	/* load the reads from fr, then for every pair of k-mer slots link reads sharing both k-mers and merge their group ids; progress goes to stderr */
+	uint64_t cnt;
+	uint32_t i, seqid, len1, bt, *gid, max_rd_len;
+	clock_t t0, t1;
+	t0 = clock();
+	fprintf(stderr, "Load pair1\n"); fflush(stderr);
+	cluster->sdb = load_seqdb(fr, is_fq, fix_rd_len);
+	max_rd_len = cluster->sdb->max_rd_len;
+	t1 = clock();
+	fprintf(stderr, "\r %u reads, %.2f secs [OK]\n", cluster->sdb->n_rd, ((double)t1 - t0) / CLOCKS_PER_SEC); fflush(stderr);
+	for(cluster->idxs[0]=0;cluster->idxs[0]<cluster->KMER_NUM;cluster->idxs[0]++){	/* idxs[0] < idxs[1]: every unordered pair of k-mer slots */
+		for(cluster->idxs[1]=cluster->idxs[0]+1;cluster->idxs[1]<cluster->KMER_NUM;cluster->idxs[1]++){
+			fprintf(stderr, "Iterating %u/%u %u/%u\n", cluster->idxs[0], cluster->KMER_NUM, cluster->idxs[1], cluster->KMER_NUM); fflush(stderr);
+			if(max_rd_len && (cluster->idxs[1] + 1) * cluster->KMER_SIZE > max_rd_len){	/* no read is long enough to contain the second slot */
+				fprintf(stderr, "- Skip\n"); fflush(stderr);
+				continue;
+			}
+			clear_u32list(cluster->links);
+			clear_bitvec(cluster->flags);
+			cluster->index = init_khash(1023);	/* fresh k-mer pair index for this slot pair; freed after linking */
+			t0 = clock();
+			fprintf(stderr, "- Linking\n0 k"); fflush(stderr);
+			for(seqid=0;seqid<cluster->sdb->n_rd;seqid++){	/* links[seqid] = value returned by linking_core for this read */
+				len1 = prepare_seq_seqdb(cluster->sdb, seqid, cluster->seq1);
+				push_u32list(cluster->links, linking_core(cluster, seqid, cluster->seq1, len1));
+				if((seqid & 0xFFFU) == 0){ fprintf(stderr, "\r %u k", (seqid>>10)); fflush(stderr); }
+			}
+			free_khash(cluster->index);
+			t1 = clock();
+			fprintf(stderr, "\r %u k reads, %0.2f secs [OK]\n", (seqid >> 10), ((double)t1 - t0) / CLOCKS_PER_SEC); fflush(stderr);
+			if(cluster->max_seqid == 0){	/* first pass only: one group id per read, all starting at 0 */
+				cluster->max_seqid = seqid;
+				clear_u32list(cluster->gids);
+				for(i=0;i<seqid;i++){
+					push_u32list(cluster->gids, 0);
+				}
+			}
+			t0 = clock();
+			cluster->gid_map = init_u32list(cluster->gidoff + 1024);
+			for(i=0;i<=cluster->gidoff;i++){ push_u32list(cluster->gid_map, i); }	/* gid_map starts as the identity over ids 0..gidoff */
+			fprintf(stderr, "- Aligning (%u mismatches)\n", cluster->max_mm); fflush(stderr);
+			t0 = clock();
+			encap_bitvec(cluster->flags, count_u32list(cluster->links));
+			zeros_bitvec(cluster->flags);
+			cnt = 0;
+			for(i=cluster->max_seqid;i;i--){	/* walk reads from the highest id down; flagged reads were already visited by tracing_core */
+				bt = i - 1;
+				if(get_bitvec(cluster->flags, bt) == 0){
+					tracing_core(cluster, bt);
+					if(count_u32list(cluster->bts) == 1){	/* a chain of one read has nothing to compare */
+					} else if(count_u32list(cluster->bts) >= cluster->exact_limit){	/* chains of at least exact_limit reads go through sorting_core, shorter ones through alning_core */
+						cnt += sorting_core(cluster);
+					} else {
+						cnt += alning_core(cluster);
+					}
+				}
+				if((i&0xFFFF) == 0){ fprintf(stderr, "\r hits: %u", (unsigned)cnt); fflush(stderr); }
+			}
+			t1 = clock();
+			fprintf(stderr, "\r hits: %u, %0.2f secs", (unsigned)cnt, ((double)t1 - t0) / CLOCKS_PER_SEC);
+			fprintf(stderr, " [OK]\n"); fflush(stderr);
+			fprintf(stderr, "- Translating group ids "); fflush(stderr);
+			t0 = clock();
+			cnt = 0;
+			for(i=0;i<count_u32list(cluster->gids);i++){	/* apply gid_map once to every read's group id and count the changes */
+				gid = ref_u32list(cluster->gids, i);
+				if(get_u32list(cluster->gid_map, *gid) != *gid){ cnt ++;  *gid = get_u32list(cluster->gid_map, *gid); }
+			}
+			free_u32list(cluster->gid_map);	/* NOTE(review): cluster->gid_map keeps the freed pointer until the next pass reassigns it */
+			t1 = clock();
+			fprintf(stderr, " %llu in %0.2f secs [OK]\n", (unsigned long long)cnt, ((double)t1 - t0) / CLOCKS_PER_SEC); fflush(stderr);
+		}
+	}
+}
+
+/*
+ * Write the clustered reads to `out`, ordered by group id, one line per read:
+ *   <read index> TAB <group id> TAB <read 1 sequence> TAB <read 2 sequence>
+ * Reads whose group id is 0 are not written. When fr2 is given, the second
+ * mates are loaded from it (the program aborts if the read counts differ);
+ * otherwise the fourth column is "N".
+ */
+void clustering(Cluster *cluster, FileReader *fr2, int is_fq2, int fix_rd_len, FILE *out){
+	uint32_t i, seqid, gid;
+	char seq1[256], seq2[256];
+	u32list *rids;
+	clock_t t0, t1;
+	t0 = clock();
+	fprintf(stderr, "sorting groups ... "); fflush(stderr);
+	rids = init_u32list(cluster->sdb->n_rd);
+	for(i=0;i<cluster->sdb->n_rd;i++) push_u32list(rids, i);
+	sort_array(rids->buffer, rids->size, uint32_t, (((int64_t)cluster->gids->buffer[a]) - ((int64_t)cluster->gids->buffer[b])));
+	t1 = clock();
+	fprintf(stderr, " %.2f secs [OK]\n", ((double)t1 - t0) / CLOCKS_PER_SEC); fflush(stderr);
+	if(fr2){
+		t0 = clock();
+		fprintf(stderr, "Load pair2\n"); fflush(stderr);
+		cluster->sdb2 = load_seqdb(fr2, is_fq2, fix_rd_len);
+		t1 = clock();
+		fprintf(stderr, "\r %u reads, %.2f secs [OK]\n", cluster->sdb2->n_rd, ((double)t1 - t0) / CLOCKS_PER_SEC); fflush(stderr);
+		if(cluster->sdb->n_rd != cluster->sdb2->n_rd){
+			fprintf(stderr, " Pair2 didn't match Pair1 -- in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__); fflush(stderr); abort();
+		}
+	}
+	/* time the output phase itself; the final message used to report a stale interval */
+	t0 = clock();
+	for(i=0;i<cluster->sdb->n_rd;i++){
+		seqid = get_u32list(rids, i);
+		gid = get_u32list(cluster->gids, seqid);
+		if(gid == 0) continue;
+		if(cluster->sdb->rd_len){
+			/* widen before multiplying so the bit offset cannot wrap at 32 bits */
+			bits2seq(seq1, cluster->sdb->seqs, (uint64_t)seqid * cluster->sdb->rd_len, cluster->sdb->rd_len);
+		} else {
+			bits2seq(seq1, cluster->sdb->seqs, get_u64list(cluster->sdb->seqoffs, seqid), get_u8list(cluster->sdb->seqlens, seqid));
+		}
+		if(cluster->sdb2){
+			if(cluster->sdb2->rd_len){
+				bits2seq(seq2, cluster->sdb2->seqs, (uint64_t)seqid * cluster->sdb2->rd_len, cluster->sdb2->rd_len);
+			} else {
+				bits2seq(seq2, cluster->sdb2->seqs, get_u64list(cluster->sdb2->seqoffs, seqid), get_u8list(cluster->sdb2->seqlens, seqid));
+			}
+		} else {
+			seq2[0] = 'N';
+			seq2[1] = '\0';
+		}
+		fprintf(out, "%u\t%u\t%s\t%s\n", seqid, gid, seq1, seq2);
+		if((i & 0xFFFU) == 0){ fprintf(stderr, "\r output %u k seq    ", (unsigned)(i >> 10)); fflush(stderr); }
+	}
+	t1 = clock();
+	fprintf(stderr, "\r output %u k seq, %.2f secs [OK]\n", (unsigned)(i >> 10), ((double)t1 - t0) / CLOCKS_PER_SEC); fflush(stderr);
+	free_u32list(rids);
+}
+
+/*
+ * Allocate a Cluster and set up its work lists.
+ *   max_mm       largest mismatch count accepted by alning_core()
+ *   exact_limit  chain length from which indexing_cluster() uses sorting_core()
+ *   KMER_SIZE    bases per k-mer slot
+ *   KMER_NUM     number of k-mer slots to pair up
+ * No reads are loaded yet (sdb and sdb2 are NULL). Returns NULL when the
+ * Cluster itself cannot be allocated; release with free_cluster().
+ */
+Cluster* init_cluster(uint32_t max_mm, uint32_t exact_limit, uint32_t KMER_SIZE, uint32_t KMER_NUM){
+	Cluster *cluster;
+	cluster = malloc(sizeof(Cluster));
+	if(cluster == NULL) return NULL;
+	cluster->sdb  = NULL;
+	cluster->sdb2 = NULL;
+	cluster->gidoff  = 0;
+	cluster->max_seqid = 0;
+	cluster->max_mm  = max_mm;
+	cluster->exact_limit = exact_limit;
+	cluster->max_pair_len = 2 * KMER_SIZE;
+	cluster->idxs[0] = 0;
+	cluster->idxs[1] = 0;
+	cluster->index = NULL;
+	cluster->KMER_SIZE = KMER_SIZE;
+	cluster->KMER_NUM = KMER_NUM;
+	cluster->flags = init_bitvec(1024);
+	cluster->links = init_u32list(1024);
+	cluster->bts   = init_u32list(64);
+	cluster->sbts  = init_sbtv(1024);
+	cluster->gids  = init_u32list(1024);
+	cluster->gid_map = NULL;
+	return cluster;
+}
+
+/*
+ * Release a Cluster: both read databases (when loaded), the work lists and
+ * the Cluster itself. The offset/length lists of a database exist only in
+ * variable-length mode (rd_len == 0).
+ */
+void free_cluster(Cluster *cluster){
+	SeqDB *dbs[2];
+	int k;
+	dbs[0] = cluster->sdb;
+	dbs[1] = cluster->sdb2;
+	for(k=0;k<2;k++){
+		if(dbs[k] == NULL) continue;
+		free(dbs[k]->seqs);
+		if(dbs[k]->rd_len == 0){
+			free_u64list(dbs[k]->seqoffs);
+			free_u8list(dbs[k]->seqlens);
+		}
+		free(dbs[k]);
+	}
+	free_bitvec(cluster->flags);
+	free_u32list(cluster->links);
+	free_u32list(cluster->gids);
+	free_u32list(cluster->bts);
+	free_sbtv(cluster->sbts);
+	free(cluster);
+}
diff --git a/divide.c b/divide.c
new file mode 100644
index 0000000..9c165ae
--- /dev/null
+++ b/divide.c
@@ -0,0 +1,375 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#include "rainbow.h"
+
+/* Hand out a list: reuse one from div->cache when available, otherwise create a new one. */
+u32list* lend_ulist_div(Div *div){
+	u32list *borrowed;
+	if(pop_u32slist(div->cache, &borrowed)) return borrowed;
+	return init_u32list(4);
+}
+
+/* Give a list back: it is emptied and parked in div->cache; NULL is ignored. */
+void return_ulist_div(Div *div, u32list *list){
+	if(!list) return;
+	clear_u32list(list);
+	push_u32slist(div->cache, list);
+}
+
+typedef struct {	/* per-column tally for one base code */
+	uint32_t cnt;	/* how many reads show this base in the column */
+	uint32_t base;	/* base code 0..3 */
+} BaseCnt;
+
+/*
+ * Number of unordered pairs among n items, n * (n - 1) / 2.
+ * The product is formed in 64 bits: in 32 bits it wraps for n > 65536 even
+ * though the halved result still fits up to n = 92682.
+ */
+static inline uint32_t C_N_2(uint32_t n){
+	if(n == 0) return 0;
+	return (uint32_t)(((uint64_t)n * (n - 1)) / 2);
+}
+/*
+ * Pick the column on which group `gid` should be split: the column whose
+ * second most frequent base has the highest count, provided that count is
+ * at least k_allele and either at least K_allele or at least min_freq of the
+ * reads covering the column.
+ * Returns (column << 2) | base; the column equals div->n_col when no column
+ * qualifies.
+ */
+uint32_t _call_key_col(Div *div, uint32_t gid){
+	ReadInfo *rd;
+	u32list *members;
+	BaseCnt tally[4], held;
+	uint32_t pass, pos, col, row, code, covered;
+	uint32_t best_col, best_base, best_cnt;
+	best_col  = div->n_col;
+	best_base = 0;
+	best_cnt  = 0;
+	members = get_u32slist(div->grps, gid);
+	for(col=0;col<div->n_col;col++){
+		for(pos=0;pos<4;pos++){ tally[pos].base = pos; tally[pos].cnt = 0; }
+		for(row=0;row<count_u32list(members);row++){
+			rd = ref_rilist(div->rds, get_u32list(members, row));
+			if(rd->seqlen1 <= col) continue;
+			code = div->seqs->buffer[rd->seqoff + col];
+			tally[code&0x03].cnt ++;
+		}
+		covered = tally[0].cnt + tally[1].cnt + tally[2].cnt + tally[3].cnt;
+		if(covered == 0) break;
+		/* two bubble passes bring the two largest tallies to slots 0 and 1 */
+		for(pass=0;pass<2;pass++){
+			for(pos=3;pos>pass;pos--){
+				if(tally[pos].cnt > tally[pos-1].cnt){
+					held = tally[pos];
+					tally[pos] = tally[pos-1];
+					tally[pos-1] = held;
+				}
+			}
+		}
+		if(tally[1].cnt < div->k_allele) continue;
+		if(tally[1].cnt < div->K_allele && tally[1].cnt < div->min_freq * covered) continue;
+		if(tally[1].cnt > best_cnt){
+			best_cnt  = tally[1].cnt;
+			best_col  = col;
+			best_base = tally[1].base;
+		}
+	}
+	return (best_col << 2) | best_base;
+}
+
+/*
+ * Alternative key-column chooser. Every column whose second most frequent
+ * base passes the k_allele / K_allele / min_freq filter becomes a candidate.
+ * With one candidate it is returned directly. With several, each candidate
+ * splits the group into reads carrying the candidate base and the rest; for
+ * both parts the number of read pairs that differ is summed over all other
+ * columns and divided by the number of pairs in the part. The candidate whose
+ * worse part has the smallest such ratio wins (ties go to the later one).
+ * Returns (column << 2) | base; the column equals div->n_col when there is no
+ * candidate.
+ *
+ * NOTE(review): the per-column tallies below read div->n_col bases from every
+ * read without checking rd->seqlen1 -- confirm all reads of a group cover
+ * n_col columns.
+ */
+uint32_t call_key_col(Div *div, uint32_t gid){
+	ReadInfo *rd;
+	u32list *grp;
+	uint32_t i, j, k, col, row, key, c, tol, base, s1, s2;
+	BaseCnt cnts[4];
+	col_base_t *cb;
+	uint64_t MM1, MM2;
+	uint32_t n_p1, n_p2, idx;
+	double min_mm, mm1, mm2;
+	key = div->n_col;
+	base = 0;
+	grp = get_u32slist(div->grps, gid);
+	clear_cbv(div->cbs);
+	for(col=0;col<div->n_col;col++){
+		for(i=0;i<4;i++){ cnts[i].base = i; cnts[i].cnt = 0; }
+		for(row=0;row<count_u32list(grp);row++){
+			rd = ref_rilist(div->rds, get_u32list(grp, row));
+			if(rd->seqlen1 <= col) continue;
+			c = div->seqs->buffer[rd->seqoff + col];
+			cnts[c&0x03].cnt ++;
+		}
+		tol = cnts[0].cnt + cnts[1].cnt + cnts[2].cnt + cnts[3].cnt;
+		if(tol == 0) break;
+		/* two bubble passes bring the two largest tallies to slots 0 and 1 */
+		for(i=0;i<2;i++){
+			for(j=3;j>i;j--){
+				if(cnts[j].cnt > cnts[j-1].cnt){
+					swap_tmp(cnts[j].cnt, cnts[j-1].cnt, c);
+					swap_tmp(cnts[j].base, cnts[j-1].base, c);
+				}
+			}
+		}
+		if(cnts[1].cnt < div->k_allele) continue;
+		if(cnts[1].cnt < div->K_allele && cnts[1].cnt < div->min_freq * tol) continue;
+		cb = next_ref_cbv(div->cbs);
+		cb->col  = col;
+		cb->base = cnts[1].base;
+		cb->cnt  = cnts[1].cnt;
+	}
+	if(div->cbs->size == 1){
+		key = ref_cbv(div->cbs, 0)->col;
+		base = ref_cbv(div->cbs, 0)->base;
+	}
+	if(div->cbs->size > 1){
+		encap_u32list(div->ps1, div->n_col * 4);
+		encap_u32list(div->ps2, div->n_col * 4);
+		min_mm = 10000000;
+		for(i=0;i<div->cbs->size;i++){
+			cb = ref_cbv(div->cbs, i);
+			n_p1 = cb->cnt;
+			n_p2 = grp->size - cb->cnt;
+			/* ps1/ps2: per-column base tallies (4 counters per column) of the two parts */
+			memset(div->ps1->buffer, 0, div->n_col * 4 * 4);
+			memset(div->ps2->buffer, 0, div->n_col * 4 * 4); 
+			for(row=0;row<grp->size;row++){
+				rd = ref_rilist(div->rds, get_u32list(grp, row));
+				if(rd->seqlen1 <= cb->col) idx = 1;
+				else idx = (div->seqs->buffer[rd->seqoff + cb->col] != cb->base);
+				if(idx){
+					for(j=0;j<div->n_col;j++){
+						div->ps2->buffer[div->seqs->buffer[rd->seqoff + j] + 4 * j] ++;
+					}
+				} else {
+					for(j=0;j<div->n_col;j++){
+						div->ps1->buffer[div->seqs->buffer[rd->seqoff + j] + 4 * j] ++;
+					}
+				}
+			}
+			MM1 = MM2 = 0; 
+			for(j=0;j<div->n_col;j++){
+				/* skip the candidate's own column; this used to compare j with the
+				 * candidate ordinal i, which is unrelated to column numbers */
+				if(j == cb->col) continue;
+				/* differing pairs = all pairs minus pairs that share a base */
+				s1 = C_N_2(n_p1);
+				s2 = C_N_2(n_p2);
+				for (k = 0; k < 4; k++) {
+					s1 -= C_N_2(div->ps1->buffer[k + 4 * j]);
+					s2 -= C_N_2(div->ps2->buffer[k + 4 * j]);
+				}
+				MM1 += s1;
+				MM2 += s2;
+			}
+			mm1 = ((long double)MM1) / (n_p1*(n_p1-1)/2);
+			mm2 = ((long double)MM2) / (n_p2*(n_p2-1)/2);
+			if(mm1 < mm2) mm1 = mm2;
+			if(mm1 - min_mm < 0.00000000001){
+				min_mm = mm1;
+				key = cb->col;
+				base = cb->base;
+			}
+		}
+	}
+	return (key << 2) | base;
+}
+
+/*
+ * Recursively split group `gid` on the column/base chosen by _call_key_col().
+ * Recursion stops, and (gid, dep) is recorded in div->gids / div->deps, when
+ * no column qualifies, when div->rds holds fewer than K_allele reads, or when
+ * the depth exceeds 255.
+ * Otherwise two new groups are appended to div->grps: reads that carry the
+ * key base at the key column go to the second one, all others to the first.
+ * Four 64-bit marker words are pushed per new group; the second pushed word
+ * of markers[dep/64] has bit dep%64 set on top of the parent's value.
+ */
+void dividing_core(Div *div, uint32_t gid, int dep){
+	ReadInfo *rd;
+	u32list *grp, *sub;
+	uint32_t i, j, col, rid, gids[2], b;
+	col = _call_key_col(div, gid);
+	b = col & 0x03;
+	col >>= 2;
+	if(col >= div->n_col || div->rds->size < div->K_allele || dep > 255){
+		push_u32list(div->gids, gid);
+		push_u32list(div->deps, dep);
+		return;
+	}
+	for(i=0;i<2;i++){
+		gids[i] = count_u32slist(div->grps);
+		sub = lend_ulist_div(div);
+		push_u32slist(div->grps, sub);
+	}
+	/* fetch the parent only after the pushes above */
+	grp = get_u32slist(div->grps, gid);
+
+	/* marker words below the one holding bit `dep`: both new entries copy the parent */
+	for (j = 0; (int)j < dep/64; j++) {
+		push_u64list(div->markers[j], get_u64list(div->markers[j], gid));
+		push_u64list(div->markers[j], get_u64list(div->markers[j], gid));
+	}
+	/* word holding bit `dep`: first entry copies the parent, second adds the bit */
+	push_u64list(div->markers[j], get_u64list(div->markers[j], gid));
+	push_u64list(div->markers[j], get_u64list(div->markers[j], gid) | (1LLU << (dep%64)));
+	j++;
+	for (; j < 4; j++) {
+		push_u64list(div->markers[j], 0);
+		push_u64list(div->markers[j], 0);
+	}
+
+	for(i=0;i<count_u32list(grp);i++){
+		rid = get_u32list(grp, i);
+		rd = ref_rilist(div->rds, rid);
+		/* a read has a base at `col` only when seqlen1 > col (same rule as in
+		 * _call_key_col); the former `>=` read one byte past a read of length col */
+		if(rd->seqlen1 > col && div->seqs->buffer[rd->seqoff + col] == b){
+			push_u32list(get_u32slist(div->grps, gids[1]), rid);
+		} else {
+			push_u32list(get_u32slist(div->grps, gids[0]), rid);
+		}
+	}
+	for(i=0;i<2;i++){
+		dividing_core(div, gids[i], dep + 1);
+	}
+}
+
+/*
+ * Divide the currently loaded cluster and write one line per read to `out`:
+ *   seqid, new group id, first mate, second mate, old_gid, route
+ * where `route` is the '0'/'1' path of the read's group through the
+ * recursive splits (at most 255 characters).
+ * NOTE(review): the two String buffers are created with init_string(1024) and
+ * written without a visible capacity check -- confirm reads cannot be longer.
+ */
+void dividing(Div *div, uint32_t old_gid, FILE *out){
+	char route[257];
+	String *mate1, *mate2;
+	uint32_t w, g;
+
+	mate1 = init_string(1024);
+	mate2 = init_string(1024);
+	/* every marker list restarts with a single all-zero entry for group 0 */
+	for(w=0;w<4;w++){
+		clear_u64list(div->markers[w]);
+		push_u64list(div->markers[w], 0);
+	}
+	dividing_core(div, 0, 0);
+
+	for(g=0;g<count_u32list(div->gids);g++){
+		uint32_t leaf, depth, bit, new_gid, r;
+		u32list *members;
+
+		leaf    = get_u32list(div->gids, g);
+		members = get_u32slist(div->grps, leaf);
+		depth   = get_u32list(div->deps, g);
+		if(depth > 255) depth = 255;
+		for(bit=0;bit<depth;bit++){
+			uint64_t word = get_u64list(div->markers[bit/64], leaf);
+			route[bit] = '0' + ((word >> (bit%64)) & 0x01);
+		}
+		route[depth] = 0;
+
+		new_gid = ++div->gidoff;
+		for(r=0;r<count_u32list(members);r++){
+			ReadInfo *rd;
+			uint32_t k;
+
+			rd = ref_rilist(div->rds, get_u32list(members, r));
+			for(k=0;k<rd->seqlen1;k++){
+				mate1->string[k] = bit_base_table[div->seqs->buffer[rd->seqoff + k]];
+			}
+			mate1->string[k] = 0;
+			for(k=0;k<rd->seqlen2;k++){
+				mate2->string[k] = bit_base_table[div->seqs->buffer[rd->seqoff + rd->seqlen1 + k]];
+			}
+			mate2->string[k] = 0;
+			fprintf(out, "%u\t%u\t%s\t%s\t%u\t%s\n",
+				rd->seqid, new_gid, mate1->string, mate2->string, old_gid, route);
+		}
+	}
+	fflush(out);
+	free_string(mate1);
+	free_string(mate2);
+}
+
+/*
+ * Allocate a Div with empty read, sequence, group, marker and scratch lists.
+ * k_allele, K_allele and min_freq are stored unchanged.
+ * Returns NULL when the Div itself cannot be allocated; release with
+ * free_div().
+ */
+Div* init_div(uint32_t k_allele, uint32_t K_allele, float min_freq){
+	Div *div; int i;
+	div = malloc(sizeof(Div));
+	if(div == NULL) return NULL;
+	div->gidoff   = 0;
+	div->n_col    = 0;
+	div->k_allele = k_allele;
+	div->K_allele = K_allele;
+	div->min_freq = min_freq;
+	div->rds   = init_rilist(128);
+	div->seqs  = init_u8list(128 * 80);
+	div->grps  = init_u32slist(64);
+	for (i = 0; i < 4; i++) {
+		div->markers[i] = init_u64list(64);
+	}
+	div->deps = init_u32list(64);
+	div->cache = init_u32slist(64);
+	div->gids  = init_u32list(8);
+	div->cbs = init_cbv(12);
+	div->ps1 = init_u32list(32);
+	div->ps2 = init_u32list(32);
+	return div;
+}
+
+/*
+ * Empty a Div so the next cluster can be loaded: every group list is handed
+ * back through return_ulist_div(), then the read, sequence, group, result,
+ * marker and depth lists are cleared and the column count is zeroed.
+ */
+void reset_div(Div *div){
+	uint32_t g, w;
+
+	clear_rilist(div->rds);
+	clear_u8list(div->seqs);
+
+	g = 0;
+	while(g < count_u32slist(div->grps)){
+		return_ulist_div(div, get_u32slist(div->grps, g));
+		g++;
+	}
+	clear_u32slist(div->grps);
+	clear_u32list(div->gids);
+
+	for(w = 0; w < 4; w++) clear_u64list(div->markers[w]);
+	clear_u32list(div->deps);
+
+	div->n_col = 0;
+}
+
+/*
+ * Destroy a Div created by init_div(): reset it, free every list it owns
+ * (including each list parked in div->cache) and finally the Div itself.
+ */
+void free_div(Div *div){
+	uint32_t n;
+
+	reset_div(div);
+
+	free_rilist(div->rds);
+	free_u8list(div->seqs);
+	free_u32slist(div->grps);
+
+	/* the cached lists must be released before the cache container */
+	n = 0;
+	while(n < count_u32slist(div->cache)){
+		free_u32list(get_u32slist(div->cache, n));
+		n++;
+	}
+	free_u32list(div->ps1);
+	free_u32list(div->ps2);
+	free_cbv(div->cbs);
+	free_u32slist(div->cache);
+	free_u32list(div->gids);
+
+	for(n = 0; n < 4; n++) free_u64list(div->markers[n]);
+	free_u32list(div->deps);
+
+	free(div);
+}
+
+/*
+ * Read a table of (seqid, group id, mate1, mate2) rows from `fr`, collect
+ * consecutive rows sharing a group id and run dividing() on each collected
+ * group, writing the result to `out`.
+ * Returns the number of group-id changes seen in the input.
+ * NOTE(review): group id 0 is used as the "nothing loaded yet" value, so rows
+ * whose group id is 0 are never given a group list -- confirm ids start at 1.
+ */
+uint32_t div_reads(Div *div, FileReader *fr, FILE *out){
+	ReadInfo *rd;
+	uint32_t seqid, rank, gid, last_gid, rid, ret;
+	char *seq1, *seq2;
+	int i;
+	last_gid = 0;
+	ret = 0;
+	while(fread_table(fr) != -1){
+		seqid = atoll(get_col_str(fr, 0));
+		rank  = 1;
+		gid   = atoll(get_col_str(fr, 1));
+		seq1  = get_col_str(fr, 2);
+		seq2  = get_col_str(fr, 3);
+		if(gid != last_gid){
+			ret ++;
+			if(last_gid) dividing(div, last_gid, out);
+			last_gid = gid;
+			reset_div(div);
+			push_u32slist(div->grps, lend_ulist_div(div));
+		}
+		/* n_col tracks the longest first mate of the current group */
+		if(get_col_len(fr, 2) > (int)div->n_col) div->n_col = get_col_len(fr, 2);
+		rid = count_rilist(div->rds);
+		rd  = next_ref_rilist(div->rds);
+		rd->seqid   = seqid;
+		rd->rank    = rank;
+		rd->seqoff  = div->seqs->size;
+		rd->seqlen1 = get_col_len(fr, 2);
+		rd->seqlen2 = get_col_len(fr, 3);
+		/* index the 256-entry table with an unsigned byte: a plain (signed)
+		 * char >= 0x80 would otherwise produce a negative index */
+		for(i=0;i<rd->seqlen1;i++) push_u8list(div->seqs, base_bit_table[(unsigned char)seq1[i]]);
+		for(i=0;i<rd->seqlen2;i++) push_u8list(div->seqs, base_bit_table[(unsigned char)seq2[i]]);
+		push_u32list(get_u32slist(div->grps, 0), rid);
+	}
+	if(last_gid) dividing(div, last_gid, out);
+	return ret;
+}
+
diff --git a/dna.h b/dna.h
new file mode 100644
index 0000000..58ec7be
--- /dev/null
+++ b/dna.h
@@ -0,0 +1,193 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __DNA_RJ_H
+#define __DNA_RJ_H
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/*
+ * Byte -> 2-bit base code: 'A'/'a' = 0, 'C'/'c' = 1, 'G'/'g' = 2,
+ * 'T'/'t' = 3.  Every other byte value maps to 4.
+ */
+static const uint8_t base_bit_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+
+/*
+ * Byte -> 4-bit code, case-insensitive.  The four plain bases are one-hot
+ * (A = 1, C = 2, G = 4, T = 8); the other letters listed in bit4_base_table
+ * take the remaining values 3..14; every other byte, including 'N', is 15.
+ */
+static const uint8_t base_bit4_table[256] = {
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+
+	15,  1, 14,  2,  13, 15, 15,  4,  11, 15, 15, 12,  15,  3, 15, 15,
+	15, 15,  5,  6,   8, 15,  7,  9,  15, 10, 15, 15,  15, 15, 15, 15,
+	15,  1, 14,  2,  13, 15, 15,  4,  11, 15, 15, 12,  15,  3, 15, 15,
+	15, 15,  5,  6,   8, 15,  7,  9,  15, 10, 15, 15,  15, 15, 15, 15,
+
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
+	15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15
+};
+
+/* 4-bit code -> 2-bit code: only the one-hot values 1, 2, 4, 8 map to 0..3; the rest give 4. */
+static const uint8_t bit4_bit_table[16] = { 4, 0, 1, 4,  2, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4 };
+
+/* Code -> character lookups.  Both arrays are sized exactly to their text, so
+ * they carry no terminating NUL and must only be indexed, never used as C strings. */
+static const char bit_base_table[6] = "ACGTN-";
+static const char bit4_base_table[16] = "-ACMGRSVTWYHKDBN";
+
+/*
+ * Collapse every 2-bit field of `seq` to a single flag: the low bit of each
+ * field in the result is set when either bit of that field was set.
+ */
+static inline uint64_t dna_xor2ones(uint64_t seq){
+	const uint64_t low_bits = 0x5555555555555555LLU;
+	uint64_t from_high = (seq >> 1) & low_bits;
+	uint64_t from_low  = seq & low_bits;
+	return from_high | from_low;
+}
+
+/*
+ * Reverse-complement a 2-bit packed sequence of `seq_size` bases held in the
+ * low bits of `seq`.  Bits are complemented, the 2-bit fields are reversed
+ * across the whole word, and the result is shifted back down.
+ * seq_size must be in 0..32; 0 yields 0 (the shift by 64 it would otherwise
+ * need is undefined behaviour).
+ */
+static inline uint64_t dna_rev_seq(uint64_t seq, uint8_t seq_size){
+	if(seq_size == 0) return 0;
+	seq = ~seq;
+	seq = ((seq & 0x3333333333333333LLU)<< 2) | ((seq & 0xCCCCCCCCCCCCCCCCLLU)>> 2);
+	seq = ((seq & 0x0F0F0F0F0F0F0F0FLLU)<< 4) | ((seq & 0xF0F0F0F0F0F0F0F0LLU)>> 4);
+	seq = ((seq & 0x00FF00FF00FF00FFLLU)<< 8) | ((seq & 0xFF00FF00FF00FF00LLU)>> 8);
+	seq = ((seq & 0x0000FFFF0000FFFFLLU)<<16) | ((seq & 0xFFFF0000FFFF0000LLU)>>16);
+	seq = ((seq & 0x00000000FFFFFFFFLLU)<<32) | ((seq & 0xFFFFFFFF00000000LLU)>>32);
+	return seq >> (64 - (seq_size<<1));
+}
+
+/*
+ * Pack the first `ksize` characters of `seq` into a k-mer, two bits per
+ * base, first base in the highest used bits.
+ * The lookup table is indexed with an unsigned byte so that characters
+ * >= 0x80 cannot produce a negative index where plain char is signed.
+ */
+static inline uint64_t seq2kmer(char *seq, uint32_t ksize){
+	uint64_t kmer;
+	uint32_t i;
+	kmer = 0;
+	for(i=0;i<ksize;i++) kmer = (kmer << 2) | base_bit_table[(unsigned char)seq[i]];
+	return kmer;
+}
+
+/*
+ * Pack the reverse complement of the first `ksize` characters of `seq` into
+ * a k-mer: characters are read from position ksize-1 down to 0 and each
+ * 2-bit code is bitwise complemented.
+ * The lookup table is indexed with an unsigned byte so that characters
+ * >= 0x80 cannot produce a negative index where plain char is signed.
+ */
+static inline uint64_t seq2revkmer(char *seq, uint32_t ksize){
+	uint64_t kmer;
+	uint32_t i;
+	kmer = 0;
+	for(i=0;i<ksize;i++) kmer = (kmer << 2) | ((~base_bit_table[(unsigned char)seq[ksize - 1 - i]]) & 0x03);
+	return kmer;
+}
+
+/* Mask keeping the low 2*ksize bits; ksize must be in 1..32 (0 would shift by 64). */
+#define kmer_mask(ksize) (0xFFFFFFFFFFFFFFFFLLU >> ((32 - (ksize)) * 2))
+
+/*
+ * beg_seq2kmers ... end_seq2kmers: loop over every k-mer of `seq`, left to
+ * right.  Inside the loop body `kmer` holds the current k-mer and `idx` its
+ * start offset.  `kmer` and `idx` must be lvalues.
+ */
+#define beg_seq2kmers(seq, seqlen, ksize, kmask, kmer, idx) {	\
+kmer = 0;	\
+for(idx=0;idx<(ksize)-1;idx++) kmer = (((kmer) << 2) | base_bit_table[(unsigned char)(seq)[idx]]);	\
+for(idx=0;idx<=(seqlen)-(ksize);idx++){	\
+	kmer = ((kmer << 2) | base_bit_table[(unsigned char)(seq)[idx + (ksize) - 1]]) & (kmask);
+#define end_seq2kmers } }
+
+/*
+ * beg_seq2revkmers ... end_seq2revkmers: the same loop reading `seq` from its
+ * last character backwards (bases are reversed, not complemented).
+ * end_seq2kmers closes this loop as well, as it always has.
+ */
+#define beg_seq2revkmers(seq, seqlen, ksize, kmask, kmer, idx) {	\
+kmer = 0;	\
+for(idx=0;idx<(ksize)-1;idx++) kmer = (((kmer) << 2) | base_bit_table[(unsigned char)(seq)[(seqlen) - 1 - idx]]);	\
+for(idx=0;idx<=(seqlen)-(ksize);idx++){	\
+	kmer = ((kmer << 2) | base_bit_table[(unsigned char)(seq)[(seqlen) - idx - (ksize)]]) & (kmask);
+#define end_seq2revkmers } }
+
+/*
+ * Reverse-complement `seq` (length `len`) in place.  A/C/G/T are
+ * complemented preserving case; every other character is only moved.
+ */
+static inline void reverse_dna(char *seq, int len){
+	int lo, hi, p;
+	char tmp, comp;
+	for(lo = 0, hi = len - 1; lo < hi; lo++, hi--){
+		tmp = seq[lo];
+		seq[lo] = seq[hi];
+		seq[hi] = tmp;
+	}
+	for(p = 0; p < len; p++){
+		switch(seq[p]){
+			case 'a': comp = 't'; break;
+			case 'A': comp = 'T'; break;
+			case 'c': comp = 'g'; break;
+			case 'C': comp = 'G'; break;
+			case 'g': comp = 'c'; break;
+			case 'G': comp = 'C'; break;
+			case 't': comp = 'a'; break;
+			case 'T': comp = 'A'; break;
+			default:  continue; /* not a base: leave as is */
+		}
+		seq[p] = comp;
+	}
+}
+
+/*
+ * Store the 2-bit value `bit` as base number `off` of the packed array
+ * `bits` (32 bases per 64-bit word, base 0 of a word in its two highest
+ * bits).  The word is zeroed when its first base is written, and the value
+ * is OR-ed in, so bases must be written in increasing order.
+ * `off` is evaluated several times.
+ */
+#define bit2bits(bits, off, bit) { if(((off) & 0x1FU) == 0) (bits)[(off) >> 5] = 0; (bits)[(off) >> 5] |= ((uint64_t)(bit)) << (((~(off)) & 0x1FU) << 1); }
+
+/*
+ * Append `seqlen` characters of `seq` to the packed array `bits`, starting
+ * at base offset `bitoff`.  Characters that are not A/C/G/T are replaced by
+ * a random base from lrand48().
+ * The lookup table is indexed with an unsigned byte so that characters
+ * >= 0x80 cannot produce a negative index where plain char is signed.
+ */
+static inline void seq2bits(uint64_t *bits, uint64_t bitoff, char *seq, uint32_t seqlen){
+	uint64_t i, c;
+	for(i=0;i<seqlen;i++){
+		c = base_bit_table[(unsigned char)seq[i]];
+		if(c == 4) c = lrand48() & 0x03;
+		bit2bits(bits, bitoff + i, c);
+	}
+}
+
+/*
+ * Append the reverse complement of `seq` (`seqlen` characters) to the packed
+ * array `bits`, starting at base offset `bitoff`.  Characters that are not
+ * A/C/G/T are replaced by a random base from lrand48().
+ * The lookup table is indexed with an unsigned byte so that characters
+ * >= 0x80 cannot produce a negative index where plain char is signed.
+ */
+static inline void revseq2bits(uint64_t *bits, uint64_t bitoff, char *seq, uint32_t seqlen){
+	uint64_t i, c;
+	for(i=0;i<seqlen;i++){
+		c = base_bit_table[(unsigned char)seq[seqlen - i - 1]];
+		if(c == 4) c = lrand48();
+		c = (~c) & 0x03; /* complement; also reduces the random value to 2 bits */
+		bit2bits(bits, bitoff + i, c);
+	}
+}
+
+/* Read base number `off` (2 bits) from the packed array `bits`; layout as written by bit2bits. */
+#define bits2bit(bits, off) (((bits)[(off) >> 5] >> (((~(off)) & 0x1FU) << 1)) & 0x03U)
+
+/*
+ * Decode `len` bases of the packed array `bits`, starting at base offset
+ * `off`, into the NUL-terminated string `seq` (needs len + 1 bytes).
+ */
+static inline void bits2seq(char *seq, uint64_t *bits, uint64_t off, uint32_t len){
+	uint32_t n = 0;
+	while(n < len){
+		uint32_t code = bits2bit(bits, off + n);
+		seq[n] = bit_base_table[code];
+		n++;
+	}
+	seq[len] = 0;
+}
+
+/*
+ * Decode `len` bases of the packed array `bits`, starting at base offset
+ * `off`, as their reverse complement into the NUL-terminated string `seq`
+ * (needs len + 1 bytes).
+ */
+static inline void bits2revseq(char *seq, uint64_t *bits, uint64_t off, uint32_t len){
+	uint32_t left;
+	/* `left` counts the output slots still to fill, from the end backwards */
+	for(left = len; left > 0; left--){
+		uint64_t pos = off + (len - left);
+		uint32_t code = (bits[pos>>5] >> (((~pos) & 0x1FU) << 1)) & 0x03;
+		seq[left - 1] = bit_base_table[(~code)&0x03];
+	}
+	seq[len] = 0;
+}
+
+/*
+ * Return the 32 bases (64 bits) of the packed array `src` that start at base
+ * offset `off`.  An unaligned offset combines two neighbouring words, so
+ * src[(off >> 5) + 1] must then be readable.
+ */
+static inline uint64_t sub32seqbits(uint64_t *src, uint64_t off){
+	uint64_t word = off >> 5;
+	uint64_t skip = off & 0x1F;
+	if(skip == 0) return src[word];
+	return (src[word] << (skip << 1)) | (src[word + 1] >> ((32 - skip) << 1));
+}
+
+#endif
diff --git a/ezmsim.c b/ezmsim.c
new file mode 100644
index 0000000..f6c1bcd
--- /dev/null
+++ b/ezmsim.c
@@ -0,0 +1,770 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <assert.h>
+
+#define PACKAGE_VERSION "0.1.1"
+
+/*
+ * Byte -> base code: 'A'/'a' = 0, 'C'/'c' = 1, 'G'/'g' = 2, 'T'/'t' = 3,
+ * '-' = 5, every other byte = 4.
+ */
+uint8_t nst_nt4_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /*'-'*/, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+
+/*
+ * Mutation kind, kept in the top four bits of a mut_t (see mutmsk).
+ * maq_mut_diref() stores the insertion length (1..4) in those bits for
+ * insertions, so INSERT is the value for a one-base insertion.
+ */
+enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000};
+/* One reference position of a mutated haplotype: kind bits plus base code. */
+typedef unsigned short mut_t;
+/* Selects the mutation-kind bits of a mut_t. */
+static mut_t mutmsk = (mut_t)0xf000;
+
+/* Growable byte sequence. */
+typedef struct {
+	int l, m; /* length and maximum buffer size */
+	unsigned char *s; /* sequence */
+} seq_t;
+
+/* Growable per-position mutation record for one haplotype. */
+typedef struct {
+	int l, m; /* length and maximum buffer size */
+	mut_t *s; /* sequence */
+} mutseq_t;
+
+/* Growable list of match offsets filled by strindex(): l entries used, m allocated. */
+typedef struct {
+	uint64_t l, m;
+	uint64_t *idx;
+} idx_t;
+
+/* Both macros expand to two statements: use them only as a full statement, never as an unbraced if/else body. */
+#define INIT_SEQ(seq) (seq).s = 0; (seq).l = (seq).m = 0
+#define INIT_IDX(index) (index).idx = 0; (index).l = (index).m = 0
+
+/* Number of elements by which sequence and index buffers are grown. */
+static int SEQ_BLOCK_SIZE = 512;
+
+/* Set the growth increment used by seq_read_fasta() and strindex(). */
+void seq_set_block_size(int size) {
+	SEQ_BLOCK_SIZE = size;
+}
+
+/*
+ * Read the next FASTA record from `fp`.
+ *
+ *   seq     - receives the residues (letters, '-' and '.'), NUL-terminated;
+ *             its buffer is reused and grown in SEQ_BLOCK_SIZE steps
+ *   locus   - receives the record name (text up to the first blank/tab/newline)
+ *   comment - if not NULL, receives the rest of the header line
+ *
+ * `locus` and `comment` are written without a length check; the caller must
+ * supply buffers large enough for the header.
+ *
+ * Returns the number of residues read (0 for an empty record) or -1 when no
+ * further '>' is found.  Aborts when memory cannot be obtained.
+ */
+int seq_read_fasta(FILE *fp, seq_t *seq, char *locus, char *comment)
+{
+	int c, l, max;
+	char *p;
+	unsigned char *grown;
+
+	/* skip to the next header; EOF (or a read error) ends the input */
+	while ((c = fgetc(fp)) != EOF && c != '>');
+	if (c == EOF) return -1;
+
+	p = locus;
+	while ((c = fgetc(fp)) != EOF && c != ' ' && c != '\t' && c != '\n')
+		if (c != '\r') *p++ = c;
+	*p = '\0';
+
+	if (comment) {
+		p = comment;
+		if (c != '\n' && c != EOF) {
+			while ((c = fgetc(fp)) == ' ' || c == '\t');
+			if (c != '\n' && c != EOF) {
+				if (c != '\r') *p++ = c;
+				while ((c = fgetc(fp)) != EOF && c != '\n')
+					if (c != '\r') *p++ = c;
+			}
+		}
+		*p = '\0';
+	} else if (c != '\n' && c != EOF) {
+		while ((c = fgetc(fp)) != EOF && c != '\n');
+	}
+
+	l = 0; max = seq->m;
+	while ((c = fgetc(fp)) != EOF && c != '>') {
+		if (isalpha(c) || c == '-' || c == '.') {
+			if (l + 1 >= max) {
+				max += SEQ_BLOCK_SIZE;
+				grown = (unsigned char*)realloc(seq->s, sizeof(char) * max);
+				if (grown == 0) {
+					fprintf(stderr, "[%s] out of memory. Abort!\n", __func__);
+					abort();
+				}
+				seq->s = grown;
+			}
+			seq->s[l++] = (unsigned char)c;
+		}
+	}
+	if (c == '>') ungetc(c,fp);
+
+	/* an empty first record leaves the buffer unallocated: give the
+	 * terminator somewhere to live instead of writing through NULL */
+	if (seq->s == 0) {
+		max = SEQ_BLOCK_SIZE > 0 ? SEQ_BLOCK_SIZE : 1;
+		seq->s = (unsigned char*)malloc(sizeof(char) * max);
+		if (seq->s == 0) {
+			fprintf(stderr, "[%s] out of memory. Abort!\n", __func__);
+			abort();
+		}
+	}
+	seq->s[l] = 0;
+	seq->m = max; seq->l = l;
+	return l;
+}
+
+/* Error-checking open, copied from utils.c */
+
+/* xopen(fn, mode): err_xopen_core() tagged with the calling function's name. */
+#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
+
+/*
+ * fopen() that never returns NULL.  The file name "-" selects stdin when
+ * `mode` contains 'r' and stdout otherwise.  If the file cannot be opened a
+ * message naming `func` is printed to stderr and the process aborts.
+ */
+FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
+{
+	FILE *stream;
+
+	if (strcmp(fn, "-") == 0) {
+		if (strstr(mode, "r") != 0) return stdin;
+		return stdout;
+	}
+	stream = fopen(fn, mode);
+	if (stream == 0) {
+		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
+		abort();
+	}
+	return stream;
+}
+
+/* Simulation parameters; the usage texts describe them as follows. */
+static double ERR_RATE = 0.02;      /* -e: base error rate */
+static double DEPTH = 10.0;         /* -D: read depth (mean of the Poisson draw per site) */
+static double MUT_RATE = 0.001;     /* -m: rate of mutation */
+static double HOM_RATE = 0.0;       /* -h: rate of homozygosity */
+static double INDEL_FRAC = 0.1;     /* -R: fraction of indels */
+static double INDEL_EXTEND = 0.3;   /* -X: probability an indel is extended */
+
+/* Upper-case the NUL-terminated string `s` in place. */
+void uc(unsigned char *s)
+{
+	unsigned char *cur;
+	for (cur = s; *cur != '\0'; ++cur)
+		*cur = toupper(*cur);
+}
+
+
+/*
+ * Record every offset at which pattern `t` occurs in `s`.
+ *
+ * On success index->idx[0 .. index->l-1] holds the offsets, followed by one
+ * all-ones sentinel entry, and the number of hits is returned.
+ * Returns -1 when `t` contains a character that nst_nt4_table maps to 4, or
+ * when there is no hit.  In both cases index->l is set to 0, so a caller that
+ * loops over index->l never walks the hits of a previous call.
+ * The offset buffer is reused between calls; the process aborts when it
+ * cannot be grown.
+ */
+int strindex(idx_t *index, unsigned char *s, unsigned char *t)
+{
+	uint64_t i, j, k;
+	uint64_t l, max;
+	uint64_t *grown;
+
+	index->l = 0;
+
+	i = 0;
+	while (t[i]) {
+		if (nst_nt4_table[(int)t[i]] == 4) {
+			return -1;
+		}
+		i++;
+	}
+
+	l = 0;
+	max = index->idx ? index->m : 0;
+	for (i = 0; s[i] != '\0'; i++) {
+		for (j=i, k=0; t[k]!='\0' && s[j]==t[k]; j++, k++)
+			;
+		if (k > 0 && t[k] == '\0') {
+			/* keep one spare slot for the sentinel written below */
+			if (l + 1 >= max) {
+				max += SEQ_BLOCK_SIZE;
+				grown = (uint64_t*)realloc(index->idx, sizeof(uint64_t) * max);
+				if (grown == 0) {
+					fprintf(stderr, "[%s] out of memory. Abort!\n", __func__);
+					abort();
+				}
+				index->idx = grown;
+				index->m = max;
+			}
+			index->idx[l++] = i;
+		}
+	}
+
+	if (l == 0)
+		return -1;
+	index->l = l;
+	index->idx[l] = -1;
+	return l;
+}
+
+/* Simple normal random number generator, copied from genran.c */
+
+/*
+ * Standard-normal deviate from drand48().  Each pass through the rejection
+ * loop yields two deviates: one is returned at once, the other is kept in a
+ * static variable and returned by the next call (so this is not reentrant).
+ */
+double ran_normal()
+{
+	static int have_spare = 0;
+	static double spare;
+	double scale, radius2, x, y;
+
+	if (have_spare != 0) {
+		have_spare = 0;
+		return spare;
+	}
+	/* draw points in the square until one falls strictly inside the unit circle */
+	do {
+		x = 2.0 * drand48() - 1.0;
+		y = 2.0 * drand48() - 1.0;
+		radius2 = x * x + y * y;
+	} while (radius2 >= 1.0 || radius2 == 0.0);
+	scale = sqrt(-2.0 * log(radius2) / radius2);
+	spare = x * scale;
+	have_spare = 1;
+	return y * scale;
+}
+/*
+ * Build two mutated haplotypes of `seq`.
+ *
+ * hap1->s and hap2->s are freshly calloc'ed here (seq->m entries each); the
+ * caller owns them.  Every entry starts as the base code of the reference
+ * position and may then be turned into
+ *   SUBSTITUTE | new base,
+ *   DELETE (a deletion is extended to the next position with probability
+ *   INDEL_EXTEND), or
+ *   an insertion: (length 1..4) << 12 | inserted bases << 4 | base.
+ * With is_hap set, or with probability HOM_RATE, a mutation is applied to
+ * both haplotypes; otherwise to one chosen at random.
+ * The sequence of drand48() calls determines the result, so statement order
+ * matters.
+ */
+void maq_mut_diref(const seq_t *seq, int is_hap, mutseq_t *hap1, mutseq_t *hap2)
+{
+	int i, deleting = 0; /* bit 0 / bit 1: a deletion is running on hap1 / hap2 */
+	mutseq_t *ret[2];
+
+	ret[0] = hap1; ret[1] = hap2;
+	ret[0]->l = seq->l; ret[1]->l = seq->l;
+	ret[0]->m = seq->m; ret[1]->m = seq->m;
+	ret[0]->s = (mut_t *)calloc(seq->m, sizeof(mut_t));
+	ret[1]->s = (mut_t *)calloc(seq->m, sizeof(mut_t));
+	for (i = 0; i != seq->l; ++i) {
+		int c;
+		c = ret[0]->s[i] = ret[1]->s[i] = (mut_t)nst_nt4_table[(int)seq->s[i]];
+        if (deleting) {
+            if (drand48() < INDEL_EXTEND) {
+                if (deleting & 1) ret[0]->s[i] |= DELETE;
+                if (deleting & 2) ret[1]->s[i] |= DELETE;
+                continue;
+            } else deleting = 0;
+        }
+		/* only positions holding a real base (code < 4) can mutate */
+		if (c < 4 && drand48() < MUT_RATE) { // mutation
+			if (drand48() >= INDEL_FRAC) { // substitution
+				double r = drand48();
+				/* adding 1..3 modulo 4 always gives a different base */
+				c = (c + (int)(r * 3.0 + 1)) & 3;
+				if (is_hap || drand48() < HOM_RATE) { // hom
+					ret[0]->s[i] = ret[1]->s[i] = SUBSTITUTE|c;
+				} else { // het
+					ret[drand48()<0.5?0:1]->s[i] = SUBSTITUTE|c;
+				}
+			} else { // indel
+				if (drand48() < 0.5) { // deletion
+					if (is_hap || drand48() < HOM_RATE) { // hom-del
+						ret[0]->s[i] = ret[1]->s[i] = DELETE;
+                        deleting = 3;
+					} else { // het-del
+                        deleting = drand48()<0.5?1:2;
+						ret[deleting-1]->s[i] = DELETE;
+					}
+				} else { // insertion
+                    int num_ins = 0, ins = 0;
+                    /* 1..4 random bases, two bits each; the last one drawn ends up in the low bits */
+                    do {
+                        num_ins++;
+                        ins = (ins << 2) | (int)(drand48() * 4.0);
+                    } while(num_ins < 4 && drand48() < INDEL_EXTEND);
+
+					if (is_hap || drand48() < HOM_RATE) { // hom-ins
+						ret[0]->s[i] = ret[1]->s[i] = (num_ins << 12) | (ins << 4) | c;
+					} else { // het-ins
+						ret[drand48()<0.5?0:1]->s[i] = (num_ins << 12) | (ins << 4) | c;
+					}
+				}
+			}
+		}
+	}
+}
+/*
+ * Print the bases of the insertion stored in mutation word `mut`, low two
+ * bits first -- the same order in which the read generator consumes them.
+ */
+static void print_ins_bases(int mut)
+{
+	int n = (mut & mutmsk) >> 12, ins = mut >> 4;
+	while (n > 0) {
+		putchar("ACGTN"[ins & 0x3]);
+		ins >>= 2;
+		n--;
+	}
+}
+
+/*
+ * Print one line to stdout for every reference position at which either
+ * haplotype is mutated:
+ *   name, 1-based position, reference, variant, '-' (both haplotypes equal)
+ *   or '+' (haplotypes differ)
+ * Insertions print "-" as the reference and the inserted bases as variant.
+ * Positions whose reference character is not A/C/G/T are skipped.
+ */
+void maq_print_mutref(const char *name, const seq_t *seq, mutseq_t *hap1, mutseq_t *hap2)
+{
+	int i;
+	for (i = 0; i != seq->l; ++i) {
+		int c[3];
+		c[0] = nst_nt4_table[(int)seq->s[i]];
+		c[1] = hap1->s[i]; c[2] = hap2->s[i];
+		if (c[0] >= 4) continue;
+		if ((c[1] & mutmsk) == NOCHANGE && (c[2] & mutmsk) == NOCHANGE) continue;
+		printf("%s\t%d\t", name, i+1);
+		if (c[1] == c[2]) { // hom
+			if ((c[1]&mutmsk) == SUBSTITUTE) { // substitution
+				printf("%c\t%c\t-\n", "ACGTN"[c[0]], "ACGTN"[c[1]&0xf]);
+			} else if ((c[1]&mutmsk) == DELETE) { // del
+				printf("%c\t-\t-\n", "ACGTN"[c[0]]);
+			} else if (((c[1] & mutmsk) >> 12) <= 5) { // ins
+				printf("-\t");
+				print_ins_bases(c[1]);
+				printf("\t-\n");
+			} else assert(0);
+		} else { // het
+			if ((c[1]&mutmsk) == SUBSTITUTE || (c[2]&mutmsk) == SUBSTITUTE) { // substitution
+				printf("%c\t%c\t+\n", "ACGTN"[c[0]], "XACMGRSVTWYHKDBN"[1<<(c[1]&0x3)|1<<(c[2]&0x3)]);
+			} else if ((c[1]&mutmsk) == DELETE) {
+				printf("%c\t-\t+\n", "ACGTN"[c[0]]);
+			} else if ((c[2]&mutmsk) == DELETE) {
+				printf("%c\t-\t+\n", "ACGTN"[c[0]]);
+			} else if ((c[1] & mutmsk) != NOCHANGE && ((c[1] & mutmsk) >> 12) <= 4) { // ins1
+				/* an unchanged haplotype 1 must not be taken for a zero-length
+				 * insertion, or an insertion on haplotype 2 is never printed */
+				printf("-\t");
+				print_ins_bases(c[1]);
+				printf("\t+\n");
+			} else if ((c[2] & mutmsk) != NOCHANGE && ((c[2] & mutmsk) >> 12) <= 5) { // ins2
+				printf("-\t");
+				print_ins_bases(c[2]);
+				printf("\t+\n");
+			} else assert(0);
+		}
+	}
+}
+
+
+//wiki knuth method
+/*
+ * Poisson-distributed count with mean `lamda`: multiply drand48() draws
+ * together until the product is no longer above exp(-lamda), and return the
+ * number of draws minus one.
+ */
+int poisson_num_gen(double lamda)
+{
+	const double limit = exp(-lamda);
+	double product = 1.0;
+	int draws = 0;
+
+	for (;;) {
+		draws++;
+		product = product * drand48();
+		if (!(product > limit)) break;
+	}
+	return draws - 1;
+}
+
+/*
+ * Simulate read pairs around every occurrence of the enzyme site `cut` in
+ * each sequence of `fp_fa`.
+ *
+ * The cut point of a site is its start offset plus `pos`.  For every site a
+ * Poisson(DEPTH) number of pairs is written as FASTQ: one read runs leftwards
+ * from the cut point, the other rightwards and complemented.  Which output
+ * file and which of size_l/size_r each read gets is chosen at random per
+ * pair.  Bases are changed with probability ERR_RATE.
+ *
+ * Sites whose reads would reach outside the sequence are skipped.
+ * `cut` is upper-cased in place.  `fp_fa` is rewound before returning.
+ */
+void ezmsim_LR_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int size_l, int size_r, unsigned char *cut, int pos)
+{
+	idx_t index;
+	seq_t seq;
+	uint64_t tot_len, dep, n_sites;
+	unsigned int i, k;
+	int len, max_read, n_ref, j, size[2], Q, m;
+	char name[256], *qstr;
+	uint8_t *tmp_seq[2], c;
+	uint64_t id;
+	long long site;
+
+	INIT_SEQ(seq);
+	INIT_IDX(index);
+
+	Q = (int)(-10.0 * log(ERR_RATE) / log(10.0) + 0.499) + 33;
+
+	srand48(time(0));
+	seq_set_block_size(0x1000000);
+	max_read = size_l > size_r?size_l:size_r;
+	qstr = (char*)calloc(max_read+1, 1);
+	tmp_seq[0] = (uint8_t*)calloc(max_read+2, 1);
+	tmp_seq[1] = (uint8_t*)calloc(max_read+2, 1);
+	if (qstr == 0 || tmp_seq[0] == 0 || tmp_seq[1] == 0) {
+		fprintf(stderr, "[%s] out of memory. Abort!\n", __func__);
+		abort();
+	}
+	size[0] = size_l; size[1] = size_r;
+	tot_len = n_ref = 0; id = 0;
+	uc(cut);
+	while ((len = seq_read_fasta(fp_fa, &seq, name, 0)) >= 0) {
+		uc(seq.s);
+		/* use the hit count only when this sequence really produced hits */
+		n_sites = 0;
+		if (strindex(&index, seq.s, cut) != -1) {
+			n_sites = index.l;
+			printf("chromsome %s has %llu digest sites\n", name, (unsigned long long)index.l);
+		}
+
+		for (i = 0; i < n_sites; i++) {
+			/* either read may get either length, so both directions need
+			 * room for the longer one */
+			site = (long long)index.idx[i] + pos;
+			if (site < max_read || site + max_read > len)
+				continue;
+			dep = poisson_num_gen(DEPTH);
+			for (k = 0; k < dep; k++) {
+				FILE *fpo[2];
+				int is_flip = 0, s[2];
+				id++;
+
+				if (drand48() < 0.5) {
+					fpo[0] = fpout1; fpo[1] = fpout2;
+					s[0] = size[0]; s[1] = size[1];
+				} else {
+					fpo[1] = fpout1; fpo[0] = fpout2;
+					s[1] = size[0]; s[0] = size[1];
+					is_flip = 1;
+				}
+
+				/* first read: leftwards from the cut point */
+				for (j = 0; j < s[0]; j++) {
+					c = nst_nt4_table[(int)seq.s[site-j-1]];
+					if (c >= 4) c = 4;
+					else if (drand48() < ERR_RATE) {
+						c = (c + (int)(drand48()*3.0 + 1)) & 3;
+					}
+					tmp_seq[0][j] = c;
+				}
+
+				/* second read: rightwards from the cut point, complemented */
+				for (j = 0; j < s[1]; j++) {
+					c = nst_nt4_table[(int)seq.s[site+j]];
+					if (c >= 4) c = 4;
+					else if (drand48() < ERR_RATE) {
+						c = (c + (int)(drand48()*3.0 + 1)) & 3;
+					}
+					tmp_seq[1][j] = c < 4?3-c:4;
+				}
+
+				for (m = 0; m < 2; m++) {
+					fprintf(fpo[m], "@%s_%s_%llu_%llu/%d\n", name, cut, (unsigned long long)index.idx[i], (unsigned long long)id, m==0?is_flip+1:2-is_flip);
+					for (j = 0; j < s[m]; j++) {
+						qstr[j] = Q;
+						fputc("ACGTN"[(int)tmp_seq[m][j]], fpo[m]);
+					}
+					qstr[j] = 0;
+					fprintf(fpo[m], "\n+\n%s\n", qstr);
+				}
+			}
+		}
+		printf("\n");
+
+		tot_len += len;
+		++n_ref;
+	}
+	fprintf(stderr, "-- %d sequences, total length: %llu\n", n_ref, (unsigned long long)tot_len);
+	rewind(fp_fa);
+
+	free(qstr);
+	free(tmp_seq[0]);
+	free(tmp_seq[1]);
+	free(seq.s);
+	free(index.idx);
+}
+
+/* Print the help text of the "LR" sub-command to stderr; always returns 1. */
+int LR_usage() {
+	FILE *err = stderr;
+
+	fputs("\n", err);
+	fputs("Program: ezmsim (simulate enzyme cut assembly sequences)\n", err);
+	fprintf(err, "Version: %s\n", PACKAGE_VERSION);
+	fputs("Contact: Zechen Chong <chongzechen at gmail.com>\n\n", err);
+	fputs("Usage: ezmsim LR [options] <-z enzyme> <in.ref.fa> <out.read1.fq> <out.read2.fq>\n\n", err);
+	fprintf(err, "Options: -e FLOAT    base error rate [%.3f]\n", ERR_RATE);
+	fprintf(err, "         -D FLOAT    read depth [%.1f]\n", DEPTH);
+	fputs("         -1 INT      length of the first read [100]\n", err);
+	fputs("         -2 INT      length of the second read [100]\n", err);
+	fputs("         -z STRING   enzyme sequence (must be specified)\n", err);
+	fputs("         -p INT      enzyme cut position [length(enzyme)/2]\n", err);
+	fputs("\n", err);
+	return 1;
+}
+
+/*
+ * Entry point of the "LR" sub-command: parse the options, open the reference
+ * and the two output files and run ezmsim_LR_core().
+ * Returns 0 on success and 1 (after printing the usage) on bad arguments.
+ * When -p is not given the cut position defaults to half the enzyme length.
+ */
+int LR_main(int argc, char *argv[])
+{
+	unsigned int size_l, size_r;
+	FILE *fpout1, *fpout2, *fp_fa;
+	char *cut = "";
+
+	int c, pos;
+	pos = -1;
+	size_l = size_r = 100;
+	while ((c = getopt(argc, argv, "e:D:1:2:d:s:z:p:")) != -1) {
+	switch (c) {
+		case 'e': ERR_RATE = atof(optarg); break;
+		case 'D': DEPTH = atof(optarg); break;
+		case '1': size_l = atoi(optarg); break;
+		case '2': size_r = atoi(optarg); break;
+		case 'z':
+				  /* must not fall into 'p': that overwrote pos with
+				   * atoi(enzyme) and disabled the default cut position */
+				  cut = strdup(optarg);
+				  if (cut == 0) {
+					  fprintf(stderr, "[%s] out of memory\n", __func__);
+					  return 1;
+				  }
+				  uc((unsigned char*)cut);
+				  break;
+		case 'p': pos = atoi(optarg); break;
+		default:
+				  return LR_usage();
+		}
+	}
+	if (strlen(cut)==0) {
+		return LR_usage();
+	}
+	if (argc - optind < 3) {
+		fprintf(stderr, "files must be specified\n");
+		return LR_usage();
+	}
+	fp_fa = (strcmp(argv[optind+0], "-") == 0)?stdin:xopen(argv[optind+0], "r");
+	fpout1 = xopen(argv[optind+1], "w");
+	fpout2 = xopen(argv[optind+2], "w");
+
+	if (pos == -1) {
+		pos = strlen(cut)/2;
+	}
+	ezmsim_LR_core(fpout1, fpout2, fp_fa, size_l, size_r, (unsigned char*)cut, pos);
+
+	fclose(fp_fa); fclose(fpout1); fclose(fpout2);
+	return 0;
+}
+
+/*
+ * Simulate read pairs for every enzyme site of every sequence in `fp_fa`,
+ * drawn from two mutated haplotypes of the reference.
+ *
+ * Per sequence the haplotypes are built with maq_mut_diref() and listed with
+ * maq_print_mutref().  Per site, `stp` insert-size classes are simulated; the
+ * class distance starts at `distance` and grows by `ovlp` per class.  Each
+ * class gets a Poisson(DEPTH) number of pairs with insert size
+ * d = class distance + random [0, ovlp).  The first read starts at the cut
+ * point (site offset + pos) and runs rightwards; the second starts at
+ * cut point + d - 1, runs leftwards and is complemented.  Deleted positions
+ * are skipped and inserted bases are spliced in.  Bases are changed with
+ * probability ERR_RATE.  With `reverse` set, the two output files and read
+ * lengths are swapped.
+ *
+ * A read that runs off the sequence is padded with N.
+ * `cut` is upper-cased in place.
+ */
+void ezmsim_EF_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, unsigned int size_l, unsigned int size_r, unsigned char *cut, int pos, int distance, int ovlp, int stp, int reverse, int is_hap)
+{
+	idx_t index;
+	seq_t seq;
+	uint64_t tot_len, dep, i, k, n_sites;
+	int len, max_read, n_ref, j, size[2], Q, m, n;
+	char name[256], *qstr;
+	uint8_t *tmp_seq[2], c;
+	uint64_t id;
+	int dist, overlap, step, rev;
+	mutseq_t rseq[2];
+	mut_t *target;
+
+	INIT_SEQ(seq);
+	INIT_IDX(index);
+
+	Q = (int)(-10.0 * log(ERR_RATE) / log(10.0) + 0.499) + 33;
+
+	srand48(time(0));
+	seq_set_block_size(0x1000000);
+	max_read = size_l > size_r?size_l:size_r;
+	qstr = (char*)calloc(max_read+1, 1);
+	tmp_seq[0] = (uint8_t*)calloc(max_read+2, 1);
+	tmp_seq[1] = (uint8_t*)calloc(max_read+2, 1);
+	if (qstr == 0 || tmp_seq[0] == 0 || tmp_seq[1] == 0) {
+		fprintf(stderr, "[%s] out of memory. Abort!\n", __func__);
+		abort();
+	}
+	size[0] = size_l; size[1] = size_r;
+	tot_len = n_ref = 0; id = 0;
+	dist = distance; overlap = ovlp; step = stp; rev = reverse;
+	uc(cut);
+
+	while ((len = seq_read_fasta(fp_fa, &seq, name, 0)) >= 0) {
+		uc(seq.s);
+		/* use the hit count only when this sequence really produced hits */
+		n_sites = 0;
+		if (strindex(&index, seq.s, cut) != -1) {
+			n_sites = index.l;
+			printf("chromsome %s has %llu digest sites\n", name, (unsigned long long)index.l);
+		}
+
+		maq_mut_diref(&seq, is_hap, rseq, rseq+1);
+		maq_print_mutref(name, &seq, rseq, rseq+1);
+
+		for (i = 0; i < n_sites; i++) {
+			if (len < (dist + overlap*step) * 2) {
+				fprintf(stderr, "[ezmsim_core] skip sequence '%s' as it is shorter than %d!\n", name, (dist + overlap*step)*2);
+				continue;
+			}
+
+			for (n = 0; n < step; n++) {
+				dep = poisson_num_gen(DEPTH);
+				for (k = 0; k < dep; k++) {
+					FILE *fpo[2];
+					int is_flip = 0, s[2], d;
+					int ii, started;
+					id++;
+
+					d = dist + (int)(drand48()*overlap);
+
+					if (!rev) {
+						fpo[0] = fpout1; fpo[1] = fpout2;
+						s[0] = size[0]; s[1] = size[1];
+					} else {
+						fpo[1] = fpout1; fpo[0] = fpout2;
+						s[1] = size[0]; s[0] = size[1];
+						is_flip = 1;
+					}
+					//generate the read sequences
+					target = rseq[drand48()<0.5?0:1].s;
+
+					/* first read: rightwards from the cut point */
+					for (ii = index.idx[i]+pos, j = 0, started = 0; ii < seq.l && j < s[0]; ++ii) {
+						int base = target[ii];
+						int mut_type = base & mutmsk;
+						int n_ins, ins;
+						if (mut_type == DELETE) continue; // deletion
+						if (!started) {
+							started = 1;
+							if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) mut_type = NOCHANGE; // skip ins at the first base
+						}
+						if(mut_type == NOCHANGE || mut_type == SUBSTITUTE) {
+							tmp_seq[0][j++] = base&0xf;
+							continue;
+						}
+						n_ins = mut_type >> 12; ins = base >> 4;
+						/* every write is bounded by the read length: the old
+						 * test compared k instead of j and could run past the buffer */
+						while (n_ins > 0 && j < s[0]) {
+							tmp_seq[0][j++] = ins & 0x3;
+							ins >>= 2;
+							n_ins--;
+						}
+						if (j < s[0]) tmp_seq[0][j++] = base&0xf;
+					}
+					while (j < s[0]) tmp_seq[0][j++] = 4; /* ran off the sequence: pad with N */
+
+					/* second read: leftwards from the far end of the insert */
+					ii = index.idx[i]+pos+d-1;
+					if (ii >= seq.l) ii = seq.l - 1; /* never start beyond the sequence */
+					for (j = 0; ii>=0 && j < s[1];--ii) {
+						int base = target[ii];
+						int n_ins, ins;
+						if ((base&mutmsk) == DELETE) continue; // deletion
+						tmp_seq[1][j++] = base&0xf;
+						if((base&mutmsk) == NOCHANGE || (base&mutmsk) == SUBSTITUTE) continue;
+						n_ins = (base&mutmsk) >> 12; ins = base >> 4;
+						while (n_ins > 0 && j < s[1]) {
+							tmp_seq[1][j++] = ins & 0x3;
+							ins >>= 2;
+							n_ins--;
+						}
+					}
+					while (j < s[1]) tmp_seq[1][j++] = 4; /* ran off the sequence: pad with N */
+
+					for (j = 0; j < s[0]; j++) {
+						c = tmp_seq[0][j];
+						if (c >= 4) c = 4;
+						else if (drand48() < ERR_RATE) {
+							c = (c + (int)(drand48()*3.0+1)) & 3;
+						}
+						tmp_seq[0][j] = c;
+					}
+
+					for (j = 0; j < s[1]; j++) {
+						c = tmp_seq[1][j];
+						if (c >= 4) c = 4;
+						else if (drand48() < ERR_RATE) {
+							c = (c + (int)(drand48()*3.0 + 1)) & 3;
+						}
+						tmp_seq[1][j] = c<4?3-c:4;
+					}
+
+					for (m = 0; m < 2; m++) {
+						fprintf(fpo[m], "@%s_%s_%llu_%llu_%d/%d\n", name, cut, (unsigned long long)index.idx[i], (unsigned long long)id, d, m==0?is_flip+1:2-is_flip);
+						for (j = 0; j < s[m]; j++) {
+							qstr[j] = Q;
+							fputc("ACGTN"[(int)tmp_seq[m][j]], fpo[m]);
+						}
+						qstr[j] = 0;
+						fprintf(fpo[m], "\n+\n%s\n", qstr);
+					}
+
+				}
+				dist += overlap;
+			}
+			dist = distance;
+		}
+		/* the haplotypes are rebuilt for every sequence */
+		free(rseq[0].s);
+		free(rseq[1].s);
+		tot_len += len;
+		++n_ref;
+	}
+	fprintf(stderr, "-- %d sequences, total length: %llu\n", n_ref, (unsigned long long)tot_len);
+
+	free(qstr);
+	free(tmp_seq[0]);
+	free(tmp_seq[1]);
+	free(seq.s);
+	free(index.idx);
+}
+
+/* Print the help text of the "RAD" sub-command to stderr; always returns 1. */
+int EF_usage()
+{
+	FILE *err = stderr;
+
+	fputs("\n", err);
+	fputs("Program: ezmsim (simulate enzyme cut assembly sequences)\n", err);
+	fprintf(err, "Version: %s\n", PACKAGE_VERSION);
+	fputs("Contact: Zechen Chong <chongzechen at gmail.com>\n\n", err);
+	fputs("Usage: ezmsim RAD [options] <-z enzyme> <in.ref.fa> <out.read1.fq> <out.read2.fq>\n\n", err);
+	fprintf(err, "Options: -e FLOAT    base error rate [%.3f]\n", ERR_RATE);
+	fprintf(err, "         -D FLOAT    read depth [%.1f]\n", DEPTH);
+	fputs("         -1 INT      length of the first read [100]\n", err);
+	fputs("         -2 INT      length of the second read [100]\n", err);
+	fputs("         -z STRING   enzyme sequence (must be specified)\n", err);
+	fputs("         -p INT      enzyme cut position [length(enzyme)/2]\n", err);
+	fputs("         -d INT      initial insert size distance [120]\n", err);
+	fputs("         -o INT      insert size overlap distance [50]\n", err);
+	fputs("         -t INT      elongation steps of insert size [10]\n", err);
+	fprintf(err, "         -h FLOAT    rate of homozygosity[%.4f]\n", HOM_RATE);
+	fprintf(err, "         -m FLOAT    rate of mutation[%.4f]\n", MUT_RATE);
+	fprintf(err, "         -R FLOAT    fraction of indels [%.2f]\n", INDEL_FRAC);
+	fprintf(err, "         -X FLOAT    probability an indel is extended [%.2f]\n", INDEL_EXTEND);
+	fputs("         -r          reverse or not [forward only]\n", err);
+	fputs("         -H          haploid mode\n", err);
+	fputs("\n", err);
+	return 1;
+}
+
+/*
+ * Entry point of the "RAD" sub-command.
+ *
+ * Parses the command line (see EF_usage() for the option list), opens the
+ * reference FASTA (or stdin when the name is "-") and the two FASTQ outputs,
+ * then hands everything to ezmsim_EF_core().
+ *
+ * Returns 0 on success and 1 when the arguments are unusable.
+ */
+int EF_main(int argc, char *argv[])
+{
+	int c, size_l, size_r, pos, dist, overlap, step, rev, is_hap = 0;
+	FILE *fpout1, *fpout2, *fp_fa;
+	char *cut = "";
+
+	pos = -1;    // -1 means "not given": defaults to the middle of the enzyme
+	dist = 120;  //initial distance
+	//std_dev = 20; 
+	overlap = 50;
+	size_l = size_r = 100;
+	step = 10; rev = 0;
+	// "X:" was missing from the option string although -X is documented and handled below
+	while ((c = getopt(argc, argv, "e:D:1:2:d:s:z:p:o:t:R:X:rh:Hm:")) != -1) {
+	switch (c) {
+		case 'e': ERR_RATE = atof(optarg); break;
+		case 'D': DEPTH = atof(optarg); break;
+		case '1': size_l = atoi(optarg); break;
+		case '2': size_r = atoi(optarg); break;
+		case 'z':
+				  // own copy of the enzyme, normalised in place by uc()
+				  // (presumably upper-casing -- confirm against its definition)
+				  cut = strdup(optarg);
+				  if (cut == NULL) {
+					  fprintf(stderr, "out of memory\n");
+					  return 1;
+				  }
+				  uc((unsigned char*)cut);
+				  break; // previously fell through into -p and reset pos to atoi(enzyme) == 0
+		case 'p': pos = atoi(optarg); break;
+		case 'd': dist = atoi(optarg); break;
+		//case 's': std_dev = atoi(optarg); break;
+		case 'o': overlap = atoi(optarg); break;
+		case 't': step = atoi(optarg); break;
+		case 'r': rev = 1; break;
+		case 'h': HOM_RATE = atof(optarg); break;
+		case 'H': is_hap = 1; break;
+		case 'm': MUT_RATE = atof(optarg); break;
+		case 'R': INDEL_FRAC = atof(optarg); break;
+		case 'X': INDEL_EXTEND = atof(optarg); break;
+		default:
+				  //printf("unknown option: %c\n", optopt);
+				  //return 1;
+				  return EF_usage();
+		}
+	}
+	if (strlen(cut)==0) {
+		//fprintf(stderr, "parameter z (enzyme cut) must be specified\n");
+		return EF_usage();
+	}
+	if (argc - optind < 3) {
+		fprintf(stderr, "files must be specified\n");
+		return EF_usage();
+	}
+	fp_fa = (strcmp(argv[optind+0], "-") == 0)?stdin:xopen(argv[optind+0], "r");
+	fpout1 = xopen(argv[optind+1], "w");
+	fpout2 = xopen(argv[optind+2], "w");
+
+	if (pos == -1) {
+		pos = strlen(cut)/2;
+	}
+	ezmsim_EF_core(fpout1, fpout2, fp_fa, size_l, size_r, (unsigned char*)cut, pos, dist, overlap, step, rev, is_hap);
+	
+	fclose(fp_fa); fclose(fpout1); fclose(fpout2);
+	free(cut); // non-empty here, so it can only be the strdup() copy made for -z
+	return 0;
+}
+
+/*
+ * Print the top-level help text (list of sub-commands) to stderr.
+ * Always returns 1 so that main() can use it as its exit status.
+ */
+int usage()
+{
+	fprintf(stderr, "\n");
+	fprintf(stderr, "Program: ezmsim (simulate enzyme cut assembly sequences)\n");
+	fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
+	fprintf(stderr, "Contact: Zechen Chong <chongzechen at gmail.com>\n\n");
+	/* main() dispatches on "LR" and "RAD"; the synopsis used to say "EF" */
+	fprintf(stderr, "Usage: ezmsim <LR|RAD> [options]\n\n");
+	fprintf(stderr, "Options: LR          simulate LR reads\n");
+	fprintf(stderr, "         RAD          simulate RAD reads\n");
+	fprintf(stderr, "\n");
+	return 1;
+}
+
+/*
+ * Program entry: dispatch to the sub-command named by argv[1].
+ * The sub-command receives argv shifted by one, so it sees its own name
+ * as argv[0].
+ */
+int main (int argc, char *argv[])
+{
+	const char *cmd;
+
+	if (argc < 2) return usage();
+
+	cmd = argv[1];
+	if (!strcmp(cmd, "LR")) return LR_main(argc-1, argv+1);
+	if (!strcmp(cmd, "RAD")) return EF_main(argc-1, argv+1);
+
+	fprintf(stderr, "[main] unrecognized command '%s'\n", cmd);
+	return 1;
+}
diff --git a/file_reader.c b/file_reader.c
new file mode 100644
index 0000000..1fdfa98
--- /dev/null
+++ b/file_reader.c
@@ -0,0 +1,416 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#include "file_reader.h"
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * Open n_file inputs as one logical stream.
+ *
+ * Each name is handled as follows:
+ *   - NULL or "-"      : read from stdin
+ *   - ends with ".gz"  : read from the output of `gzip -dc <name>` via popen()
+ *   - anything else    : plain fopen(name, "r")
+ *
+ * Returns a new reader (release it with fclose_filereader()), or NULL when
+ * one of the inputs cannot be opened; in that case everything acquired so
+ * far is released again.
+ *
+ * NOTE(review): the file name is pasted unquoted into a shell command for
+ * the ".gz" case, so names containing shell metacharacters are unsafe.
+ */
+FileReader* fopen_m_filereader(int n_file, char **filenames){
+	FileReader *fr;
+	fr_file_t *fc;
+	char *cmd;
+	size_t len, plen;
+	int i, j;
+	fr = (FileReader*)malloc(sizeof(FileReader));
+	fr->files = init_vec(sizeof(fr_file_t), n_file);
+	for(i=0;i<n_file;i++){
+		fc = get_next_vec_ref(fr->files);
+		fc->file = NULL;
+		fc->filename = NULL;
+		if(filenames[i] == NULL || strcmp(filenames[i], "-") == 0){
+			fc->file = stdin;
+		} else {
+			len = strlen(filenames[i]);
+			if(len > 3 && strcmp(filenames[i] + len - 3, ".gz") == 0){
+				cmd = (char*)malloc(len + 20);
+				sprintf(cmd, "gzip -dc %s", filenames[i]);
+				fc->file = popen(cmd, "r");
+				free(cmd);
+			} else {
+				fc->file = fopen(filenames[i], "r");
+			}
+			if(fc->file != NULL){
+				/* keep the name: fclose_filereader() inspects its suffix to
+				 * choose between pclose() and fclose() */
+				fc->filename = (char*)malloc(sizeof(char) * (len + 1));
+				memcpy(fc->filename, filenames[i], len + 1);
+			}
+		}
+		if(fc->file == NULL){
+			/* undo the entries opened before the failing one */
+			for(j=0;j<i;j++){
+				fc = get_vec_ref(fr->files, j);
+				if(fc->file && fc->file != stdin){
+					plen = fc->filename? strlen(fc->filename) : 0;
+					if(plen > 3 && strcmp(fc->filename + plen - 3, ".gz") == 0) pclose(fc->file);
+					else fclose(fc->file);
+				}
+				free(fc->filename);
+			}
+			free_vec(fr->files);
+			free(fr);
+			return NULL;
+		}
+	}
+	fr->fidx  = 0;
+	fr->ptr   = 0;
+	fr->last_brk = 0;
+	fr->size  = 0;
+	fr->capacity = 512;
+	fr->buffer = (char*)malloc(fr->capacity + 2);
+	fr->line_breaker = '\n';
+	fr->delimiter    = '\t';
+	fr->line = init_string(81);
+	fr->vline = NULL;
+	fr->tabs = init_vec(sizeof(VirtualString), 12);
+	return fr;
+}
+
+/**
+ * Convenience wrapper: open a reader over exactly one input.
+ * See fopen_m_filereader() for how the name is interpreted.
+ */
+FileReader* fopen_filereader(char *filename){
+	char *single[1] = { filename };
+	return fopen_m_filereader(1, single);
+}
+
+/**
+ * Open a reader over the file whose name is prefix immediately followed
+ * by postfix. The joined name lives on the stack only for this call.
+ */
+FileReader* fopen_filereader2(char *prefix, char *postfix){
+	size_t pre_len = strlen(prefix);
+	size_t post_len = strlen(postfix);
+	char *path = alloca(pre_len + post_len + 1);
+	memcpy(path, prefix, pre_len);
+	memcpy(path + pre_len, postfix, post_len + 1); /* +1 copies the terminator */
+	return fopen_filereader(path);
+}
+
+/**
+ * Open a reader over standard input (a NULL file name selects stdin).
+ */
+FileReader* stdin_filereader(){
+	char *none[1] = { NULL };
+	return fopen_m_filereader(1, none);
+}
+
+/**
+ * Create a reader that serves lines from an in-memory, NUL-terminated string.
+ *
+ * The text is copied, as the header comment promises, so the caller keeps
+ * ownership of `string`. Previously the reader aliased the caller's pointer
+ * and fclose_filereader() later passed it to free().
+ *
+ * The reader has no backing files; once the copied text is consumed the
+ * line readers report end of input.
+ */
+FileReader* string_filereader(char *string){
+	FileReader *fr = (FileReader*)malloc(sizeof(FileReader));
+	size_t len = strlen(string);
+	fr->files = init_vec(sizeof(fr_file_t), 1);
+	fr->fidx = 0;
+	fr->ptr   = 0;
+	fr->last_brk = 0;
+	fr->size  = len;
+	fr->capacity = fr->size;
+	/* same "+ 2" slack as the file-backed reader's buffer */
+	fr->buffer = (char*)malloc(len + 2);
+	memcpy(fr->buffer, string, len + 1);
+	fr->line_breaker = '\n';
+	fr->delimiter    = '\t';
+	fr->line = init_string(81);
+	fr->vline = NULL;
+	fr->tabs = init_vec(sizeof(VirtualString), 12);
+	return fr;
+}
+
+/**
+ * Close every input of the reader and release all memory it owns,
+ * including the reader itself. stdin is never closed. Inputs whose stored
+ * name ends in ".gz" are closed with pclose(), all others with fclose().
+ */
+void fclose_filereader(FileReader *fr){
+	fr_file_t *entry;
+	size_t idx, name_len;
+	int piped;
+	for(idx = 0; idx < vec_size(fr->files); idx++){
+		entry = get_vec_ref(fr->files, idx);
+		if(entry->file != NULL && entry->file != stdin){
+			piped = 0;
+			if(entry->filename != NULL){
+				name_len = strlen(entry->filename);
+				piped = (name_len > 3 && strcmp(entry->filename + name_len - 3, ".gz") == 0);
+			}
+			if(piped){ pclose(entry->file); }
+			else { fclose(entry->file); }
+		}
+		if(entry->filename != NULL){ free(entry->filename); }
+	}
+	free_vec(fr->files);
+	if(fr->buffer != NULL){
+		free(fr->buffer);
+		fr->buffer = NULL;
+	}
+	if(fr->line){ free_string(fr->line); }
+	if(fr->vline){ free_string(fr->vline); }
+	free_vec(fr->tabs);
+	free(fr);
+}
+
+/**
+ * fread() replacement used by the line reader.
+ *
+ * For byte-wise reads from stdin the data is fetched one character at a
+ * time and the call returns as soon as a newline has been stored, so a
+ * line can be handed out without waiting for the buffer to fill. Every
+ * other case is forwarded to fread().
+ *
+ * Returns the number of elements read (0 at end of input).
+ */
+static inline int fr_fread(void *buf, size_t e_size, size_t size, FILE *in){
+	char *dst = (char*)buf;
+	size_t got;
+	int ch;
+	if(in == stdin && e_size <= 1){
+		for(got = 0; got < size; ){
+			ch = getchar();
+			if(ch == -1) break;
+			dst[got++] = ch;
+			if(ch == '\n') break;
+		}
+		return got;
+	}
+	return fread(buf, e_size, size, in);
+}
+
+/**
+ * Append the next line of the reader to `line` (without clearing it first).
+ *
+ * The line breaker itself is not appended. Returns
+ *    1  when a line terminated by fr->line_breaker was appended,
+ *    0  when the remaining unterminated data was appended (input exhausted),
+ *   -1  when no data was left at all.
+ * On return fr->last_brk holds the offset where this line started, which
+ * is what froll_back() restores.
+ */
+int fread_line2(String *line, FileReader *fr){
+	int ret, last_ptr, n;
+	ret = 0;
+	last_ptr = fr->ptr;
+	while(1){
+		if(last_ptr < fr->size){
+			/* scan the unread part of the buffer for a line breaker */
+			while(last_ptr < fr->size){
+				if(fr->buffer[last_ptr++] == fr->line_breaker){ ret = 1; break; }
+			}
+			if(ret == 1) break;
+		} else if(fr->fidx < vec_size(fr->files)) {
+			/* buffer exhausted: drop the consumed prefix so the partial line starts at offset 0 */
+			if(fr->ptr){
+				memmove(fr->buffer, fr->buffer + fr->ptr, fr->size - fr->ptr);
+				last_ptr -= fr->ptr;
+				fr->size -= fr->ptr;
+				fr->ptr = 0;
+			}
+			/* buffer completely full of one partial line: grow it by 4 KiB */
+			if(fr->size == fr->capacity){
+				fr->capacity += 4 * 1024;
+				fr->buffer = (char*)realloc(fr->buffer, fr->capacity + 2);
+			}
+			n = fr_fread(fr->buffer + fr->size, sizeof(char), fr->capacity - fr->size, ((fr_file_t*)get_vec_ref(fr->files, fr->fidx))->file);
+			if(n == 0){
+				/* current file is finished: continue with the next one */
+				fr->fidx ++;
+			} else {
+				fr->size += n;
+			}
+		} else {
+			/* no buffered data and no files left */
+			break;
+		}
+	}
+	if(last_ptr > fr->ptr){
+		/* "- ret" leaves out the line breaker when one was found */
+		append_string(line, fr->buffer + fr->ptr, last_ptr - fr->ptr - ret);
+	} else ret = -1;
+	fr->last_brk = fr->ptr;
+	fr->ptr = last_ptr;
+	return ret;
+}
+
+/**
+ * Read the next line into `line`, replacing its previous content.
+ * Returns the resulting length of `line`, or -1 when the input is exhausted.
+ */
+int fread_line(String *line, FileReader *fr){
+	clear_string(line);
+	if(fread_line2(line, fr) >= 0){
+		return line->size;
+	}
+	return -1;
+}
+
+/**
+ * Un-read the line most recently returned by the reader by moving the read
+ * position back to where that line started.
+ * Returns 1 when the position was moved, 0 when there was nothing to undo.
+ */
+int froll_back(FileReader *fr){
+	if(fr->ptr > fr->last_brk){
+		fr->ptr = fr->last_brk;
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Build a lookup table of delimiter characters from a small expression.
+ *
+ * Every character of expr marks itself as a delimiter. A backslash starts
+ * an escape covering exactly the next character:
+ *   \t tab, \s space, \n newline, \r carriage return,
+ *   \\ (or any other escaped character) the character itself.
+ * A trailing lone backslash is ignored.
+ *
+ * Returns a malloc'ed array of 128 ints (1 = delimiter, 0 = not) indexed by
+ * character code, or NULL when out of memory; the caller frees it. Bytes
+ * outside the 7-bit range cannot be represented and are skipped.
+ */
+int* init_delimiters(char *expr){
+	int *delimiters, escaped;
+	unsigned char ch;
+	size_t i, len;
+	delimiters = (int*)calloc(128, sizeof(int));
+	if(delimiters == NULL) return NULL;
+	len = strlen(expr);
+	escaped = 0;
+	for(i=0;i<len;i++){
+		ch = (unsigned char)expr[i];
+		if(escaped){
+			/* the escape ends here; it used to stay active for the rest of expr */
+			escaped = 0;
+			switch(ch){
+				case 't': ch = '\t'; break;
+				case 's': ch = ' ';  break;
+				case 'n': ch = '\n'; break;
+				case 'r': ch = '\r'; break; /* used to mark '\n' */
+				default: break;
+			}
+		} else if(ch == '\\'){
+			escaped = 1;
+			continue;
+		}
+		/* the table only has 128 slots */
+		if(ch < 128) delimiters[ch] = 1;
+	}
+	return delimiters;
+}
+
+/**
+ * Read the next line and split it into columns on fr->delimiter.
+ *
+ * The line is copied into fr->vline and the columns in fr->tabs point into
+ * that copy; the character in front of every column but the first is
+ * overwritten with NUL so each column reads as its own C string.
+ * Returns the value of split_string() for the line, or -1 at end of input.
+ */
+int fread_table(FileReader *fr){
+	VirtualString *field;
+	int n_fields, col;
+	if(fread_line(fr->line, fr) < 0) return -1;
+	if(fr->vline != NULL){
+		clear_string(fr->vline);
+	} else {
+		fr->vline = init_string(fr->line->size);
+	}
+	append_string(fr->vline, fr->line->string, fr->line->size);
+	clear_vec(fr->tabs);
+	n_fields = split_string(fr->vline, fr->delimiter, fr->tabs);
+	col = 1;
+	while(col < n_fields){
+		field = get_vec_ref(fr->tabs, col);
+		field->string[-1] = 0;
+		col++;
+	}
+	return n_fields;
+}
+
+/**
+ * Read the next FASTA record.
+ *
+ * *seq_ptr is reused when non-NULL, otherwise a fresh Sequence is
+ * allocated. The header line (after '>') is split at the first blank, tab,
+ * CR or LF into name and comment; all following lines up to the next '>'
+ * are concatenated into seq. FASTA_FLAG_NO_NAME / FASTA_FLAG_NO_SEQ skip
+ * storing the respective part.
+ *
+ * Returns 1 with the record in *seq_ptr. Returns 0 when no record with at
+ * least one sequence line could be read; the Sequence is then freed and
+ * *seq_ptr is set to NULL.
+ */
+int fread_fasta_adv(Sequence **seq_ptr, FileReader *fr, int fasta_flag){
+	Sequence *rec;
+	char *text;
+	int len, stop, state;
+	rec = *seq_ptr;
+	if(rec == NULL){
+		rec = (Sequence*)malloc(sizeof(Sequence));
+		rec->qual.string = NULL;
+		rec->seq.string = NULL;
+		rec->comment.string = NULL;
+		rec->name.string = NULL;
+		rec->name.size = rec->comment.size = rec->seq.size = rec->qual.size = 0;
+		rec->name.capacity = rec->comment.capacity = rec->seq.capacity = rec->qual.capacity = 0;
+	}
+	/* state: 0 = before the header, 1 = header seen, 2 = sequence data seen */
+	state = 0;
+	while((len = fread_line(fr->line, fr)) != -1){
+		text = fr->line->string;
+		if(len && text[0] == '>'){
+			if(state){
+				/* header of the following record: leave it for the next call */
+				froll_back(fr);
+				break;
+			}
+			state = 1;
+			rec->name.size = 0;
+			rec->comment.size = 0;
+			rec->seq.size = 0;
+			if(fasta_flag & FASTA_FLAG_NO_NAME) continue;
+			for(stop = 1; stop < len; stop++){
+				if(text[stop] == ' ' || text[stop] == '\t' || text[stop] == '\r' || text[stop] == '\n') break;
+			}
+			append_string(&(rec->name), text + 1, stop - 1);
+			if(stop + 1 < len){
+				append_string(&(rec->comment), text + stop + 1, len - stop - 1);
+			}
+		} else if(state){
+			if((fasta_flag & FASTA_FLAG_NO_SEQ) == 0){
+				append_string(&(rec->seq), text, len);
+			}
+			state = 2;
+		}
+	}
+	if(state == 2){
+		*seq_ptr = rec;
+		return 1;
+	}
+	free_sequence(rec);
+	*seq_ptr = NULL;
+	clear_string(fr->line);
+	return 0;
+}
+
+/**
+ * Read the next FASTQ record (four-line layout: '@' header, sequence,
+ * '+' separator, qualities).
+ *
+ * *seq_ptr is reused when non-NULL, otherwise a fresh Sequence is
+ * allocated. Lines before the '@' header and lines between the sequence
+ * and the '+' separator are skipped. FASTQ_FLAG_NO_NAME / _NO_SEQ /
+ * _NO_QUAL skip storing the respective part.
+ *
+ * Returns 1 with the record in *seq_ptr, or 0 when the input ended before a
+ * complete record was read; the Sequence is then freed and *seq_ptr is set
+ * to NULL.
+ */
+int fread_fastq_adv(Sequence **seq_ptr, FileReader *fr, int fastq_flag){
+	Sequence *rec;
+	char *text;
+	int len, stop, stage;
+	rec = *seq_ptr;
+	if(rec == NULL){
+		rec = (Sequence*)malloc(sizeof(Sequence));
+		rec->name.string = rec->comment.string = rec->seq.string = rec->qual.string = NULL;
+		rec->name.capacity = rec->comment.capacity = rec->seq.capacity = rec->qual.capacity = 0;
+	}
+	rec->name.size = rec->comment.size = rec->seq.size = rec->qual.size = 0;
+	/* stage counts the parts already consumed: header, sequence, '+', qualities */
+	stage = 0;
+	while(stage != 4 && (len = fread_line(fr->line, fr)) >= 0){
+		text = fr->line->string;
+		if(stage == 0){
+			if(text[0] == '@'){
+				stage = 1;
+				if((fastq_flag & FASTQ_FLAG_NO_NAME) == 0){
+					stop = 1;
+					while(stop < len && text[stop] != ' ' && text[stop] != '\t' && text[stop] != '\n') stop++;
+					append_string(&rec->name, text + 1, stop - 1);
+					if(stop < len){
+						append_string(&rec->comment, text + stop + 1, len - stop - 1);
+					}
+				}
+			}
+		} else if(stage == 1){
+			stage = 2;
+			if((fastq_flag & FASTQ_FLAG_NO_SEQ) == 0){
+				append_string(&rec->seq, text, len);
+			}
+		} else if(stage == 2){
+			if(text[0] == '+') stage = 3;
+		} else if(stage == 3){
+			stage = 4;
+			if((fastq_flag & FASTQ_FLAG_NO_QUAL) == 0){
+				append_string(&rec->qual, text, len);
+			}
+		}
+	}
+	if(stage >= 4){
+		*seq_ptr = rec;
+		return 1;
+	}
+	free_sequence(rec);
+	*seq_ptr = NULL;
+	clear_string(fr->line);
+	return 0;
+}
+
+/**
+ * Peek at the first significant line (not empty, not starting with '#')
+ * to guess the sequence format. That line is pushed back before returning.
+ *
+ * Returns 1 for FASTA ('>'), 2 for FASTQ ('@') and 0 for anything else,
+ * for a NULL reader, or when the input holds no significant line.
+ */
+int guess_seq_file_type(FileReader *fr){
+	char lead;
+	if(fr == NULL) return 0;
+	for(;;){
+		if(fread_line(fr->line, fr) == -1) return 0;
+		if(fr->line->size == 0) continue;
+		lead = fr->line->string[0];
+		if(lead == '#') continue;
+		froll_back(fr);
+		if(lead == '>') return 1;
+		if(lead == '@') return 2;
+		return 0;
+	}
+}
+
+/**
+ * Sample the beginning of a sequence file and fill in attr: whether it is
+ * FASTQ, and the minimum, maximum and rounded average sequence length.
+ * Reading stops once more than 10000 records have been seen. The reader is
+ * reset before and after sampling. With no records avg_seq_len is -1.
+ */
+void guess_seq_file(FileReader *fr, SeqFileAttr *attr){
+	Sequence *rec = NULL;
+	int count = 0, total = 0, got;
+	attr->is_fq = (guess_seq_file_type(fr) == 2);
+	attr->min_seq_len = 0x7FFFFFFF;
+	attr->max_seq_len = -1;
+	reset_filereader(fr);
+	for(;;){
+		if(attr->is_fq){
+			got = fread_fastq(&rec, fr);
+		} else {
+			got = fread_fasta(&rec, fr);
+		}
+		if(!got) break;
+		if(rec->seq.size > attr->max_seq_len) attr->max_seq_len = rec->seq.size;
+		if(rec->seq.size < attr->min_seq_len) attr->min_seq_len = rec->seq.size;
+		total += rec->seq.size;
+		count ++;
+		if(count > 10000) break;
+	}
+	if(rec){ free_sequence(rec); }
+	if(count){
+		/* adding count/2 before dividing rounds to nearest */
+		attr->avg_seq_len = (total + count / 2) / count;
+	} else {
+		attr->avg_seq_len = -1;
+	}
+	reset_filereader(fr);
+}
+
+
+/**
+ * Read everything that is left in the reader into one heap buffer.
+ *
+ * Every line, including a final unterminated one, is followed by
+ * fr->line_breaker in the result. The caller owns the returned buffer and
+ * releases it with free().
+ *
+ * The previous version collected the text in one String but returned the
+ * buffer of a second, never-filled String and freed the collected data.
+ *
+ * NOTE(review): NUL termination of the result relies on the String helpers
+ * keeping their buffer terminated -- confirm against string.h.
+ */
+char *fread_all(FileReader *fr){
+	char *text;
+	String *line;
+	line = init_string(81);
+	/* fread_line2() appends, so `line` accumulates the whole input */
+	while(fread_line2(line, fr) >= 0){ add_char_string(line, fr->line_breaker); }
+	text = line->string;
+	/* release only the String header; its buffer is handed to the caller */
+	free(line);
+	return text;
+}
+
+/**
+ * Rewind the reader to the start of its first input: every file is seeked
+ * back to offset 0 and the read position is cleared. A reader without
+ * files (see string_filereader()) keeps its buffered text and only has its
+ * position reset. Always returns 1.
+ */
+int reset_filereader(FileReader *fr){
+	fr_file_t *entry;
+	uint32_t k = 0;
+	while(k < vec_size(fr->files)){
+		entry = get_vec_ref(fr->files, k);
+		fseek(entry->file, 0, SEEK_SET);
+		k++;
+	}
+	fr->ptr  = 0;
+	fr->fidx = 0;
+	/* buffered bytes are stale only when they came from a file */
+	if(vec_size(fr->files) != 0) fr->size = 0;
+	return 1;
+}
diff --git a/file_reader.h b/file_reader.h
new file mode 100644
index 0000000..ec18af1
--- /dev/null
+++ b/file_reader.h
@@ -0,0 +1,246 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __FILE_READER_RJ_H
+#define __FILE_READER_RJ_H
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "string.h"
+#include "vector.h"
+
+/**
+ * Sequence IO
+ */
+
+/* One sequence record as filled in by fread_fasta_adv() / fread_fastq_adv(). */
+typedef struct {
+	String name;     /* header text after the '>' or '@' marker, up to the first whitespace */
+	String comment;  /* rest of the header line after that whitespace */
+	String seq;      /* sequence text (all sequence lines joined for FASTA) */
+	String qual;     /* quality line; only filled by the FASTQ reader */
+} Sequence;
+
+/* One input of a FileReader. */
+typedef struct {
+	FILE *file;      /* stdin, an fopen()ed file, or a popen()ed gzip pipe */
+	char *filename;  /* heap-allocated name buffer, NULL for stdin; a ".gz" suffix makes fclose_filereader() use pclose() */
+} fr_file_t;
+
+/* Line-oriented reader over a list of inputs (or over an in-memory text). */
+typedef struct {
+	Vector *files;      /* fr_file_t entries, read one after another */
+	uint32_t fidx;      /* index of the input currently being read */
+	char *buffer;       /* read buffer */
+	int size;           /* number of valid bytes in buffer */
+	int capacity;       /* usable size of buffer (allocated with 2 bytes of slack) */
+	int ptr;            /* offset of the next unread byte in buffer */
+	int last_brk;       /* offset where the most recently returned line started; used by froll_back() */
+	char line_breaker;  /* line terminator, '\n' by default */
+	char delimiter;     /* column separator for fread_table(), '\t' by default */
+	String *line;       /* scratch line used by the record readers */
+	String *vline;      /* copy of the current line that fread_table() splits in place; NULL until first use */
+	Vector *tabs;       /* VirtualString columns of the current table line */
+} FileReader;
+
+/*
+ * Release the four string buffers of a Sequence and the Sequence itself.
+ * `sequence` must be a plain pointer expression; it is expanded several
+ * times without parentheses. The macro expands to a brace block, so inside
+ * an if/else it has to be wrapped in braces by the caller.
+ */
+#define free_sequence(sequence) { if(sequence->name.string) free(sequence->name.string);\
+	if(sequence->comment.string) free(sequence->comment.string);\
+	if(sequence->seq.string) free(sequence->seq.string);\
+	if(sequence->qual.string) free(sequence->qual.string);\
+	free(sequence); }
+
+FileReader* fopen_filereader(char *filename);
+
+FileReader* fopen_filereader2(char *prefix, char *postfix);
+
+FileReader* fopen_m_filereader(int n_file, char **file_names);
+
+FileReader* stdin_filereader();
+
+/**
+ * Read characters from a copy of string
+ */
+
+FileReader* string_filereader(char *string);
+
+void fclose_filereader(FileReader *fr);
+
+int reset_filereader(FileReader *fr);
+
+int fread_line(String *line, FileReader *fr);
+int froll_back(FileReader *fr);
+
+int fread_table(FileReader *fr);
+#define get_col_vstr(fr, col) ((VirtualString*)get_vec_ref((fr)->tabs, col))
+#define get_col_str(fr, col) ((VirtualString*)get_vec_ref((fr)->tabs, col))->string
+#define get_col_len(fr, col) ((VirtualString*)get_vec_ref((fr)->tabs, col))->size
+
+/* Summary of a sequence file as estimated by guess_seq_file(). */
+typedef struct {
+	int is_fq;        /* non-zero when the file was recognised as FASTQ */
+	int avg_seq_len;  /* rounded mean sequence length of the sampled records, -1 if none */
+	int min_seq_len;  /* shortest sampled sequence (0x7FFFFFFF if none) */
+	int max_seq_len;  /* longest sampled sequence (-1 if none) */
+} SeqFileAttr;
+
+void guess_seq_file(FileReader *fr, SeqFileAttr *attr);
+int guess_seq_file_type(FileReader *fr);
+
+#define FASTA_FLAG_NORMAL		0
+#define FASTA_FLAG_NO_NAME		1
+#define FASTA_FLAG_NO_SEQ		2
+
+int fread_fasta_adv(Sequence **seq, FileReader *fr, int flag);
+
+#define fread_fasta(seq, fr) fread_fasta_adv(seq, fr, FASTA_FLAG_NORMAL)
+
+#define FASTQ_FLAG_NORMAL		0
+#define FASTQ_FLAG_NO_NAME		1
+#define FASTQ_FLAG_NO_SEQ		2
+#define FASTQ_FLAG_NO_QUAL		4
+
+int fread_fastq_adv(Sequence **seq, FileReader *fr, int flag);
+
+#define fread_fastq(seq, fr) fread_fastq_adv(seq, fr, FASTQ_FLAG_NORMAL)
+
+char * fread_all(FileReader *fr);
+
+/**
+ * Print seq to out, wrapped so that every output line carries at most
+ * line_width characters, each line followed by '\n'.
+ *
+ * A non-positive line_width disables wrapping (it used to loop forever).
+ * The sequence buffer is no longer modified while printing; the chunk
+ * length is passed to printf as a precision instead of temporarily writing
+ * a NUL into the string.
+ */
+static inline void print_pretty_seq(FILE *out, String *seq, int line_width){
+	int i, chunk;
+	if(line_width <= 0) line_width = seq->size;
+	for(i = 0; i < seq->size; i += chunk){
+		chunk = seq->size - i;
+		if(chunk > line_width) chunk = line_width;
+		fprintf(out, "%.*s\n", chunk, seq->string + i);
+	}
+}
+
+/**
+ * Shared implementation of the open_file_for_* helpers: open the file
+ * called name, or name immediately followed by suffix when suffix is not
+ * NULL, with the given fopen() mode. On failure a message is written to
+ * stderr and NULL is returned.
+ *
+ * The joined name is built on the heap rather than with alloca(), so a
+ * very long name cannot overflow the stack.
+ */
+static inline FILE* fr_open_with_suffix(char *name, char *suffix, const char *mode){
+	char *full_name;
+	FILE *file;
+	size_t name_len, suffix_len;
+	if(suffix == NULL){
+		file = fopen(name, mode);
+		if(file == NULL) fprintf(stderr, "Cannot open file: %s\n", name);
+		return file;
+	}
+	name_len = strlen(name);
+	suffix_len = strlen(suffix);
+	full_name = (char*)malloc(name_len + suffix_len + 1);
+	if(full_name == NULL){
+		fprintf(stderr, "Cannot open file: %s%s\n", name, suffix);
+		return NULL;
+	}
+	memcpy(full_name, name, name_len);
+	memcpy(full_name + name_len, suffix, suffix_len + 1); /* +1 copies the terminator */
+	file = fopen(full_name, mode);
+	if(file == NULL) fprintf(stderr, "Cannot open file: %s\n", full_name);
+	free(full_name);
+	return file;
+}
+
+/** Open name (plus optional suffix) for reading ("r"); NULL on failure. */
+static inline FILE* open_file_for_read(char *name, char *suffix){
+	return fr_open_with_suffix(name, suffix, "r");
+}
+
+/** Create or truncate name (plus optional suffix) for reading and writing ("w+"); NULL on failure. */
+static inline FILE* open_file_for_write(char *name, char *suffix){
+	return fr_open_with_suffix(name, suffix, "w+");
+}
+
+/** Open name (plus optional suffix) for reading and appending ("a+"); NULL on failure. */
+static inline FILE* open_file_for_append(char *name, char *suffix){
+	return fr_open_with_suffix(name, suffix, "a+");
+}
+
+/* Small read buffer placed in front of a FILE, consumed through read_bif(). */
+typedef struct {
+	FILE *file;    /* underlying stream; closed by close_bif() */
+	void *buffer;  /* buf_cap bytes of storage */
+	/* buf_off: next unread byte in buffer; buf_size: valid bytes in buffer; buf_cap: allocated size */
+	int buf_off, buf_size, buf_cap;
+} BufferedInputFile;
+
+/**
+ * Wrap an already opened stream in a BufferedInputFile with a read buffer
+ * of buf_size bytes. The buffer starts out empty.
+ */
+static inline BufferedInputFile* init_bif(FILE *file, int buf_size){
+	BufferedInputFile *reader = malloc(sizeof(BufferedInputFile));
+	reader->buffer   = malloc(buf_size);
+	reader->buf_cap  = buf_size;
+	reader->buf_size = 0;
+	reader->buf_off  = 0;
+	reader->file     = file;
+	return reader;
+}
+
+/**
+ * Open filename (mode "r+") behind a 1024-byte read buffer.
+ * Returns NULL when the file cannot be opened.
+ */
+static inline BufferedInputFile* open_bif(char *filename){
+	FILE *fp = fopen(filename, "r+");
+	if(fp != NULL){
+		return init_bif(fp, 1024);
+	}
+	return NULL;
+}
+
+/**
+ * Like open_bif(), for the file whose name is filename immediately
+ * followed by suffix. Returns NULL when the file cannot be opened.
+ */
+static inline BufferedInputFile* open_bif2(char *filename, char *suffix){
+	size_t base_len = strlen(filename);
+	size_t suffix_len = strlen(suffix);
+	char *path = alloca(base_len + suffix_len + 1);
+	FILE *fp;
+	memcpy(path, filename, base_len);
+	memcpy(path + base_len, suffix, suffix_len + 1); /* +1 copies the terminator */
+	fp = fopen(path, "r+");
+	if(fp != NULL){
+		return init_bif(fp, 1024);
+	}
+	return NULL;
+}
+
+/**
+ * Copy up to size bytes from the buffered file into data, refilling the
+ * internal buffer from the stream whenever it runs empty.
+ *
+ * Returns the number of bytes actually copied; this is less than size only
+ * when the stream is exhausted. A non-positive size copies nothing and
+ * returns 0.
+ *
+ * The destination is advanced through an unsigned char pointer because
+ * arithmetic on void* is a compiler extension, not standard C.
+ */
+static inline int64_t read_bif(BufferedInputFile *bif, void *data, int64_t size){
+	unsigned char *dst = (unsigned char*)data;
+	int64_t remaining, avail;
+	if(size <= 0) return 0;
+	remaining = size;
+	while(remaining > 0){
+		avail = bif->buf_size - bif->buf_off;
+		if(avail <= 0){
+			/* buffer drained: fetch the next chunk, stop at end of stream */
+			bif->buf_size = fread(bif->buffer, 1, bif->buf_cap, bif->file);
+			bif->buf_off = 0;
+			if(bif->buf_size == 0) break;
+			continue;
+		}
+		if(avail > remaining) avail = remaining;
+		memcpy(dst, (unsigned char*)bif->buffer + bif->buf_off, (size_t)avail);
+		dst += avail;
+		remaining -= avail;
+		bif->buf_off += avail;
+	}
+	return size - remaining;
+}
+
+/**
+ * Close the underlying stream and release the buffer and the
+ * BufferedInputFile itself.
+ */
+static inline void close_bif(BufferedInputFile *bif){
+	FILE *stream = bif->file;
+	void *storage = bif->buffer;
+	fclose(stream);
+	free(storage);
+	free(bif);
+}
+
+#endif
diff --git a/hashset.h b/hashset.h
new file mode 100644
index 0000000..ae664ea
--- /dev/null
+++ b/hashset.h
@@ -0,0 +1,513 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __HASH_SET_RJ
+#define __HASH_SET_RJ
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+
+/*
+ * Ascending table of 61 candidate table sizes, each roughly twice the previous
+ * one; _rj_hashset_find_prime() picks from it.
+ * NOTE(review): the name suggests every entry is prime, but 0xf (15) is
+ * composite -- confirm whether that entry is intentional.
+ */
+static const uint64_t sys_prime_list[61] = {
+	0x7LLU, 0xfLLU, 0x1fLLU, 0x43LLU, 0x89LLU,
+	0x115LLU, 0x22dLLU, 0x45dLLU, 0x8bdLLU, 0x1181LLU,
+	0x2303LLU, 0x4609LLU, 0x8c17LLU, 0x1183dLLU, 0x2307bLLU,
+	0x460fdLLU, 0x8c201LLU, 0x118411LLU, 0x230833LLU, 0x461069LLU,
+	0x8c20e1LLU, 0x11841cbLLU, 0x2308397LLU, 0x461075bLLU, 0x8c20ecbLLU,
+	0x11841da5LLU, 0x23083b61LLU, 0x461076c7LLU, 0x8c20ed91LLU, 0x11841db31LLU,
+	0x23083b673LLU, 0x461076d1bLLU, 0x8c20eda41LLU, 0x11841db48dLLU, 0x23083b6937LLU,
+	0x461076d27fLLU, 0x8c20eda50dLLU, 0x11841db4a59LLU, 0x23083b694ebLLU, 0x461076d29f1LLU,
+	0x8c20eda5441LLU, 0x11841db4a887LLU, 0x23083b69511fLLU, 0x461076d2a2c1LLU, 0x8c20eda54591LLU,
+	0x11841db4a8b55LLU, 0x23083b69516c1LLU, 0x461076d2a2da5LLU, 0x8c20eda545b55LLU, 0x11841db4a8b6b5LLU,
+	0x23083b69516d91LLU, 0x461076d2a2db3bLLU, 0x8c20eda545b69dLLU, 0x11841db4a8b6d5dLLU, 0x23083b69516daf5LLU,
+	0x461076d2a2db5edLLU, 0x8c20eda545b6c5fLLU, 0x11841db4a8b6d8ebLLU, 0x23083b69516db1ffLLU, 0x461076d2a2db643fLLU,
+	0x8c20eda545b6c8f3LLU
+};
+
+/*
+ * Return the first entry of sys_prime_list that is >= n, or the last entry
+ * of the table when n is larger than all of them.
+ */
+static inline uint64_t _rj_hashset_find_prime(uint64_t n){
+	uint32_t idx;
+	for(idx = 0; idx < 60; idx ++){
+		if(sys_prime_list[idx] >= n) break;
+	}
+	return sys_prime_list[idx];
+}
+
+/*
+ * Per-slot state bits. `flags` is an array of uint32_t holding two bits per
+ * slot (16 slots per word): bit 0x01 marks the slot as null, bit 0x02 marks
+ * it as deleted. A slot holds a live entry only when both bits are clear.
+ * Every macro evaluates `idx` twice, so pass an expression without side effects.
+ */
+#ifndef HASH_FLAG_MACROS
+#define HASH_FLAG_MACROS
+#define is_entity_null(flags, idx)    ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x01)
+#define is_entity_del(flags, idx)     ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x02)
+#define exists_entity(flags, idx)     (!((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x03))
+#define set_entity_null(flags, idx)   ((flags)[(idx)>>4] |= (0x01u<<(((idx)&0x0f)<<1)))
+#define set_entity_del(flags, idx)    ((flags)[(idx)>>4] |= (0x02u<<(((idx)&0x0f)<<1)))
+#define clear_entity_null(flags, idx) ((flags)[(idx)>>4] &= ~(0x01u<<(((idx)&0x0f)<<1)))
+#define clear_entity_del(flags, idx)  ((flags)[(idx)>>4] &= ~(0x02u<<(((idx)&0x0f)<<1)))
+#endif
+
+/*
+ * init_hashset_macro(hash_type, hash_key_type)
+ *
+ * Emits the set structure `hash_type` and its constructors:
+ *   <hash_type>_is_prime(num)        trial-division primality test; values
+ *                                    below 4 are all reported as prime.
+ *   <hash_type>_find_next_prime(num) first odd value >= num accepted by _is_prime.
+ *   init2_<hash_type>(size, factor)  allocate a set whose capacity is the table
+ *                                    size chosen by _rj_hashset_find_prime(size),
+ *                                    with all slots flagged null (0x55 pattern).
+ *   init_<hash_type>(size)           init2 with a load factor of 0.67.
+ *
+ * Structure fields: array (slots), flags (2 bits per slot), e_size (bytes per
+ * key), ocp (slots that are no longer null), size (slot count), count (live
+ * entries), max (size * load_factor), iter_ptr (iteration cursor).
+ *
+ * Allocation failure prints "-- Out of memory --" and aborts, as encap_ does.
+ */
+#define init_hashset_macro(hash_type, hash_key_type) \
+typedef struct { hash_key_type *array;  uint32_t *flags; size_t e_size; size_t ocp; size_t size; size_t count; size_t max; float load_factor; size_t iter_ptr; } hash_type; \
+static inline int hash_type##_is_prime(uint64_t num){                          \
+	uint64_t i;                                                                \
+	if(num < 4) return 1;                                                      \
+	if(num % 2 == 0) return 0;                                                 \
+	/* i <= num / i includes the square root itself (9, 25, 49, ...) */       \
+	for(i=3;i<=num/i;i+=2){ if(num % i == 0) return 0; }                       \
+	return 1;                                                                  \
+}                                                                              \
+static inline uint64_t hash_type##_find_next_prime(uint64_t num){              \
+	if(num % 2 == 0) num ++;                                                   \
+	while(1){ if(hash_type##_is_prime(num)) return num; num += 2; }            \
+}                                                                              \
+static inline hash_type* init2_##hash_type(uint32_t size, float factor){       \
+	hash_type *set;                                                            \
+	size_t flag_bytes;                                                         \
+	set = (hash_type*)malloc(sizeof(hash_type));                               \
+	if(set == NULL){                                                           \
+		fprintf(stderr, "-- Out of memory --\n");                              \
+		abort();                                                               \
+	}                                                                          \
+	set->e_size = sizeof(hash_key_type);                                       \
+	set->size   = _rj_hashset_find_prime(size);                                \
+	set->count  = 0;                                                           \
+	set->ocp    = 0;                                                           \
+	set->load_factor = factor;                                                 \
+	set->max    = set->size * set->load_factor;                                \
+	set->iter_ptr    = 0;                                                      \
+	flag_bytes       = (set->size + 15) / 16 * 4;                              \
+	set->array       = calloc(set->size, set->e_size);                         \
+	set->flags       = malloc(flag_bytes);                                     \
+	if(set->array == NULL || set->flags == NULL){                              \
+		fprintf(stderr, "-- Out of memory --\n");                              \
+		abort();                                                               \
+	}                                                                          \
+	memset(set->flags, 0x55, flag_bytes);                                      \
+	return set;                                                                \
+}                                                                              \
+static inline hash_type* init_##hash_type(uint32_t size){ return init2_##hash_type(size, 0.67f); }
+
+/*
+ * get_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)
+ *
+ * Emits:
+ *   get_<hash_type>(set, key)    linear probe from hash(key) % size; returns a
+ *                                pointer to the stored entry equal to key, or
+ *                                NULL once a null slot is reached. Deleted
+ *                                slots are stepped over.
+ *   offset_<hash_type>(set, ptr) slot index of an entry pointer.
+ */
+#define get_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
+static inline hash_key_type* get_##hash_type(hash_type *set, hash_key_type key){\
+	hash_key_type *slot;                                                       \
+	uint32_t bits;                                                             \
+	size_t pos;                                                                \
+	for(pos = hash_code_macro(key) % set->size; ; pos = (pos + 1 == set->size)? 0 : pos + 1){ \
+		bits = (set->flags[pos >> 4] >> (((pos) & 0x0f) << 1)) & 0x03;         \
+		if(bits & 0x01) break;                                                 \
+		if(bits & 0x02) continue;                                              \
+		slot = ((hash_key_type*)set->array) + pos;                             \
+		if(hash_equal_macro(*slot, key)) return slot;                          \
+	}                                                                          \
+	return NULL;                                                               \
+}                                                                              \
+static inline size_t offset_##hash_type(hash_type *set, hash_key_type *ptr){   \
+	return ptr - set->array;                                                   \
+}
+
+/*
+ * prepare_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)
+ *
+ * Emits prepare_<hash_type>(set, key, exists): find-or-reserve a slot for key.
+ * The table is grown first via encap_<hash_type>(set, 1). The probe then walks
+ * linearly from hash(key) % size:
+ *   - an equal live entry is returned with *exists = 1;
+ *   - on reaching a null slot, *exists = 0, count is incremented and a slot is
+ *     returned: the first deleted slot seen during the probe if there was one
+ *     (its deleted bit is cleared), otherwise the null slot itself (its null
+ *     bit is cleared and ocp is incremented).
+ * The returned slot is NOT written here; the caller is expected to store the
+ * key into it. `d == set->size` means "no deleted slot remembered yet".
+ */
+#define prepare_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
+static inline void encap_##hash_type(hash_type *set, size_t num);              \
+static inline hash_key_type* prepare_##hash_type(hash_type *set, hash_key_type key, int *exists){\
+	hash_key_type *e;                                                          \
+	size_t hc, d;                                                              \
+	encap_##hash_type(set, 1);                                                 \
+	hc = hash_code_macro((key)) % set->size;                                   \
+	d = set->size;                                                             \
+	while(1){                                                                  \
+		if(is_entity_null(set->flags, hc)){                                    \
+			if(d == set->size){                                                \
+				clear_entity_null(set->flags, hc);                             \
+				set->ocp ++;                                                   \
+			} else {                                                           \
+				hc = d;                                                        \
+				clear_entity_del(set->flags, hc);                              \
+			}                                                                  \
+			*exists = 0;                                                       \
+			set->count ++;                                                     \
+			e = ((hash_key_type*)set->array) + hc;                             \
+			return e;                                                          \
+		} else if(is_entity_del(set->flags, hc)){                              \
+			if(d == set->size) d = hc;                                         \
+		} else {                                                               \
+			e = ((hash_key_type*)set->array) + hc;                             \
+			if(hash_equal_macro((*e), (key))){                                \
+				*exists = 1;                                                   \
+				return e;                                                      \
+			}                                                                  \
+		}                                                                      \
+		hc ++;                                                                 \
+		hc %= set->size;                                                       \
+	}                                                                          \
+	return NULL;                                                               \
+}
+
+/*
+ * exists_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)
+ *
+ * Emits exists_<hash_type>(set, key): returns 1 when a live entry equal to key
+ * is found by linear probing from hash(key) % size, 0 once a null slot is hit.
+ */
+#define exists_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
+static inline int exists_##hash_type(hash_type *set, hash_key_type key){       \
+	hash_key_type *slot;                                                       \
+	size_t pos;                                                                \
+	pos = hash_code_macro(key) % set->size;                                    \
+	while(!is_entity_null(set->flags, pos)){                                   \
+		if(!is_entity_del(set->flags, pos)){                                   \
+			slot = ((hash_key_type*)set->array) + pos;                         \
+			if(hash_equal_macro(*slot, key)) return 1;                         \
+		}                                                                      \
+		pos = (pos + 1) % set->size;                                           \
+	}                                                                          \
+	return 0;                                                                  \
+}
+
+/*
+ * add_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)
+ *
+ * Emits add_<hash_type>(set, key): insert key without growing the table.
+ * Probes linearly from hash(key) % size:
+ *   - an equal live entry is returned unchanged (nothing is stored);
+ *   - on reaching a null slot the key is stored, count is incremented and the
+ *     slot is returned. The first deleted slot seen on the way is reused if
+ *     there was one; otherwise the null slot is taken and ocp is incremented.
+ * `d == set->size` means "no deleted slot remembered yet".
+ * This function does not call encap_, so the loop only terminates while the
+ * table still contains a null slot or an equal entry.
+ */
+#define add_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
+static inline hash_key_type* add_##hash_type(hash_type *set, hash_key_type key){       \
+	hash_key_type *e;                                                          \
+	size_t d, hc;                                                              \
+	hc = hash_code_macro(key) % set->size;                                     \
+	d  = set->size;                                                            \
+	do{                                                                        \
+		if(is_entity_null(set->flags, hc)){                                    \
+			if(d == set->size){                                                \
+				d = hc;                                                        \
+				clear_entity_null(set->flags, d);                              \
+				set->ocp ++;                                                   \
+			} else {                                                           \
+				clear_entity_del(set->flags, d);                               \
+			}                                                                  \
+			e = ((hash_key_type*)set->array) + d;                              \
+			*e = key;                                                          \
+			set->count ++;                                                     \
+			return e;                                                          \
+		} else if(is_entity_del(set->flags, hc)){                              \
+			if(d == set->size) d = hc;                                         \
+		} else {                                                               \
+			e = ((hash_key_type*)set->array) + hc;                             \
+			if(hash_equal_macro(*e, key)){                                     \
+				return e;                                                      \
+			}                                                                  \
+		}                                                                      \
+		if(hc + 1 == set->size) hc = 0;                                        \
+		else hc = hc + 1;                                                      \
+	} while(1);                                                                \
+	return NULL;                                                                  \
+}
+
+/*
+ * put_hashset_macro(hash_type, hash_key_type)
+ *
+ * Emits put_<hash_type>(set, key): make room for one more entry with
+ * encap_<hash_type>(set, 1), then insert with add_<hash_type>(set, key) and
+ * return the slot add_ returns.
+ * The encap_ prototype is repeated here so this macro does not depend on
+ * another macro having declared it first (a repeated declaration is harmless).
+ */
+#define put_hashset_macro(hash_type, hash_key_type) \
+static inline void encap_##hash_type(hash_type *set, size_t num);              \
+static inline hash_key_type* put_##hash_type(hash_type *set, hash_key_type key){         \
+	encap_##hash_type(set, 1);                                                 \
+	return add_##hash_type(set, key);                                          \
+}
+
+/*
+ * remove_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)
+ *
+ * Emits:
+ *   delete_<hash_type>(set, key)  flag the slot that `key` points at as deleted
+ *                                 and decrement count (key is a slot pointer).
+ *   remove_<hash_type>(set, key)  look key up by linear probing; when found,
+ *                                 flag it deleted, decrement count, return 1.
+ *                                 Returns 0 once a null slot is reached.
+ */
+#define remove_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro) \
+static inline void delete_##hash_type(hash_type *set, hash_key_type *key){     \
+	set_entity_del(set->flags, key - set->array);                              \
+	set->count --;                                                             \
+}                                                                              \
+static inline int remove_##hash_type(hash_type *set, hash_key_type key){       \
+	hash_key_type *slot;                                                       \
+	size_t pos;                                                                \
+	for(pos = hash_code_macro(key) % set->size; !is_entity_null(set->flags, pos); pos = (pos + 1) % set->size){ \
+		if(is_entity_del(set->flags, pos)) continue;                           \
+		slot = ((hash_key_type*)set->array) + pos;                             \
+		if(hash_equal_macro(*slot, key)){                                      \
+			set->count --;                                                     \
+			set_entity_del(set->flags, pos);                                   \
+			return 1;                                                          \
+		}                                                                      \
+	}                                                                          \
+	return 0;                                                                  \
+}
+
+/* Emits reset_iter_<hash_type>(set): rewind the iteration cursor to slot 0. */
+#define reset_iter_hashset_macro(hash_type) \
+static inline void reset_iter_##hash_type(hash_type *set){                     \
+	set->iter_ptr = 0;                                                         \
+}
+
+/*
+ * Emits iter_<hash_type>(set, ret): advance the cursor to the next live slot,
+ * copy its entry into *ret and return 1; return 0 when the cursor has passed
+ * the last slot.
+ */
+#define iter_hashset_macro(hash_type, hash_key_type) \
+static inline int iter_##hash_type(hash_type *set, hash_key_type *ret){        \
+	size_t pos;                                                                \
+	while(set->iter_ptr < set->size){                                          \
+		pos = set->iter_ptr ++;                                                \
+		if(!exists_entity(set->flags, pos)) continue;                          \
+		*ret = ((hash_key_type*)set->array)[pos];                              \
+		return 1;                                                              \
+	}                                                                          \
+	return 0;                                                                  \
+}
+
+/*
+ * Emits ref_iter_<hash_type>(set): like iter_, but returns a pointer to the
+ * next live entry inside the table (NULL when the cursor has passed the end).
+ */
+#define ref_iter_hashset_macro(hash_type, hash_key_type) \
+static inline hash_key_type* ref_iter_##hash_type(hash_type *set){             \
+	size_t pos;                                                                \
+	while(set->iter_ptr < set->size){                                          \
+		pos = set->iter_ptr ++;                                                \
+		if(exists_entity(set->flags, pos)){                                    \
+			return ((hash_key_type*)set->array) + pos;                         \
+		}                                                                      \
+	}                                                                          \
+	return NULL;                                                               \
+}
+
+/* Emits count_<hash_type>(set): number of live entries (the `count` field). */
+#define count_hashset_macro(hash_type) \
+static inline int64_t count_##hash_type(hash_type *set){                       \
+	return set->count;                                                         \
+}
+
+/*
+ * Emits clear_<hash_type>(set): empty the set while keeping its capacity.
+ * Every slot is flagged null again and count, ocp and the iteration cursor
+ * are zeroed. Nothing is touched when ocp is already 0.
+ */
+#define clear_hashset_macro(hash_type) \
+static inline void clear_##hash_type(hash_type *set){                          \
+	if(set->ocp != 0){                                                         \
+		memset(set->flags, 0x55, (set->size + 15) / 16 * 4);                   \
+		set->iter_ptr = 0;                                                     \
+		set->ocp   = 0;                                                        \
+		set->count = 0;                                                        \
+	}                                                                          \
+}
+
+/*
+ * fwrite()/fread() wrappers that yield a byte count (element size times the
+ * number of elements transferred) instead of an element count.
+ * Arguments are parenthesized so that expressions can be passed safely;
+ * e_size is evaluated twice.
+ */
+#define ffwrite(ptr, e_size, size, file) ((e_size) * fwrite((ptr), (e_size), (size), (file)))
+#define ffread(ptr, e_size, size, file) ((e_size) * fread((ptr), (e_size), (size), (file)))
+
+/*
+ * dump_hashset_macro(hash_type)
+ *
+ * Emits:
+ *   sizeof_<hash_type>(set)    number of bytes dump_ writes for this set.
+ *   dump_<hash_type>(set, out) write e_size, size, count, load_factor, the slot
+ *                              array and the flag words to `out`, in that
+ *                              order, as raw memory; returns the bytes written.
+ */
+#define dump_hashset_macro(hash_type) \
+static inline size_t sizeof_##hash_type(hash_type *set){                       \
+	size_t flag_words = (set->size + 15) / 16;                                 \
+	size_t header = sizeof(size_t) * 3 + sizeof(float);                        \
+	return header + set->e_size * set->size + sizeof(uint32_t) * flag_words;   \
+}                                                                              \
+static inline size_t dump_##hash_type(hash_type *set, FILE *out){              \
+	size_t total = 0;                                                          \
+	total += ffwrite(&set->e_size, sizeof(size_t), 1, out);                    \
+	total += ffwrite(&set->size, sizeof(size_t), 1, out);                      \
+	total += ffwrite(&set->count, sizeof(size_t), 1, out);                     \
+	total += ffwrite(&set->load_factor, sizeof(float), 1, out);                \
+	total += ffwrite(set->array, set->e_size, set->size, out);                 \
+	total += ffwrite(set->flags, sizeof(uint32_t), (set->size + 15) / 16, out); \
+	return total;                                                              \
+}
+
+/*
+ * load_hashset_macro(hash_type)
+ *
+ * Emits load_<hash_type>(in): rebuild a set from the byte layout written by
+ * dump_<hash_type> (e_size, size, count, load_factor, slot array, flag words).
+ *
+ * Fields that are not stored in the dump are reconstructed: max from
+ * size * load_factor, ocp by counting the slots whose null bit is clear, and
+ * iter_ptr is reset to 0.
+ *
+ * Returns the new set, or NULL when memory cannot be allocated or the stream
+ * ends before the whole set has been read.
+ */
+#define load_hashset_macro(hash_type) \
+static inline hash_type* load_##hash_type(FILE *in){                           \
+	hash_type *set;                                                            \
+	size_t n, i, flag_words;                                                   \
+	set = (hash_type*)malloc(sizeof(hash_type));                               \
+	if(set == NULL) return NULL;                                               \
+	n =  ffread(&set->e_size, sizeof(size_t), 1, in);                          \
+	n += ffread(&set->size, sizeof(size_t), 1, in);                            \
+	n += ffread(&set->count, sizeof(size_t), 1, in);                           \
+	n += ffread(&set->load_factor, sizeof(float), 1, in);                      \
+	if(n != sizeof(size_t) * 3 + sizeof(float)){                               \
+		free(set);                                                             \
+		return NULL;                                                           \
+	}                                                                          \
+	flag_words = (set->size + 15) / 16;                                        \
+	set->max   = set->size * set->load_factor;                                 \
+	set->array = malloc(set->size * set->e_size);                              \
+	set->flags = (uint32_t*)malloc(flag_words * sizeof(uint32_t));             \
+	if(set->array == NULL || set->flags == NULL){                              \
+		free(set->array);                                                      \
+		free(set->flags);                                                      \
+		free(set);                                                             \
+		return NULL;                                                           \
+	}                                                                          \
+	n =  ffread(set->array, set->e_size, set->size, in);                       \
+	n += ffread(set->flags, sizeof(uint32_t), flag_words, in);                 \
+	if(n != set->e_size * set->size + sizeof(uint32_t) * flag_words){          \
+		free(set->array);                                                      \
+		free(set->flags);                                                      \
+		free(set);                                                             \
+		return NULL;                                                           \
+	}                                                                          \
+	/* ocp is not part of the dump: every non-null slot (live or deleted) counts */ \
+	set->ocp = 0;                                                              \
+	for(i=0;i<set->size;i++){                                                  \
+		if(!is_entity_null(set->flags, i)) set->ocp ++;                        \
+	}                                                                          \
+	set->iter_ptr = 0;                                                         \
+	return set;                                                                \
+}
+
+/* Emits free_<hash_type>(set): release the slot array, the flag words and the set itself. */
+#define free_hashset_macro(hash_type) \
+static inline void free_##hash_type(hash_type *set){                           \
+	free(set->flags);                                                          \
+	free(set->array);                                                          \
+	free(set);                                                                 \
+}
+
+/*
+ * encap_hashset_macro(hash_type, hash_key_type, hash_code_macro)
+ *
+ * Emits encap_<hash_type>(set, num): make sure `num` more entries fit.
+ * Nothing happens while ocp + num <= max. Otherwise:
+ *   1. a new slot count n is taken from _rj_hashset_find_prime(n * 2),
+ *      repeated until n * load_factor >= count + num;
+ *   2. the slot array is realloc'ed to n entries (abort on failure) and a
+ *      fresh flag array with every slot null is allocated;
+ *   3. size, max and ocp (= count, so deleted slots are dropped) are updated
+ *      and the old flag array is kept aside as `flags`;
+ *   4. entries are rehashed in place: each live slot of the old layout is
+ *      marked deleted in the OLD flags, then its key is placed at the first
+ *      null slot of the NEW flags found by linear probing. If that target
+ *      still holds an old entry that has not been moved yet, the two keys are
+ *      swapped and the displaced key is placed next;
+ *   5. the old flag array is freed.
+ */
+#define encap_hashset_macro(hash_type, hash_key_type, hash_code_macro) \
+static inline void encap_##hash_type(hash_type *set, size_t num){             \
+	uint32_t *flags, *f;                                                      \
+	uint64_t i, n, size, hc;                                                  \
+	hash_key_type key;                                                        \
+	hash_key_type tmp;                                                        \
+	if(set->ocp + num <= set->max) return;                                  \
+	n = set->size;                                                            \
+	do{ n = _rj_hashset_find_prime(n * 2); } while(n * set->load_factor < set->count + num);    \
+	set->array = realloc(set->array, n * set->e_size);                        \
+	if(set->array == NULL){                                                   \
+		fprintf(stderr, "-- Out of memory --\n");                             \
+		abort();                                                              \
+	}                                                                         \
+	flags = malloc((n+15)/16 * 4);                                            \
+	memset(flags, 0x55, (n+15)/16 * 4);                                       \
+	size = set->size;                                                         \
+	set->size = n;                                                            \
+	set->ocp  = set->count;                                                   \
+	set->max = n * set->load_factor;                                          \
+	f = set->flags;                                                           \
+	set->flags = flags;                                                       \
+	flags = f;                                                                \
+	for(i=0;i<size;i++){                                                      \
+		if(!exists_entity(flags, i)) continue;                                \
+		key = ((hash_key_type*)set->array)[i];                                \
+		set_entity_del(flags, i);                                             \
+		while(1){                                                             \
+			hc = hash_code_macro(key) % set->size;                            \
+			while(!is_entity_null(set->flags, hc)){ hc = (hc + 1) % set->size; }        \
+			clear_entity_null(set->flags, hc);                                \
+			if(hc < size && exists_entity(flags, hc)){                        \
+				tmp = key;                                                    \
+				key = ((hash_key_type*)set->array)[hc];                       \
+				((hash_key_type*)set->array)[hc] = tmp;                       \
+				set_entity_del(flags, hc);                                    \
+			} else {                                                          \
+				((hash_key_type*)set->array)[hc] = key;                       \
+				break;                                                        \
+			}                                                                 \
+		}                                                                     \
+	}                                                                         \
+	free(flags);                                                              \
+}
+
+
+
+// ---------------------- Define your own hashset ----------------------------------
+// Example: 
+// typedef struct { int group; int user; } Info;
+// #define my_hashcode(val) (val).group
+// #define my_hashequal(v1, v2) (((v1).group == (v2).group) && ((v1).user == (v2).user))
+// define_hashset(myhash, Info, my_hashcode, my_hashequal);
+
+/*
+ * define_hashset(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)
+ *
+ * Instantiates the complete set API for one key type by expanding every
+ * *_hashset_macro in turn. hash_code_macro(key) must yield an integer hash and
+ * hash_equal_macro(a, b) a truth value; both receive keys by value.
+ */
+#define define_hashset(hash_type, hash_key_type, hash_code_macro, hash_equal_macro)    \
+	init_hashset_macro(hash_type, hash_key_type);                              \
+	get_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro);    \
+	prepare_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro);\
+	exists_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro); \
+	add_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro);    \
+	put_hashset_macro(hash_type, hash_key_type);                               \
+	remove_hashset_macro(hash_type, hash_key_type, hash_code_macro, hash_equal_macro); \
+	iter_hashset_macro(hash_type, hash_key_type);                              \
+	ref_iter_hashset_macro(hash_type, hash_key_type);                          \
+	reset_iter_hashset_macro(hash_type);                                       \
+	count_hashset_macro(hash_type);                                            \
+	clear_hashset_macro(hash_type);                                            \
+	dump_hashset_macro(hash_type);                                             \
+	load_hashset_macro(hash_type);                                             \
+	free_hashset_macro(hash_type);                                             \
+	encap_hashset_macro(hash_type, hash_key_type, hash_code_macro);
+
+/* ------------------ Useful functions ------------------------------------- */
+
+/* 32-bit integer mixer: eight alternating add-shift-left / xor-shift-right rounds. */
+static inline uint32_t __lh3_Jenkins_hash_int(uint32_t key){
+	uint32_t h = key;
+	h = h + (h << 12);
+	h = h ^ (h >> 22);
+	h = h + (h << 4);
+	h = h ^ (h >> 9);
+	h = h + (h << 10);
+	h = h ^ (h >> 2);
+	h = h + (h << 7);
+	h = h ^ (h >> 12);
+	return h;
+}
+
+/* 64-bit integer mixer: eight alternating add / xor-shift rounds on the key. */
+static inline uint64_t __lh3_Jenkins_hash_64(uint64_t key){
+	uint64_t h = key;
+	h = h + ~(h << 32);
+	h = h ^ (h >> 22);
+	h = h + ~(h << 13);
+	h = h ^ (h >> 8);
+	h = h + (h << 3);
+	h = h ^ (h >> 15);
+	h = h + ~(h << 27);
+	h = h ^ (h >> 31);
+	return h;
+}
+
+/*
+ * One-at-a-time hash over the `len` bytes starting at `key`.
+ * Bytes are added as plain `char`, exactly as before, so existing hash values
+ * are unchanged. The index is a size_t so that it can reach any `len`; a
+ * 32-bit index would wrap and never terminate for len > UINT32_MAX.
+ */
+static inline uint32_t jenkins_one_at_a_time_hash(char *key, size_t len){
+	uint32_t hash;
+	size_t i;
+	hash = 0;
+	for(i = 0; i < len; ++i){
+		hash += key[i];
+		hash += (hash << 10);
+		hash ^= (hash >> 6);
+	}
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+	return hash;
+}
+
+/*
+ * 64-bit shift/multiply mixer. All arithmetic is unsigned and wraps modulo
+ * 2^64, so the shift-and-add sums are written as the equivalent products.
+ */
+static inline uint64_t hash64shift(uint64_t key){
+	uint64_t h = key;
+	h = (h << 21) - h - 1;	/* same as (~h) + (h << 21) */
+	h ^= h >> 24;
+	h *= 265;		/* h + (h << 3) + (h << 8) */
+	h ^= h >> 14;
+	h *= 21;		/* h + (h << 2) + (h << 4) */
+	h ^= h >> 28;
+	h += h << 31;
+	return h;
+}
+
+
+/*
+ * MurmurHash2, 64-bit variant ("64A"), over `len` bytes at `key`.
+ *
+ * key  : data to hash; any alignment is accepted.
+ * len  : number of bytes.
+ * seed : initial value mixed into the hash.
+ *
+ * Whole 8-byte words are loaded with memcpy() in native byte order instead of
+ * through a uint64_t pointer: the values are the same, but unaligned input
+ * and the aliasing rules are no longer a problem.
+ */
+static inline uint64_t MurmurHash64A(const void * key, int len, uint32_t seed){
+	const uint64_t m = 0xc6a4a7935bd1e995LLU;
+	const int r = 47;
+
+	uint64_t h = seed ^ (len * m);
+
+	const unsigned char * data = (const unsigned char*)key;
+	const unsigned char * end = data + (len/8) * 8;
+
+	while(data != end){
+		uint64_t k;
+		memcpy(&k, data, sizeof(k));
+		data += sizeof(k);
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h ^= k;
+		h *= m;
+	}
+
+	/* fold in the 0..7 trailing bytes; every case deliberately falls through */
+	switch(len & 7){
+	case 7: h ^= ((uint64_t)data[6]) << 48; /* fall through */
+	case 6: h ^= ((uint64_t)data[5]) << 40; /* fall through */
+	case 5: h ^= ((uint64_t)data[4]) << 32; /* fall through */
+	case 4: h ^= ((uint64_t)data[3]) << 24; /* fall through */
+	case 3: h ^= ((uint64_t)data[2]) << 16; /* fall through */
+	case 2: h ^= ((uint64_t)data[1]) << 8;  /* fall through */
+	case 1: h ^= ((uint64_t)data[0]);
+	        h *= m;
+	        break;
+	default: break;
+	}
+
+	h ^= h >> r;
+	h *= m;
+	h ^= h >> r;
+
+	return h;
+}
+
+/* Short names for the 32-bit and 64-bit integer mixers defined above. */
+#define u32hashcode(key) __lh3_Jenkins_hash_int(key)
+#define u64hashcode(key) __lh3_Jenkins_hash_64(key)
+
+/*
+ * Hash a NUL-terminated string: start from the first character, then for each
+ * further character compute h * 31 + c (written as (h << 5) - h + c).
+ * An empty string hashes to 0.
+ */
+static inline uint32_t __string_hashcode(const char *s){
+	uint32_t acc = *s;
+	if(acc == 0) return acc;
+	while(*++s) acc = (acc << 5) - acc + *s;
+	return acc;
+}
+
+/* Ready-made sets of uint32_t (u32hash) and uint64_t (u64hash), compared with ==. */
+#define u32hash_code(e) u32hashcode(e)
+#define u64hash_code(e) u64hashcode(e)
+#define uxxhash_equals(e1, e2) ((e1) == (e2))
+define_hashset(u32hash, uint32_t, u32hash_code, uxxhash_equals);
+define_hashset(u64hash, uint64_t, u64hash_code, uxxhash_equals);
+
+/* Set of int (i32hash); the key is cast to uint32_t before hashing. */
+#define i32hash_code(e) u32hashcode((uint32_t)(e))
+#define i32hash_equals(e1, e2) ((e1) == (e2))
+define_hashset(i32hash, int, i32hash_code, i32hash_equals);
+
+/* Set of C strings (chash): hashed by content, compared with strcmp(); only the pointer is stored. */
+#define chash_code(e) __string_hashcode(e)
+#define chash_equals(e1, e2) (strcmp(e1, e2) == 0)
+define_hashset(chash, char*, chash_code, chash_equals);
+
+/* uint32_t -> uint32_t map (uuhash): only `key` takes part in hashing and equality, and it is used unmixed as the hash. */
+typedef struct { uint32_t key, val; } uuhash_t;
+#define uuhash_code(e) (e).key
+#define uuhash_equals(e1, e2) ((e1).key == (e2).key)
+define_hashset(uuhash, uuhash_t, uuhash_code, uuhash_equals);
+
+/* C string -> uint32_t map (cuhash): only `key` takes part in hashing and equality. */
+typedef struct { char *key; uint32_t val; } cuhash_t;
+#define cuhash_code(e) __string_hashcode((e).key)
+#define cuhash_equals(e1, e2) (strcmp((e1).key, (e2).key) == 0)
+define_hashset(cuhash, cuhash_t, cuhash_code, cuhash_equals);
+
+#endif
diff --git a/heap.h b/heap.h
new file mode 100644
index 0000000..d00012e
--- /dev/null
+++ b/heap.h
@@ -0,0 +1,95 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __HEAP_RJ_H
+#define __HEAP_RJ_H
+
+#include "list.h"
+
+/* Growable array of void* used as the heap's backing store. */
+define_list(rjheapv, void*);
+
+/*
+ * Ordering callback: compares e1 with e2, with `ref` passed through from the
+ * heap. push_heap() moves an element towards the root while the result is < 0,
+ * so the element that compares lowest ends up at index 0.
+ */
+typedef int (*heap_comp_func)(const void *e1, const void *e2, void *ref);
+
+/* Binary heap of caller-owned pointers. */
+typedef struct {
+	rjheapv *ptrs;	/* heap-ordered element pointers */
+	void *ref;	/* opaque value handed to cmp on every call */
+	heap_comp_func cmp;	/* ordering callback */
+} Heap;
+
+/*
+ * Create an empty heap ordered by `cmp`; `ref` is passed to cmp on every call.
+ * The backing array starts with room for 8 pointers.
+ * Returns NULL if the Heap structure cannot be allocated.
+ */
+static inline Heap* init_heap(heap_comp_func cmp, void *ref){
+	Heap *heap;
+	heap = malloc(sizeof(Heap));
+	if(heap == NULL) return NULL;
+	heap->ptrs = init_rjheapv(8);
+	heap->cmp  = cmp;
+	heap->ref  = ref;
+	return heap;
+}
+
+/* Destroy the heap and its backing array; the stored pointers themselves are not freed. */
+static inline void free_heap(Heap *heap){
+	free_rjheapv(heap->ptrs);
+	free(heap);
+}
+
+/* Remove every element from the heap by clearing the backing array. */
+static inline void clear_heap(Heap *heap){
+	clear_rjheapv(heap->ptrs);
+}
+
+/*
+ * Insert pointer `p`: append it at the end, then swap it with its parent for
+ * as long as cmp(child, parent) is negative.
+ */
+static inline void push_heap(Heap *heap, void *p){
+	void *child;
+	size_t pos, parent;
+	pos = count_rjheapv(heap->ptrs);
+	push_rjheapv(heap->ptrs, p);
+	while(pos){
+		parent = (pos - 1) >> 1;
+		if(heap->cmp(get_rjheapv(heap->ptrs, pos), get_rjheapv(heap->ptrs, parent), heap->ref) >= 0) break;
+		child = get_rjheapv(heap->ptrs, pos);
+		set_rjheapv(heap->ptrs, pos, get_rjheapv(heap->ptrs, parent));
+		set_rjheapv(heap->ptrs, parent, child);
+		pos = parent;
+	}
+}
+
+/* Number of elements currently stored in the heap. */
+static inline size_t count_heap(Heap *heap){
+	return count_rjheapv(heap->ptrs);
+}
+
+/* Return the element at the root (index 0) without removing it, or NULL when the heap is empty. */
+static inline void* peer_heap(Heap *heap){
+	if(count_rjheapv(heap->ptrs)) return get_rjheapv(heap->ptrs, 0);
+	return NULL;
+}
+
+/*
+ * Remove the element at position `idx`.
+ *
+ * The last element is moved into the vacated position and the array is
+ * shortened by one. The moved element is then sifted up while it compares
+ * lower than its parent, and sifted down while a child compares lower than
+ * it. The upward pass is needed for idx != 0: the replacement comes from a
+ * different subtree and may order before the ancestors of `idx`.
+ *
+ * An out-of-range idx (including any idx on an empty heap) is ignored.
+ */
+static inline void remove_heap(Heap *heap, size_t idx){
+	void *pp;
+	size_t swap, parent;
+	if(idx >= count_rjheapv(heap->ptrs)) return;
+	set_rjheapv(heap->ptrs, idx, get_rjheapv(heap->ptrs, count_rjheapv(heap->ptrs) - 1));
+	trunc_rjheapv(heap->ptrs, 1);
+	/* idx was the last position: nothing was moved, nothing to restore */
+	if(idx >= count_rjheapv(heap->ptrs)) return;
+	while(idx){
+		parent = (idx - 1) >> 1;
+		if(heap->cmp((const void*)get_rjheapv(heap->ptrs, idx), (const void*)get_rjheapv(heap->ptrs, parent), heap->ref) >= 0) break;
+		pp = get_rjheapv(heap->ptrs, idx);
+		set_rjheapv(heap->ptrs, idx, get_rjheapv(heap->ptrs, parent));
+		set_rjheapv(heap->ptrs, parent, pp);
+		idx = parent;
+	}
+	while((idx << 1) + 1 < count_rjheapv(heap->ptrs)){
+		swap = idx;
+		if(heap->cmp((const void*)get_rjheapv(heap->ptrs, swap), (const void*)get_rjheapv(heap->ptrs, (idx << 1) + 1), heap->ref) > 0){
+			swap = (idx << 1) + 1;
+		}
+		if((idx << 1) + 2 < count_rjheapv(heap->ptrs) && heap->cmp((const void*)get_rjheapv(heap->ptrs, swap), (const void*)get_rjheapv(heap->ptrs, (idx << 1) + 2), heap->ref) > 0){
+			swap = (idx << 1) + 2;
+		}
+		if(swap == idx) break;
+		pp = get_rjheapv(heap->ptrs, idx);
+		set_rjheapv(heap->ptrs,  idx, get_rjheapv(heap->ptrs, swap));
+		set_rjheapv(heap->ptrs, swap, pp);
+		idx = swap;
+	}
+}
+
+/* Remove and return the root element, or return NULL when the heap is empty. */
+static inline void* pop_heap(Heap *heap){
+	void *top;
+	if(count_rjheapv(heap->ptrs) == 0) return NULL;
+	top = get_rjheapv(heap->ptrs, 0);
+	remove_heap(heap, 0);
+	return top;
+}
+
+#endif
diff --git a/list.h b/list.h
new file mode 100644
index 0000000..2cbefce
--- /dev/null
+++ b/list.h
@@ -0,0 +1,234 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __LIST_RJ_H
+#define __LIST_RJ_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include "sort.h"
+
+/**
+ * Common static functions
+ */
+
+#define num_min(n1, n2) (((n1) < (n2))? (n1) : (n2)) /* smaller of the two arguments; each argument appears twice in the expansion, so avoid side effects */
+#define num_max(n1, n2) (((n1) > (n2))? (n1) : (n2)) /* larger of the two arguments; same double-expansion caveat */
+/**
+ * List
+ */
+
+/**
+ * define_list_core(list_type, e_type, size_type, inc_size)
+ *
+ * Generates a growable array type `list_type` of `e_type` elements whose
+ * count and capacity are stored as `size_type`, together with a family of
+ * static inline helpers (init_, count_, push_, pop_, insert_, remove_, ...).
+ * Capacity doubles while it is below `inc_size` and grows by `inc_size`
+ * afterwards. Allocation failure and element-count overflow abort the
+ * program with a diagnostic on stderr.
+ *
+ * set_, get_, ref_, ref_next_ and set_<list>_size do no bounds checking.
+ */
+#define define_list_core(list_type, e_type, size_type, inc_size)	\
+	\
+typedef struct { e_type* buffer; size_type size; size_type cap; } list_type;	\
+	\
+/* (re)allocate room for n_elem elements of e_size bytes; abort when that is impossible */	\
+static inline void* list_type##_xrealloc(void *ptr, size_t n_elem, size_t e_size){	\
+	void *mem;	\
+	if(e_size && n_elem > SIZE_MAX / e_size) mem = NULL;	\
+	else mem = realloc(ptr, n_elem * e_size);	\
+	if(mem == NULL){	\
+		fprintf(stderr, " -- Out of memory for %s in %s -- %s:%d --\n", #list_type, __FUNCTION__, __FILE__, __LINE__);	\
+		fflush(stderr);	\
+		abort();	\
+	}	\
+	return mem;	\
+}	\
+	\
+/* heap-allocate a list with room for init_size elements (2 when init_size is 0) */	\
+static inline list_type* init_##list_type(size_type init_size){	\
+	list_type *list;	\
+	if(init_size == 0) init_size = 2;	\
+	list = (list_type*)list_type##_xrealloc(NULL, 1, sizeof(list_type));	\
+	list->size = 0;	\
+	list->cap  = init_size;	\
+	list->buffer = (e_type*)list_type##_xrealloc(NULL, (size_t)list->cap, sizeof(e_type));	\
+	return list;	\
+}	\
+	\
+/* initialise a caller-owned list struct in place; pair with list_type##_free */	\
+static inline void list_type##_init(list_type *list, size_type init_size){	\
+	if(init_size == 0) init_size = 2;	\
+	list->size = 0;	\
+	list->cap  = init_size;	\
+	list->buffer = (e_type*)list_type##_xrealloc(NULL, (size_t)list->cap, sizeof(e_type));	\
+}	\
+	\
+static inline size_type count_##list_type(list_type *list){ return list->size; }	\
+	\
+/* forget all elements; the capacity is kept */	\
+static inline void clear_##list_type(list_type *list){ list->size = 0; }	\
+	\
+/* make sure n more elements fit behind the current ones */	\
+static inline void encap_##list_type(list_type *list, size_type n){	\
+	size_type need, cap, grown;	\
+	need = list->size + n;	\
+	/* check for wrap-around first: a wrapped sum could otherwise look as if it fits */	\
+	if(need < list->size){	\
+		fprintf(stderr, " -- elements size exceed %s's data type %s in %s -- %s:%d --\n", #list_type, #size_type, __FUNCTION__, __FILE__, __LINE__);	\
+		fflush(stderr);	\
+		abort();	\
+	}	\
+	if(need <= list->cap) return;	\
+	cap = list->cap;	\
+	while(need > cap){	\
+		if(cap < inc_size){	\
+			grown = (size_type)(cap << 1);	\
+		} else {	\
+			grown = (size_type)(cap + inc_size);	\
+		}	\
+		/* the growth step wrapped or made no progress: fall back to the exact requirement */	\
+		if(grown <= cap) grown = need;	\
+		cap = grown;	\
+	}	\
+	list->buffer = (e_type*)list_type##_xrealloc(list->buffer, (size_t)cap, sizeof(e_type));	\
+	list->cap = cap;	\
+}	\
+	\
+/* drop up to `size` elements from the end */	\
+static inline void trunc_##list_type(list_type *list, size_type size){	\
+	if(size > count_##list_type(list)) size = count_##list_type(list);	\
+	list->size -= size;	\
+}	\
+	\
+static inline void set_##list_type##_size(list_type *list, size_type size){ list->size = size; }	\
+	\
+/* extend the element count by `size` without growing the buffer; clamps at the capacity */	\
+static inline void incre_##list_type(list_type *list, size_type size){	\
+	if(size + list->size > list->cap) list->size = list->cap;	\
+	else list->size += size;	\
+}	\
+	\
+/* append e, growing the buffer when needed */	\
+static inline void push_##list_type(list_type *list, e_type e){	\
+	encap_##list_type(list, 1);	\
+	list->buffer[list->size++] = e;	\
+}	\
+	\
+/* remove the last element into *e; returns 1 on success, 0 when the list is empty */	\
+static inline int pop_##list_type(list_type *list, e_type*e){	\
+	if(count_##list_type(list)){	\
+		list->size --;	\
+		*e = list->buffer[list->size];	\
+		return 1;	\
+	} else return 0;	\
+}	\
+	\
+/* insert e before position idx; idx == size appends, idx > size is ignored */	\
+static inline void insert_##list_type(list_type *list, size_type idx, e_type e){	\
+	if(idx > list->size) return;	\
+	encap_##list_type(list, 1);	\
+	if(idx == list->size){	\
+		list->buffer[list->size] = e;	\
+	} else {	\
+		memmove(list->buffer + idx + 1, list->buffer + idx, (list->size - idx) * sizeof(e_type));	\
+		list->buffer[idx] = e;	\
+	}	\
+	list->size ++;	\
+}	\
+	\
+/* delete the element at idx, closing the gap; out-of-range idx is ignored */	\
+static inline void remove_##list_type(list_type *list, size_type idx){	\
+	if(idx >= list->size) return;	\
+	if(idx + 1 < list->size){	\
+		memmove(list->buffer + idx, list->buffer + idx + 1, (list->size - idx - 1) * sizeof(e_type));	\
+	}	\
+	list->size --;	\
+}	\
+	\
+static inline void set_##list_type(list_type *list, size_type idx, e_type e){ list->buffer[idx] = e; }	\
+	\
+static inline e_type get_##list_type(list_type *list, size_type idx){ return list->buffer[idx]; }	\
+	\
+/* pointer into the buffer; it is invalidated by any call that grows the list */	\
+static inline e_type* ref_##list_type(list_type *list, size_type idx){ return list->buffer + idx; }	\
+	\
+/* grow by one element and return a pointer to the new, uninitialised slot */	\
+static inline e_type* next_ref_##list_type(list_type *list){ encap_##list_type(list, 1); list->size ++; return list->buffer + list->size - 1; }	\
+	\
+/* like next_ref_ but without growing the buffer: the caller must have reserved room */	\
+static inline e_type* ref_next_##list_type(list_type *list){ list->size ++; return list->buffer + list->size - 1; }	\
+	\
+static inline e_type* as_array_##list_type(list_type *list){ return list->buffer; }	\
+	\
+/* reverse the element order in place */	\
+static inline void reverse_##list_type(list_type *list){	\
+	size_type i, j;	\
+	e_type t;	\
+	if(count_##list_type(list) == 0) return;	\
+	i = 0;	\
+	j = count_##list_type(list) - 1;	\
+	while(i < j){	\
+		t = get_##list_type(list, i);	\
+		set_##list_type(list, i, get_##list_type(list, j));	\
+		set_##list_type(list, j, t);	\
+		i ++;	\
+		j --;	\
+	}	\
+}	\
+	\
+/* append a copy of all elements of list2 to list1 */	\
+static inline void append_##list_type(list_type *list1, list_type *list2){	\
+	encap_##list_type(list1, count_##list_type(list2));	\
+	memcpy(list1->buffer + list1->size, list2->buffer, sizeof(e_type) * list2->size);	\
+	list1->size += list2->size;	\
+}	\
+	\
+/* write the raw elements to out; returns the number of elements fwrite reported */	\
+static inline size_type dump_##list_type(list_type *list, FILE *out){	\
+	return fwrite(list->buffer, sizeof(e_type), count_##list_type(list), out);	\
+}	\
+	\
+/* counterpart of init_##list_type: frees the buffer and the list object */	\
+static inline void free_##list_type(list_type *list){ free(list->buffer); free(list); }	\
+	\
+/* counterpart of list_type##_init: frees only the buffer */	\
+static inline void list_type##_free(list_type *list){ free(list->buffer); list->buffer = NULL; }	\
+
+/**
+ * define_list_ext(list_type, e_type, size_type, cmp_func)
+ *
+ * Adds value-based helpers to a list type created by define_list_core.
+ * cmp_func(a, b, NULL) must yield 0 when a and b are equal; it is also the
+ * ordering handed to define_quick_sort for sort_<list>.
+ */
+#define define_list_ext(list_type, e_type, size_type, cmp_func)	\
+/* remove every element equal to e; returns how many were removed */	\
+static inline size_type delete_##list_type(list_type *list, e_type e){	\
+	size_type i, ret;	\
+	ret = 0;	\
+	/* walk backwards so that closing a gap never skips an unvisited element */	\
+	for(i=list->size;i>0;i--){	\
+		if(cmp_func(list->buffer[i-1], e, NULL) == 0){	\
+			if(i < list->size){	\
+				memmove(list->buffer + i - 1, list->buffer + i, (list->size - i) * sizeof(e_type));	\
+			}	\
+			list->size --;	\
+			ret ++;	\
+		}	\
+	}	\
+	return ret;	\
+}	\
+	\
+/* count the elements equal to e */	\
+static inline size_type occ_##list_type(list_type *list, e_type e){	\
+	size_type i, n;	\
+	for(i=0,n=0;i<list->size;i++){	\
+		if(cmp_func(list->buffer[i], e, NULL) == 0) n++;	\
+	}	\
+	return n;	\
+}	\
+	\
+/* overwrite every element equal to `from` with `to`; returns the number of replacements */	\
+static inline size_type replace_##list_type(list_type *list, e_type from, e_type to){	\
+	size_type i, ret;	\
+	ret = 0;	\
+	for(i=0;i<list->size;i++){	\
+		if(cmp_func(list->buffer[i], from, NULL) == 0){	\
+			list->buffer[i] = to;	\
+			ret ++;	\
+		}	\
+	}	\
+	return ret;	\
+}	\
+	\
+/* index of the first element equal to e at or after `start`; the loop's end index when there is none */	\
+static inline size_type locate_##list_type(list_type *list, e_type e, size_type start){	\
+	size_type i;	\
+	for(i=start;i<list->size;i++){	\
+		if(cmp_func(list->buffer[i], e, NULL) == 0) return i;	\
+	}	\
+	return i;	\
+}	\
+	\
+define_quick_sort(sort_##list_type##_core, e_type, cmp_func);	\
+	\
+/* sort the whole list in place with cmp_func */	\
+static inline void sort_##list_type(list_type *list){ sort_##list_type##_core(ref_##list_type(list, 0), count_##list_type(list), NULL); }
+
+#define define_list(name, e_type) define_list_core(name, e_type, size_t, 0xFFFFFU) /* core list only: size_t counters, inc_size of 0xFFFFF elements */
+
+#define native_number_cmp(e1, e2, obj) (((e1) == (e2))? 0 : (((e1) < (e2))? -1 : 1)) /* three-way compare yielding 0, -1 or 1; obj is ignored */
+
+#define define_native_list(name, e_type)	\
+define_list_core(name, e_type, size_t, 0xFFFFFU);	\
+define_list_ext(name, e_type, size_t, native_number_cmp); /* core list plus the value-based helpers, compared with native_number_cmp */
+
+define_native_list(u8list,  uint8_t); /* lists of unsigned 8/16/32/64-bit integers */
+define_native_list(u16list, uint16_t);
+define_native_list(u32list, uint32_t);
+define_native_list(u64list, uint64_t);
+
+define_native_list(b8list,  int8_t); /* lists of signed 8/16/32/64-bit integers */
+define_native_list(b16list, int16_t);
+define_native_list(b32list, int32_t);
+define_native_list(b64list, int64_t);
+
+define_list(vplist, void*); /* list of untyped pointers; core operations only */
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..0074c89
--- /dev/null
+++ b/main.c
@@ -0,0 +1,250 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#include "rainbow.h"
+
+const char *version = "2.0.4";
+
+int usage(){ /* Print the help text for all sub-commands, with the program version, to stdout.
+	 * Always returns 1, so callers can write `return usage();`. */
+	printf(
+	"rainbow %s -- <ruanjue at gmail.com, chongzechen at gmail.com>\n"
+	"Usage: rainbow <cmd> [options]\n"
+	"\n"
+	" cluster\n"
+	"Input  File Format: paired fasta/fastq file(s)\n"
+	"Output File Format: <seqid:int>\\t<cluster_id:int>\\t<read1:string>\\t<read2:string>\n"
+	"  -1 <string> Input fasta/fastq file, supports multiple '-1'\n"
+	"  -2 <string> Input fasta/fastq file, supports multiple '-2' [null]\n"
+	"  -l <int>    Read length, default: 0 variable\n"
+	//"  -r <int>    rank of input files [1]\n"
+	"  -m <int>    Maximum mismatches [4]\n"
+	"  -e <int>    Exactly matching threshold [2000]\n"
+	"  -L          Low level of polymorphism\n"
+	" div\n"
+	"Input File Format: <seqid:int>\\t<cluster_id:int>\\t<read1:string>\\t<read2:string>\n"
+	"Output File Format: <seqid:int>\\t<cluster_id:int>\\t<read1:string>\\t<read2:string>[\\t<pre_cluster_id:int>]\n"
+	"  -i <string> Input file [stdin]\n"
+	"  -o <string> Output file [stdout]\n"
+	"  -k <int>    K_allele, min variants to create a new group [2]\n"
+	"  -K <int>    K_allele, divide regardless of frequency when num of variants exceed this value [50]\n"
+	"  -f <float>  Frequency, min variant frequency to create a new group [0.2]\n"
+	" merge \n"
+	"Input File Format: <seqid:int>\\t<cluster_id:int>\\t<read1:string>\\t<read2:string>[\\t<pre_cluster_id:int>]\n"
+	"  -i <string> Input rbasm output file [stdin]\n"
+	"  -a          output assembly\n"
+//	"  -v <string> Input rainbow divided file [stdin]\n"
+//	"  -p <float>  maximum heterozygosity to collapse, should be specifed according to the estimated\n"
+//	"              polymorphism of the species [0.02]\n"
+//	"  -l <int>    Minimum overlap to collapse two contigs [100]\n"
+//	"  -k <int>    Minimum number of kmers to define similarity between two contigs [5]\n"
+	"  -o <string> Output file for merged contigs, one line per cluster [stdout]\n" 
+	"  -N <int>    Maximum number of divided clusters to merge [300]\n"
+	"  -l <int>    Minimum overlap when assemble two reads (valid only when '-a' is opened) [5]\n"
+	"  -f <float>  Minimum fraction of similarity when assembly (valid only when '-a' is opened) [0.90]\n"
+	"  -r <int>    Minimum number of reads to assemble (valid only when '-a' is opened) [5]\n"
+	"  -R <int>    Maximum number of reads to assemble (valid only when '-a' is opened) [300]\n"
+	"\n",
+	version
+	);
+	return 1;
+}
+
+define_list(namelist, char*);
+
+/*
+ * Open the files named in `names` as one multi-file reader and detect their
+ * sequence format.
+ * Returns NULL when the reader cannot be opened. On success *is_fq becomes 0
+ * when guess_seq_file_type() reports 1 and 1 when it reports 2 (presumably
+ * fasta and fastq respectively); any other result aborts.
+ */
+static FileReader* open_seq_files(namelist *names, int *is_fq){
+	FileReader *fr;
+	if((fr = fopen_m_filereader(count_namelist(names), as_array_namelist(names))) == NULL) return NULL;
+	switch(guess_seq_file_type(fr)){
+		case 1: *is_fq = 0; break;
+		case 2: *is_fq = 1; break;
+		default:
+			fprintf(stderr, "unknown file type\n");
+			abort();
+	}
+	return fr;
+}
+
+/*
+ * Entry point of the `cluster` sub-command: parses its options, opens the
+ * read files given with -1 (and optionally -2), indexes the first set and
+ * writes the clustering result to stdout.
+ * Returns 0 on success and usage()'s value (1) on bad arguments; input files
+ * that cannot be opened or recognised abort the program.
+ */
+int cluster_invoker(int argc, char **argv){
+	Cluster *cluster;
+	FileReader *fr1, *fr2;
+	namelist *list1, *list2;
+	int max_mm, c, exact_limit, is_fq1, is_fq2, fix_rd_len, ret;
+	uint32_t KMER_SIZE = 15, KMER_NUM = 6;
+	fr1 = fr2 = NULL;
+	max_mm = 4;
+	exact_limit = 2000;
+	fix_rd_len = 0;
+	is_fq1 = is_fq2 = 0;
+	ret = 0;
+	list1 = init_namelist(2);
+	list2 = init_namelist(2);
+
+	while((c = getopt(argc, argv, "h1:2:m:e:l:L")) != -1){
+		switch(c){
+			case '1': push_namelist(list1, optarg); break;
+			case '2': push_namelist(list2, optarg); break;
+			case 'l': fix_rd_len = atoi(optarg); break;
+			case 'm': max_mm = atoi(optarg); break;
+			case 'e': exact_limit = atoi(optarg); break;
+			case 'L': KMER_SIZE = 13; KMER_NUM = 4; break;
+			case 'h':
+			default: ret = usage(); goto done;
+		}
+	}
+	if(count_namelist(list1) == 0){ ret = usage(); goto done; }
+	if(count_namelist(list2) != 0 && count_namelist(list1)!=count_namelist(list2)) {
+		fprintf(stderr, "file1 and file2 should be paired\n\n");
+		ret = usage();
+		goto done;
+	}
+	if((fr1 = open_seq_files(list1, &is_fq1)) == NULL){
+		fprintf(stderr, " -- Cannot open input file in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	if(count_namelist(list2) != 0) {
+		if((fr2 = open_seq_files(list2, &is_fq2)) == NULL){
+			fprintf(stderr, " -- Cannot open input file in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+			abort();
+		}
+	}
+
+	cluster = init_cluster(max_mm, exact_limit, KMER_SIZE, KMER_NUM);
+	indexing_cluster(cluster, fr1, is_fq1, fix_rd_len);
+	clustering(cluster, fr2, is_fq2, fix_rd_len, stdout);
+	free_cluster(cluster);
+	fclose_filereader(fr1);
+	if(fr2) fclose_filereader(fr2);
+	fprintf(stderr, "Program exit normally\n");
+done:
+	/* The name lists are released only here, after the readers are closed:
+	 * the readers were handed the lists' arrays and may still refer to them
+	 * (not verifiable from this file). This also covers every early exit. */
+	free_namelist(list1);
+	free_namelist(list2);
+	return ret;
+}
+
+/*
+ * Entry point of the `div` sub-command: parses its options, reads clustered
+ * reads from -i (stdin by default) and writes the divided result to -o
+ * (stdout by default).
+ * Returns 0 on success, 1 for bad arguments or when the output file cannot
+ * be closed cleanly; files that cannot be opened abort the program.
+ * Diagnostics go to stderr so they never mix with results written to stdout.
+ */
+int div_invoker(int argc, char **argv){
+	Div *div;
+	FileReader *fr;
+	FILE *out;
+	int c, k_allele, K_allele;
+	float min_freq;
+	char *infile, *outfile;
+	infile = NULL;
+	outfile = NULL;
+	k_allele = 2;
+	K_allele = 50;
+	min_freq = 0.2;
+	while((c = getopt(argc, argv, "hi:o:k:K:f:")) != -1){
+		switch(c){
+			case 'h': return usage();
+			case 'i': infile = optarg; break;
+			case 'o': outfile = optarg; break;
+			case 'k': k_allele = atoi(optarg); break;
+			case 'K': K_allele = atoi(optarg); break;
+			case 'f': min_freq = atof(optarg); break;
+			default: return usage();
+		}
+	}
+	if(infile){
+		if((fr = fopen_filereader(infile)) == NULL){
+			fprintf(stderr, " -- Cannot open %s in %s -- %s:%d --\n", infile, __FUNCTION__, __FILE__, __LINE__);
+			abort();
+		}
+	} else fr = stdin_filereader();
+	if(outfile){
+		if((out = fopen(outfile, "w")) == NULL){
+			fprintf(stderr, " -- Cannot write %s in %s -- %s:%d --\n", outfile, __FUNCTION__, __FILE__, __LINE__);
+			abort();
+		}
+	} else out = stdout;
+	div = init_div(k_allele, K_allele, min_freq);
+	div_reads(div, fr, out);
+	free_div(div);
+	fclose_filereader(fr);
+	/* fclose flushes buffered output, so a failure here means lost results */
+	if(outfile && fclose(out) != 0){
+		fprintf(stderr, " -- Cannot finish writing %s in %s -- %s:%d --\n", outfile, __FUNCTION__, __FILE__, __LINE__);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Entry point of the `merge` sub-command: parses its options, reads divided
+ * clusters from -i (stdin by default) and writes merged contigs to -o
+ * (stdout by default).
+ * Returns 0 on success, 1 for bad arguments or when the output file cannot
+ * be closed cleanly; files that cannot be opened abort the program.
+ * Diagnostics go to stderr so they never mix with results written to stdout.
+ */
+int merge_invoker(int argc, char **argv) {
+	FileReader *divd;
+	FILE *out = NULL;
+	merge_t *merger;
+	char *divdf = NULL, *outfile = NULL;
+	uint32_t min_kmer = 5;
+	uint32_t min_overlap = 5;
+	float het = 0.85; int c;
+	uint32_t kmersize = 23;
+	uint32_t max_cluster = 300;
+	uint32_t need_asm = 0;
+	float min_sm = 0.90;
+	uint32_t min_read = 5;
+	uint32_t max_read = 300;
+
+	while ((c = getopt(argc, argv, "hi:l:p:k:o:s:N:f:r:R:a")) != -1) {
+		switch (c) {
+			case 'h': return usage();
+			case 'i': divdf = optarg; break;
+			case 'l': min_overlap = atoi(optarg); break;
+			case 'p': het = atof(optarg); break;
+			case 'k': min_kmer = atoi(optarg); break;
+			case 'o': outfile = optarg; break;
+			case 'f': min_sm = atof(optarg); break;
+			case 's': kmersize = atoi(optarg); break;
+			case 'N': max_cluster = atoi(optarg); break;
+			case 'a': need_asm = 1; break;
+			case 'r': min_read = atoi(optarg); break;
+			case 'R': max_read = atoi(optarg); break;
+			default: return usage();
+		}
+	}
+	if (divdf) {
+		if ((divd = fopen_filereader(divdf)) == NULL) {
+			fprintf(stderr, " -- Cannot open %s in %s -- %s:%d --\n", divdf, __FUNCTION__, __FILE__, __LINE__);
+			abort();
+		}
+	} else divd = stdin_filereader();
+	if (outfile) {
+		if ((out = fopen(outfile, "w")) == NULL) {
+			/* report the output path; the input path may even be NULL here */
+			fprintf(stderr, " -- Cannot write %s in %s -- %s:%d --\n", outfile, __FUNCTION__, __FILE__, __LINE__);
+			abort();
+		}
+	} else out = stdout;
+	merger = init_merger(min_kmer, min_overlap, het, kmersize, max_cluster, need_asm, min_sm, min_read, max_read);
+	merge_ctgs(merger, divd, out);
+	free_merger(merger);
+	fclose_filereader(divd);
+	/* fclose flushes buffered output, so a failure here means lost results */
+	if (outfile && fclose(out) != 0) {
+		fprintf(stderr, " -- Cannot finish writing %s in %s -- %s:%d --\n", outfile, __FUNCTION__, __FILE__, __LINE__);
+		return 1;
+	}
+	return 0;
+}
+
+/* Dispatch on the sub-command name in argv[1] (matched case-insensitively);
+ * a missing or unknown sub-command prints the help text. The chosen invoker
+ * sees the sub-command name as its own argv[0]. */
+int main(int argc, char **argv){
+	static const struct { const char *name; int (*run)(int, char **); } commands[] = {
+		{ "cluster", cluster_invoker },
+		{ "div",     div_invoker },
+		{ "merge",   merge_invoker },
+	};
+	size_t k;
+	if(argc < 2) return usage();
+	for(k = 0; k < sizeof(commands) / sizeof(commands[0]); k++){
+		if(strcasecmp(argv[1], commands[k].name) == 0) return commands[k].run(argc - 1, argv + 1);
+	}
+	return usage();
+}
diff --git a/mergecontig.c b/mergecontig.c
new file mode 100644
index 0000000..7ac419e
--- /dev/null
+++ b/mergecontig.c
@@ -0,0 +1,630 @@
+#include <time.h>
+#include "mergecontig.h"
+#include "string.h"
+
+/* Allocate an empty contig database: zero contigs and a contig list created
+ * with an initial capacity of 6. */
+CtgDB* init_ctgdb(void ) {
+	CtgDB *fresh = (CtgDB*)malloc(sizeof(CtgDB));
+
+	fresh->ctgs = init_ctglist(6);
+	fresh->ctgnum = 0;
+	return fresh;
+}
+
+/*
+ * Append one loaded contig to db. The contig gets a private copy of seq and
+ * its old cluster id is looked up in map under the key `id`.
+ * Aborts when the id is unknown (assumes get_uuhash() returns NULL for a
+ * missing key -- TODO confirm against hashset.h).
+ */
+static void add_loaded_ctg(CtgDB *db, uuhash *map, uint32_t id, uint32_t cls_id, const char *seq) {
+	Ctg contig;
+	uuhash_t h, *hit;
+
+	h.key = id;
+	h.val = 0;
+	hit = get_uuhash(map, h);
+	if (hit == NULL) {
+		fprintf(stderr, " -- contig %u has no entry in the second input in %s -- %s:%d --\n", (unsigned)id, __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	contig.id = id;
+	contig.cls_id = cls_id;
+	contig.old_clsid = hit->val;
+	contig.sz = 1;
+	contig.seq = strdup(seq);
+	db->ctgnum++;
+	push_ctglist(db->ctgs, contig);
+}
+
+/*
+ * Build a contig database from two inputs.
+ *   fr2: table whose column 1 is used as key and column 4 as value of an
+ *        id -> old-cluster-id map (the first value seen for a key wins).
+ *   fr1: line-oriented file; a line starting with 'E' opens a new record
+ *        whose id is parsed from offset 2, a line starting with 'S' carries
+ *        a sequence at offset 2. Only the longest sequence of each record is
+ *        kept; records without any sequence are dropped.
+ * Contigs are numbered (cls_id) in the order they are stored. The returned
+ * database owns its sequence copies; release it with free_load_ctgdb().
+ */
+CtgDB* load_ctgdb(FileReader *fr1, FileReader *fr2) {
+	uint32_t id = 0, i = 0;
+	CtgDB *db;
+	uuhash *map = init_uuhash(1023);
+	uuhash_t h;
+	int len = 0, cur_len;
+	char *seq = NULL;
+	String *line = init_string(1);
+
+	while (fread_table(fr2) != -1) {
+		h.key = atoi(get_col_str(fr2, 1));
+		h.val = atoi(get_col_str(fr2, 4));
+
+		if (!exists_uuhash(map, h)) {
+			put_uuhash(map, h);
+		}
+	}
+
+	db = init_ctgdb();
+
+	while (fread_line(line, fr1) != -1) {
+		if (line->string[0] == 'E') {
+			/* a new record starts: store the previous one if it had a sequence */
+			if (len != 0) {
+				add_loaded_ctg(db, map, id, i, seq);
+				i++;
+			}
+			free(seq); seq = NULL;
+			len = 0;
+			id = atoi(line->string+2);
+		} else if (line->string[0] == 'S') {
+			cur_len = (int)strlen(line->string+2);
+			if (len < cur_len) {
+				len = cur_len;
+				free(seq);
+				seq = strdup(line->string+2);
+			}
+		}
+	}
+
+	/* store the last record, unless the input ended without any sequence */
+	if (len != 0) {
+		add_loaded_ctg(db, map, id, i, seq);
+	}
+
+	free(seq);
+	free_string(line);
+	free_uuhash(map);
+	return db;
+}
+
+/* Dump every contig as "id cls_id old_clsid sz seq" on its own stdout line,
+ * flushing after each line. */
+void print_ctgdb(CtgDB *db) {
+	uint32_t idx = 0;
+
+	while (idx < count_ctglist(db->ctgs)) {
+		Ctg *c = ref_ctglist(db->ctgs, idx);
+		fprintf(stdout, "%d %d %d %d %s\n", c->id, c->cls_id, c->old_clsid, c->sz, c->seq);
+		fflush(stdout);
+		idx++;
+	}
+}
+
+/* Release the contig list and the database object. The contigs' seq strings
+ * are not freed here (free_load_ctgdb does that). */
+void free_ctgdb(CtgDB *db) {
+	ctglist *contigs = db->ctgs;
+
+	free_ctglist(contigs);
+	free(db);
+}
+
+/* Release a database together with the seq string of every contig in it. */
+void free_load_ctgdb(CtgDB *db) {
+	uint32_t idx = 0;
+
+	while (idx < count_ctglist(db->ctgs)) {
+		free(ref_ctglist(db->ctgs, idx)->seq);
+		idx++;
+	}
+
+	free_ctglist(db->ctgs);
+	free(db);
+}
+
+/*
+ * Heap comparator for PWcontig records: the record with the larger score
+ * compares as "smaller" (-1), equal scores give 0.
+ * ref is unused.
+ */
+int aln_cmp(const void *p0, const void *p1, void *ref) {
+	const PWcontig *t0 = (const PWcontig*)p0;
+	const PWcontig *t1 = (const PWcontig*)p1;
+
+	(void)ref; /* required by the comparator signature only */
+	if (t0->score < t1->score)
+		return 1;
+	if (t0->score > t1->score)
+		return -1;
+	return 0;
+}
+
+define_search_array(bisearch, uint64_t, native_number_cmp);
+
+/*
+ * Binary search for q in the ascending array a, restricted to the inclusive
+ * index range [i, j].
+ * Returns the index of a matching element, or -(ins + 1) when q is absent,
+ * where ins is the index at which q would have to be inserted.
+ */
+static inline int olbisearch(uint64_t a[], uint64_t q, int i, int j) {
+	int low, high, mid;
+	low = i; high = j;
+	while (low <= high) {
+		mid = low + (high - low) / 2; /* same midpoint as (low + high) / 2 without the overflow */
+		if (a[mid] > q) {
+			high = mid - 1;
+		} else if (a[mid] < q) {
+			low = mid + 1;
+		} else {
+			return mid;
+		}
+	}
+	return -(low + 1);  // failed to find q
+}
+
+/*
+ * Align every pair of contigs in db with aln_stdaln() and collect the
+ * results.
+ * For each pair a malloc'ed PWcontig (cluster ids, overlap = aligned columns
+ * without gaps, alignment score, het = mismatches / overlap) is pushed both
+ * onto the returned heap and onto the returned list. The returned PWDB
+ * shares db->ctgs (as ctgv); release it with free_pwdb().
+ * A database with fewer than two contigs yields an empty result.
+ */
+PWDB* pw_aln_contigs_brute(CtgDB *db) {
+	uint32_t i, j, n;
+	int k, mn, mm, aln_len;
+	PWDB *pwdb;
+	Ctg *c0, *c1;
+	pwdb = (PWDB*)malloc(sizeof(PWDB));
+
+	pwdb->pwctgs = init_pwctglist(6);
+	pwdb->hp = init_heap(aln_cmp, pwdb);
+	pwdb->ctgv = db->ctgs;
+	AlnParam ap = {10, 2, 2, aln_sm_nt, 16, 75};
+
+	n = db->ctgnum;
+
+	/* i + 1 < n rather than i < n-1: n is unsigned and may be 0 */
+	for (i = 0; i + 1 < n; i++) {
+		c0 = ref_ctglist(db->ctgs, i);
+		for (j = i+1; j < n; j++) {
+			c1 = ref_ctglist(db->ctgs, j);
+			AlnAln *aa;
+			mn = mm = 0;
+			aa = aln_stdaln(c0->seq, c1->seq, &ap, 0, 1);
+			aln_len = strlen(aa->out1);
+			/* count aligned (gap-free) columns and the mismatches among them */
+			for (k = 0; k < aln_len; k++) {
+					if (aa->out1[k] == '-' || aa->out2[k] == '-') continue;
+					if (aa->out1[k] != aa->out2[k]) mm++;
+					mn++;
+			}
+			PWcontig *pwc = (PWcontig*)malloc(sizeof(PWcontig));
+			pwc->id0 = c0->cls_id;
+			pwc->id1 = c1->cls_id;
+			pwc->overlap = mn;
+			pwc->score = aa->score;
+			pwc->het = (float)mm/mn;
+			push_heap(pwdb->hp, pwc);
+			push_pwctglist(pwdb->pwctgs, pwc);
+			fflush(stdout);
+			aln_free_AlnAln(aa);
+		}
+	}
+	return pwdb;
+}
+
+
+PWDB* pw_aln_contigs(CtgDB *db, uint32_t min_overlap, float het) { /* Seeded pairwise comparison of the contigs in db.
+	 * Phase 1 indexes every KMER_SIZE_CTG-mer of every contig in a hash plus a
+	 * per-position back-link array. Phase 2 walks each contig's k-mers to find
+	 * later contigs with at least 5 recorded seed hits, then scores each such
+	 * pair with aln_str() at a seed and, if none of its seeds was accepted,
+	 * with a full aln_stdaln().
+	 * Every scored pair is malloc'ed as a PWcontig and pushed both onto
+	 * pwdb->hp and pwdb->pwctgs. Returns the new PWDB; pwdb->ctgv aliases
+	 * db->ctgs.
+	 * NOTE(review): no malloc/realloc result is checked in this function. */
+	uint32_t i, j, jj, n, q, r;
+	int k, mn, mm, score, seqlen, aln_len, count, pre, idx, lastid;
+	kmer_tt K, *t; int exists;
+	uint64_t kmask = 0xFFFFFFFFFFFFFFFFLLU >> ((32-KMER_SIZE_CTG)*2); /* keeps the low 2*KMER_SIZE_CTG bits: one k-mer at 2 bits per base */
+	uint64_t pos, next, bt, p;
+	link_t *link;
+	posv *posvec;
+	kmer_pos_t postmp;
+	idlist *idtmp;
+	u32hash *ids;
+	id_tt *ID, *preID;
+	int **alned;
+	uint64_t *idv;
+#ifdef DEBUG
+	clock_t before;
+	double elapsed;
+#endif
+
+	PWDB *pwdb;
+	Ctg *c0, *c1;
+	pwdb = (PWDB*)malloc(sizeof(PWDB));
+
+	pwdb->pwctgs = init_pwctglist(6);
+	pwdb->hp = init_heap(aln_cmp, pwdb);
+	pwdb->ctgv = db->ctgs;
+	AlnParam ap = {10, 2, 2, aln_sm_nt, 16, 75};
+
+	kmerhash *index = init_kmerhash(2);
+	n = db->ctgnum;
+	idv = (uint64_t *) malloc(n * sizeof(uint64_t)); /* idv[i]: last global position of contig i, filled in during indexing */
+	alned = malloc(n * sizeof(int *)); /* n x n flags: alned[i][j] != 0 once pair (i, j) has been recorded */
+	for (i = 0; i < n; i++) {
+		alned[i] = malloc(n * sizeof(int));
+		for (j = 0; j < n; j++) {
+			alned[i][j] = 0;
+		}
+	}
+
+	K.kmer = 0;
+	K.kpos = 0;
+	pos = 0; /* global position of the current contig's first base; only indexed contigs advance it */
+	//index here
+#ifdef DEBUG
+	before = clock();
+#endif
+	link = NULL;
+	for (i = 0; i < n; i++) {
+		c0 = ref_ctglist(db->ctgs, i);
+		seqlen = strlen(c0->seq);
+		if (seqlen < KMER_SIZE_CTG) continue; /* NOTE(review): idv[i] stays uninitialised for such contigs but is still searched below -- confirm */
+		link = (link_t *)realloc(link, (pos+seqlen)*sizeof(link_t));
+		idv[i] = pos+seqlen-1;
+
+		for (j = 0; j < KMER_SIZE_CTG-1; j++) /* preload the first KMER_SIZE_CTG-1 bases */
+			K.kmer = (K.kmer << 2) | base_bit_table[(int)c0->seq[j]];
+		for (j = 0; j <= (unsigned)seqlen-KMER_SIZE_CTG; j++) {
+			K.kmer = ((K.kmer << 2) | base_bit_table[(int)c0->seq[j+KMER_SIZE_CTG-1]]) & kmask; /* roll in the next base */
+			t = prepare_kmerhash(index, K, &exists);
+			//printf("%d\n", pos+j);
+			if (exists) {
+				link[pos+j].last = t->kpos; /* chain to the previous occurrence of this k-mer */
+				link[pos+j].offset = j;
+			} else {
+				t->kmer = K.kmer;
+				link[pos+j].last = pos+j; /* first occurrence: a self-link ends the chain */
+				link[pos+j].offset = j;
+			}
+			t->kpos = pos+j; /* the hash entry always holds the most recent occurrence */
+		}
+		pos += seqlen;
+	}
+#ifdef DEBUG
+	elapsed = clock() - before;
+	fprintf(stderr, "index used %.3f sec\n", elapsed/CLOCKS_PER_SEC);
+#endif
+
+	posvec = init_posv(2);
+	idtmp = init_idlist(6);
+	ids = init_u32hash(2);
+	preID = NULL;
+	//query here
+	pos = 0;
+	for (i = 0; i < n; i++) {
+	//	before = clock();
+		clear_posv(posvec);
+		clear_idlist(idtmp);
+		clear_u32hash(ids);
+		c0 = ref_ctglist(db->ctgs, i);
+		seqlen = strlen(c0->seq);
+		if (seqlen < KMER_SIZE_CTG) continue; /* mirrors the indexing loop: pos is not advanced either */
+
+		for (j = 0; j < KMER_SIZE_CTG-1; j++)
+			K.kmer = (K.kmer << 2) | base_bit_table[(int)c0->seq[j]];
+		for (j = 0; j <= (unsigned)seqlen-KMER_SIZE_CTG; j++) {
+			K.kmer = ((K.kmer << 2) | base_bit_table[(int)c0->seq[j+KMER_SIZE_CTG-1]]) & kmask;
+			t = get_kmerhash(index, K);
+			//printf("%lld\n", t->kpos);
+			bt = t->kpos;
+			if (bt >= pos+seqlen || bt < pos) { /* keep only occurrences outside the current contig */
+				postmp.pos = bt;
+				postmp.lastoffset = link[bt].offset;
+				postmp.offset = j;
+				push_posv(posvec, postmp); 
+			}
+			while (1) { //tracing positions
+				next = link[bt].last;
+				if (next == bt) break; /* self-link: start of the chain reached */
+				if (next >= pos+seqlen || next < pos) {
+					postmp.pos = next;
+					postmp.lastoffset = link[next].offset;
+					postmp.offset = j;
+					push_posv(posvec, postmp);
+				}
+				bt = next;
+			}
+		}
+#ifdef DEBUG
+		elapsed = clock() - before;
+		fprintf(stderr, "%d th contig tracing used %.3f sec\n", i, elapsed/CLOCKS_PER_SEC);
+		before = clock();
+#endif
+		//translate positions into ids
+		qsort(as_array_posv(posvec), count_posv(posvec), sizeof(kmer_pos_t), cmp_kmer_pos);
+		//sort_u64list(posv);
+		p = (get_posv(posvec, 0)).pos; /* NOTE(review): element 0 is read even when posvec is empty -- confirm this cannot happen */
+		q = (get_posv(posvec, 0)).offset;
+		r = (get_posv(posvec, 0)).lastoffset;
+		//idx = bisearch(idv, p, 0, n-1);
+		idx = bisearch(idv, n,  p, NULL);
+		if (idx < 0) idx = -1 - idx; /* a negative result is decoded as -1 - idx (presumably an insertion point; see sort.h) */
+		//printf("idx %d\n", idx);
+		if (p <= idv[idx]) { /* NOTE(review): idx is not checked against n before indexing idv -- confirm */
+			if (idx > (int)i) { /* only contigs after i are recorded */
+				ID = next_ref_idlist(idtmp);
+				ID->id = idx;
+				ID->offset = q;
+				ID->lastoffset = r;
+			}
+		} 
+		for (j = 1; j < count_posv(posvec); j++) {
+			if ((get_posv(posvec, j)).pos - p >= KMER_SIZE_CTG) { /* accept a hit only if it lies at least one k-mer length past the last accepted one */
+				p = (get_posv(posvec, j)).pos; 
+				q = (get_posv(posvec, j)).offset;
+				r = (get_posv(posvec, j)).lastoffset;
+			} else {
+				continue; 
+			}
+			
+			if (p <= idv[idx]) { /* still inside the contig found by the previous lookup */
+				if (idx > (int)i) {
+					ID = next_ref_idlist(idtmp);
+					ID->id = idx;
+					ID->offset = q;
+					ID->lastoffset = r;
+				}
+			} else {
+				//idx = bisearch(idv, p, idx, n-1);
+				//idx = bisearch(idv+idx+1, n-idx-1, p, NULL);
+				idx = bisearch(idv, n, p, NULL);
+				if(idx < 0) idx = -1 - idx;
+				//printf("idx %d\n", idx);
+				if (idx > (int)i) {
+					ID = next_ref_idlist(idtmp);
+					ID->id = idx;
+					ID->offset = q;
+					ID->lastoffset = r;
+				}
+			}
+		}
+		qsort(as_array_idlist(idtmp), count_idlist(idtmp), sizeof(id_tt), cmp_ids);
+		//sort_u32list(idtmp);
+		count = 0; /* length of the current run of equal ids in the sorted list */
+		pre = -1;
+		for (j = 0; j < count_idlist(idtmp); j++) {
+			ID = ref_idlist(idtmp, j);
+			q = ID->id;
+			//if (q <= i) continue;
+			if (pre != (int)q) {
+				if (count >= 5) { //magic number of kmers 
+					put_u32hash(ids, preID->id);
+				}
+				count = 0;
+				pre = q;
+				preID = ID;
+				count++;
+			} else {
+				count++;
+				preID = ID;
+			}
+		}
+		if (count >= 5) //magic number of kmers 
+			put_u32hash(ids, preID->id);
+#ifdef DEBUG
+		elapsed = clock() - before;
+		fprintf(stderr, "%d th translate pos and search and sort id used %.3f sec\n", i, elapsed/CLOCKS_PER_SEC);
+		before = clock();
+#endif
+		lastid = -1; 
+		for (j = 0; j < count_idlist(idtmp); j++) {
+			ID = ref_idlist(idtmp, j);
+			jj = ID->id;
+			if (!exists_u32hash(ids, jj)) continue; // kmer < magic number 3
+			q = ID->offset;
+			r = ID->lastoffset;
+			if ((int)jj != lastid) {
+				if (lastid != -1 && !alned[i][lastid]) { // then smith-waterman 
+					c1 = ref_ctglist(db->ctgs, lastid); /* none of the previous candidate's seeds was accepted: fall back to a full alignment */
+					AlnAln *aa;
+					mn = mm = 0;
+					aa = aln_stdaln(c0->seq, c1->seq, &ap, 0, 1);
+					aln_len = strlen(aa->out1);
+					for (k = 0; k < aln_len; k++) {
+						if (aa->out1[k] == '-' || aa->out2[k] == '-') continue;
+						if (aa->out1[k] != aa->out2[k]) mm++;
+						mn++;
+					}
+					PWcontig *pwc = (PWcontig*)malloc(sizeof(PWcontig));
+					pwc->id0 = c0->cls_id;
+					pwc->id1 = c1->cls_id;
+					pwc->overlap = mn;
+					pwc->score = aa->score;
+					pwc->het = (float)mm/mn; 
+					push_heap(pwdb->hp, pwc);
+					push_pwctglist(pwdb->pwctgs, pwc);
+					//fprintf(stdout, "%d\t%d\t%d\t%d\t%d\t%d\t%.3f\n", c0->cls_id, c1->cls_id, pwc->id0, pwc->id1, mn, mm, pwc->het);
+					//fprintf(stdout, "%s\n%s\n", c0->seq, c1->seq);
+					//fprintf(stdout, "%d\t%d\t%d\t%d\t%d\t%d\n%s\n%s\n%s\n\n", aa->start1, aa->end1,aa->start2, aa->end2, pwc->score, pwc->overlap, aa->out1, aa->outm, aa->out2);
+
+					//fprintf(stdout, "%s\n%s\n%s\n\n", aa->out1, aa->outm, aa->out2);
+					//fflush(stdout);
+					aln_free_AlnAln(aa);
+					alned[i][lastid] = 1;
+					lastid = jj;
+				} else {
+					lastid = jj;
+				}
+			}
+			if (!alned[i][jj]) {
+				c1 = ref_ctglist(db->ctgs, jj);
+				aln_str(c0->seq+q, c1->seq+r, &mm, &mn, &score); /* compare the two contigs starting at the seed's offsets */
+				if (mn >= (int)min_overlap && (het-(float)mm/mn) >= 0) { // no need to align again, magic min_overlap and het
+					PWcontig *pwc = (PWcontig*)malloc(sizeof(PWcontig));
+					pwc->id0 = c0->cls_id;
+					pwc->id1 = c1->cls_id;
+					pwc->overlap = mn;
+					pwc->score = score;
+					pwc->het = (float)mm/mn; 
+					push_heap(pwdb->hp, pwc);
+					push_pwctglist(pwdb->pwctgs, pwc);
+					alned[i][jj] = 1;
+				} 
+			}
+		}
+
+		if (lastid != -1 && !alned[i][lastid]) { // then smith-waterman 
+			c1 = ref_ctglist(db->ctgs, lastid); /* same full-alignment fallback for the final candidate */
+			AlnAln *aa;
+			mn = mm = 0;
+			aa = aln_stdaln(c0->seq, c1->seq, &ap, 0, 1);
+			aln_len = strlen(aa->out1);
+			for (k = 0; k < aln_len; k++) {
+				if (aa->out1[k] == '-' || aa->out2[k] == '-') continue;
+				if (aa->out1[k] != aa->out2[k]) mm++;
+				mn++;
+			}
+			PWcontig *pwc = (PWcontig*)malloc(sizeof(PWcontig));
+			pwc->id0 = c0->cls_id;
+			pwc->id1 = c1->cls_id;
+			pwc->overlap = mn;
+			pwc->score = aa->score;
+			pwc->het = (float)mm/mn; 
+			push_heap(pwdb->hp, pwc);
+			push_pwctglist(pwdb->pwctgs, pwc);
+			aln_free_AlnAln(aa);
+			alned[i][lastid] = 1;
+		}
+#ifdef DEBUG
+		elapsed = clock() - before;
+		fprintf(stderr, "%d th Smith-watherman used %.3f sec\n", i, elapsed/CLOCKS_PER_SEC);
+#endif
+		pos += seqlen;
+	}
+
+	free(idv);
+	free(link);
+	//free_u64list(posv);
+	free_posv(posvec);
+	free_kmerhash(index);
+	free_idlist(idtmp);
+	free_u32hash(ids);
+	for (i = 0; i < n; i++)
+		free(alned[i]);
+
+	free(alned);
+
+	return pwdb;
+}
+/* Follow the cls_id links from `start` to the contig that is its own
+ * representative, re-pointing each visited contig at its grandparent on the
+ * way. Returns the representative's index. */
+static uint32_t ctg_find_root(ctglist *ctgv, uint32_t start) {
+	uint32_t cur = start;
+
+	while (cur != ref_ctglist(ctgv, cur)->cls_id) {
+		ref_ctglist(ctgv, cur)->cls_id = ref_ctglist(ctgv, ref_ctglist(ctgv, cur)->cls_id)->cls_id;
+		cur = ref_ctglist(ctgv, cur)->cls_id;
+	}
+	return cur;
+}
+
+/*
+ * Merge contigs into clusters. Pairs are popped from db->hp until it is
+ * empty; a pair whose overlap is at least min_overlap and whose het does not
+ * exceed `het` joins the two contigs' clusters, the smaller cluster being
+ * attached to the larger. The result is left in the cls_id / sz fields of
+ * db->ctgv; db itself is returned.
+ */
+PWDB* clustering_ctg(PWDB *db, uint32_t min_overlap, float het) {
+	PWcontig *pair;
+	uint32_t root0, root1;
+
+	while ((pair = pop_heap(db->hp)) != NULL) {
+		if (!(pair->overlap >= min_overlap && (pair->het - het <= 0))) continue;
+
+		root0 = ctg_find_root(db->ctgv, pair->id0);
+		root1 = ctg_find_root(db->ctgv, pair->id1);
+		if (root0 == root1) continue;
+
+		if (ref_ctglist(db->ctgv, root0)->sz < ref_ctglist(db->ctgv, root1)->sz) {
+			ref_ctglist(db->ctgv, root0)->cls_id = root1;
+			ref_ctglist(db->ctgv, root1)->sz += ref_ctglist(db->ctgv, root0)->sz;
+		} else {
+			ref_ctglist(db->ctgv, root1)->cls_id = root0;
+			ref_ctglist(db->ctgv, root0)->sz += ref_ctglist(db->ctgv, root1)->sz;
+		}
+	}
+
+	return db;
+}
+
+/* qsort comparator on Ctg.cls_id that sorts larger ids first. */
+int cmp_ctg_clsid(const void *p0, const void *p1) {
+	const Ctg *lhs = (const Ctg*)p0;
+	const Ctg *rhs = (const Ctg*)p1;
+
+	if (lhs->cls_id < rhs->cls_id) return 1;
+	if (lhs->cls_id == rhs->cls_id) return 0;
+	return -1;
+}
+
+/* Print the clusters to stdout, one per line, as space-terminated contig ids.
+ * The contig list of db is sorted in place by cls_id (cmp_ctg_clsid) first. */
+void print_clusters(PWDB *db) {
+	ctglist *sorted = db->ctgv;
+	int prev_cid = -1;
+	uint32_t i;
+	Ctg *ctg;
+
+	qsort(as_array_ctglist(sorted), count_ctglist(sorted), sizeof(Ctg), cmp_ctg_clsid);
+	for (i = 0; i < count_ctglist(sorted); i++) {
+		ctg = ref_ctglist(sorted, i);
+		/* end the previous line whenever the cluster id changes */
+		if (i > 0 && prev_cid != (int)ctg->cls_id)
+			printf("\n");
+		prev_cid = ctg->cls_id;
+		printf("%d ", ctg->id);
+	}
+	printf("\n");
+}
+
+/* Groups of at most this many contigs are aligned all-against-all; larger
+ * groups go through the k-mer seeded search. */
+enum { BRUTE_FORCE_MAX_CTGS = 5 };
+
+/*
+ * Cluster one group of contigs and print the result, then release the group
+ * database (but not the contigs' sequences, which it only borrows).
+ * Groups with fewer than two contigs are released without output.
+ */
+static void cluster_ctg_group(CtgDB *tdb, uint32_t min_overlap, float het) {
+	PWDB *pwaln;
+
+	tdb->ctgnum = count_ctglist(tdb->ctgs);
+	if (tdb->ctgnum > 1) {
+		if (tdb->ctgnum <= BRUTE_FORCE_MAX_CTGS) {
+			pwaln = pw_aln_contigs_brute(tdb);
+		} else {
+			pwaln = pw_aln_contigs(tdb, min_overlap, het);
+		}
+		pwaln = clustering_ctg(pwaln, min_overlap, het);
+		print_clusters(pwaln);
+		free_pwdb(pwaln);
+	}
+	free_ctgdb(tdb);
+}
+
+/*
+ * Split db into runs of consecutive contigs sharing the same old_clsid and
+ * cluster each run separately, printing the clusters to stdout.
+ * Within a run the contigs are renumbered from 0; this overwrites cls_id in
+ * db's own contigs.
+ * max_nctg is accepted for interface compatibility and is not used.
+ */
+void execute_pwaln(CtgDB *db, uint32_t min_overlap, float het, uint32_t max_nctg) {
+	CtgDB *tdb = init_ctgdb();
+	uint32_t i;
+	int last_oldcid = -1;
+	int id = 0;
+	Ctg *ctg;
+
+	(void)max_nctg;
+	for (i = 0; i < count_ctglist(db->ctgs); i++) {
+		ctg = ref_ctglist(db->ctgs, i);
+		if (last_oldcid != (int)ctg->old_clsid) {
+			/* a new run starts: finish the previous one, if any */
+			if (i > 0) {
+				cluster_ctg_group(tdb, min_overlap, het);
+				tdb = init_ctgdb();
+				id = 0;
+			}
+			last_oldcid = (int)ctg->old_clsid;
+		}
+		ctg->cls_id = id;
+		push_ctglist(tdb->ctgs, *ctg);
+		id++;
+	}
+	cluster_ctg_group(tdb, min_overlap, het);
+}
+
+/* Release a pairwise-alignment database: every PWcontig referenced by the
+ * list, the list, the heap and the PWDB object. The contig list (ctgv) is
+ * not touched. */
+void free_pwdb(PWDB *db) {
+	uint32_t idx = 0;
+
+	while (idx < count_pwctglist(db->pwctgs)) {
+		free(get_pwctglist(db->pwctgs, idx));
+		idx++;
+	}
+	free_pwctglist(db->pwctgs);
+	free_heap(db->hp);
+
+	free(db);
+}
diff --git a/mergecontig.h b/mergecontig.h
new file mode 100644
index 0000000..f1b1302
--- /dev/null
+++ b/mergecontig.h
@@ -0,0 +1,162 @@
+#ifndef __MERGECONTIG_H
+#define __MERGECONTIG_H
+
+#include <stdint.h>
+#include "list.h"
+#include "file_reader.h"
+#include "stdaln.h"
+#include "string.h"
+#include "heap.h"
+#include "hashset.h"
+#include "rainbow.h"
+
+#define KMER_SIZE_CTG 15
+
+/*
+ * char -> 17 (=16+1) nucleotides
+ *
+ * Lookup table indexed by a byte value (0..255). Letters map to codes 0..15,
+ * upper and lower case share the same code, '-' maps to 16 and every other
+ * byte maps to 15. Index it with an unsigned value only.
+ */
+static unsigned char aln_nt16_table[256] = {
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,16 /*'-'*/,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+        15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+        15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+        15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
+};
+
+
+/* Hash-set element keyed by a k-mer value; kpos is carried as payload. */
+typedef struct {
+	uint64_t kmer;
+	uint64_t kpos;
+} kmer_tt;
+
+/* Hashing and equality look at the kmer field only; kpos is ignored. */
+#define kmer_code(k) u64hashcode((k).kmer)
+#define kmer_eq(k1, k2) ((k1).kmer == (k2).kmer)
+define_hashset(kmerhash, kmer_tt, kmer_code, kmer_eq);
+
+/* A position record with two offsets; sorted by pos via cmp_kmer_pos(). */
+typedef struct {
+	uint64_t pos;
+	uint32_t lastoffset;
+	uint32_t offset;
+} kmer_pos_t;
+
+/* posv: list of kmer_pos_t generated by define_list. */
+define_list(posv, kmer_pos_t);
+
+/*
+ * Three-way comparator over kmer_pos_t records, ordering by ascending pos.
+ * Returns -1, 0 or 1; the other fields do not take part in the comparison.
+ */
+static inline int cmp_kmer_pos (const void *e1, const void *e2) {
+	const kmer_pos_t *lhs = (const kmer_pos_t *)e1;
+	const kmer_pos_t *rhs = (const kmer_pos_t *)e2;
+
+	if (lhs->pos < rhs->pos)
+		return -1;
+	if (lhs->pos > rhs->pos)
+		return 1;
+	return 0;
+}
+
+/* An id together with two offsets; sorted by id via cmp_ids(). */
+typedef struct {
+	uint32_t id;
+	uint32_t offset;
+	uint32_t lastoffset;
+} id_tt;
+
+/* idlist: list of id_tt generated by define_list. */
+define_list(idlist, id_tt);
+
+/*
+ * Three-way comparator over id_tt records, ordering by ascending id.
+ * Returns -1, 0 or 1; the offset fields are ignored.
+ */
+static inline int cmp_ids(const void *e1, const void *e2) {
+	const id_tt *lhs = (const id_tt *)e1;
+	const id_tt *rhs = (const id_tt *)e2;
+
+	if (lhs->id < rhs->id)
+		return -1;
+	if (lhs->id > rhs->id)
+		return 1;
+	return 0;
+}
+
+/*
+ * Compare two strings position by position, without gaps, over the length
+ * of the shorter one.
+ *
+ * s1, s2 - NUL-terminated sequences
+ * mm     - out: number of positions where the characters differ
+ * mn     - out: number of positions compared (the shorter length)
+ * score  - out: sum of aln_sm_nt entries selected by the two nucleotide codes
+ *
+ * The characters are converted to unsigned char before indexing
+ * aln_nt16_table so that bytes >= 0x80 cannot yield a negative index on
+ * platforms where char is signed.
+ */
+static inline void aln_str(char *s1, char *s2, int *mm, int *mn, int *score) {
+	int len, len1, len2, i, j, k;
+	int s = 0;
+	int m = 0;
+
+	len1 = strlen(s1);
+	len2 = strlen(s2);
+	len = len1<len2?len1:len2;
+	for (i = 0; i < len; i++) {
+		j = aln_nt16_table[(unsigned char)s1[i]];
+		k = aln_nt16_table[(unsigned char)s2[i]];
+		// NOTE(review): '-' maps to code 16; confirm aln_sm_nt is large
+		// enough for j+k*16 when a gap character appears in the input.
+		s += aln_sm_nt[j+k*16]; // magic aln_sm_nt table ROW_NUMBER=16
+		if (s1[i] != s2[i])
+			m++;
+	}
+	*mm = m;
+	*mn = len;
+	*score = s;
+}
+
+/*
+ * Link record: previous k-mer position plus an offset.
+ * NOTE(review): mergectg.h defines a different link_t (int offset); the two
+ * headers cannot be included in the same translation unit as written.
+ */
+typedef struct {
+	uint64_t last; //last kmer position
+	uint32_t offset;
+} link_t;
+
+/*
+ * One contig. execute_pwaln() groups contigs by old_clsid and rewrites
+ * cls_id with the contig's running index inside its group.
+ */
+typedef struct {
+	uint32_t id;
+	uint32_t cls_id;
+	uint32_t old_clsid;
+	uint32_t sz;   //union tree depth
+	char *seq;
+} Ctg;
+
+/* ctglist: list of Ctg stored by value (push copies the struct, seq pointer included). */
+define_list(ctglist, Ctg);
+
+/* A set of contigs; ctgnum is refreshed from count_ctglist(ctgs) by execute_pwaln(). */
+typedef struct {
+	uint32_t ctgnum;
+	ctglist *ctgs;
+} CtgDB;
+
+/*
+ * Result record for one pair of contigs (id0, id1).
+ * presumably overlap/het/score describe the pairwise alignment - confirm
+ * against pw_aln_contigs().
+ */
+typedef struct {
+	uint32_t id0;
+	uint32_t id1;
+	uint32_t overlap;
+	float het;
+	int score; 
+} PWcontig;
+
+/* pwctglist: list of PWcontig pointers; free_pwdb() frees each element. */
+define_list(pwctglist, PWcontig*);
+
+/*
+ * Pairwise-alignment database. free_pwdb() releases pwctgs (and its
+ * elements) and hp; ctgv is not released there.
+ */
+typedef struct {
+	pwctglist *pwctgs;
+	Heap *hp;
+	ctglist *ctgv;
+} PWDB;
+
+/*
+ * Public interface of mergecontig.c. The linkage guard tests __cplusplus,
+ * the macro C++ compilers actually define (the upper-case spelling is never
+ * defined, so the extern "C" block was dead).
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+CtgDB* init_ctgdb(void );
+CtgDB* load_ctgdb(FileReader *fr1, FileReader *fr2);
+void print_ctgdb(CtgDB *db);
+void free_ctgdb(CtgDB *db);
+void free_load_ctgdb(CtgDB *db);
+PWDB* pw_aln_contigs(CtgDB *db, uint32_t overlap, float het);
+PWDB* pw_aln_contigs_brute(CtgDB *db);
+PWDB* clustering_ctg(PWDB *db, uint32_t overlap, float het);
+void print_clusters(PWDB *db);
+void execute_pwaln(CtgDB *db, uint32_t overlap, float het, uint32_t max_nctg);
+void free_pwdb(PWDB *db);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/mergectg.c b/mergectg.c
new file mode 100644
index 0000000..4edaf2a
--- /dev/null
+++ b/mergectg.c
@@ -0,0 +1,654 @@
+/*
+ * 
+ * Copyright (c) 2012, Zechen Chong <chongzechen at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mergectg.h"
+#include "rainbow.h"
+#include "vector.h"
+
+/*
+ * Hand out a contig object. A contig popped from merger->cache is reused
+ * when one is available; otherwise a new one is allocated with empty
+ * path, read list, k-mer index and merge lists. The id and closed fields
+ * are not initialised here.
+ */
+contig_t* lend_ctgv_merger(merge_t *merger) {
+	contig_t *fresh;
+
+	if (pop_contigsv(merger->cache, &fresh))
+		return fresh;
+
+	fresh = malloc(sizeof(contig_t));
+	fresh->path = init_string(10);
+	fresh->rds = init_readv(50);
+	fresh->index = init_rdkhash(24);
+	fresh->m_idx = init_idxv(2);
+	fresh->m_rds = init_u32list(2);
+	return fresh;
+}
+
+/* Empty the contig's containers, then park it in merger->cache for reuse. */
+void return_ctgv_merger(merge_t *merger, contig_t *ctg) {
+	contigsv *pool = merger->cache;
+
+	clear_ctg(ctg);
+	push_contigsv(pool, ctg);
+}
+
+/*
+ * Store one sequence in a read slot. The copy is clamped to the capacity of
+ * rd->seq so that an over-long input column cannot overflow the buffer.
+ */
+static void fill_read(read_t *rd, const char *seq, uint32_t len, uint32_t seq_id) {
+	if (len > sizeof(rd->seq) - 1)
+		len = sizeof(rd->seq) - 1;
+	rd->seq_id = seq_id;
+	rd->rd_len = len;
+	memmove(rd->seq, seq, len);
+	rd->seq[len] = '\0';
+	rd->rank = 1;
+}
+
+/*
+ * Load every consecutive row whose cluster id (column 4) equals lastcid.
+ * The first row carrying a different cluster id is pushed back with
+ * froll_back() and the function returns.
+ *
+ * Whenever column 1 changes, a new contig is started: it takes its id from
+ * column 1 and its path from column 5, and read 0 of the contig is column 2
+ * after reverse_dna(). Every row additionally contributes column 3 as a
+ * read. Column 0 supplies the read's seq_id.
+ *
+ * Compared with the original: the first row always opens a contig (even
+ * when column 1 is 0, which used to dereference a NULL contig), reads are
+ * clamped to the read buffer, and m_rds records the contig's real index in
+ * merger->ctgs instead of a counter that restarts on every call.
+ */
+void prepare_reads(merge_t *merger, FileReader *in, uint32_t lastcid) {
+	int n_col;
+	uint32_t cid, ef_id, eid, eflen;
+	contig_t *ctg = NULL;
+	char *efstr, *path;
+
+	ef_id = 0;
+	while ((n_col = fread_table(in)) != -1) {
+		if (n_col == 0) continue;
+		cid = atoi(get_col_str(in, 4));
+		if (cid != lastcid) {
+			froll_back(in);
+			return;
+		}
+		eid = atoi(get_col_str(in, 1));
+		if (ctg == NULL || eid != ef_id) {
+			ef_id = eid;
+			efstr = get_col_str(in, 2);
+			eflen = get_col_len(in, 2);
+			reverse_dna(efstr, eflen);
+			ctg = lend_ctgv_merger(merger);
+			push_contigv(merger->ctgs, ctg);
+			ctg->id = ef_id;
+			ctg->closed = 0;
+			path = get_col_str(in, 5);
+			append_string(ctg->path, path, strlen(path));
+			// a contig's merge list starts with its own index
+			push_u32list(ctg->m_rds, merger->ctgs->size - 1);
+			fill_read(next_ref_readv(ctg->rds), efstr, eflen, atol(get_col_str(in, 0)));
+		}
+		fill_read(next_ref_readv(ctg->rds), get_col_str(in, 3), get_col_len(in, 3), atol(get_col_str(in, 0)));
+	}
+}
+
+/*
+ * Assemble and output every open contig whose read count lies within
+ * [min_read, max_read]. Read 0 of the contig seeds the assembler (init_ef on
+ * the very first use, reset_ef afterwards); the contig's remaining reads and
+ * all reads of the contigs listed in m_rds[1..] are then added before
+ * aligning, assembling and writing the result to out.
+ */
+void print_asm(merge_t *merger, FILE *out) {
+	uint32_t ci, mi, ri;
+	contig_t *seed, *member;
+	read_t *rd;
+
+	for (ci = 0; ci < merger->ctgs->size; ci++) {
+		seed = get_contigv(merger->ctgs, ci);
+		if (seed->closed)
+			continue;
+		if (seed->rds->size < merger->min_read || seed->rds->size > merger->max_read)
+			continue;
+
+		rd = ref_readv(seed->rds, 0);
+		if (merger->flag) {  // assembler already exists: reuse it
+			reset_ef(merger->ef, seed->id, rd->seq, rd->rd_len, merger->min_ol, merger->min_sm);
+		} else {
+			merger->flag = 1;
+			merger->ef = init_ef(seed->id, rd->seq, rd->rd_len, merger->min_ol, merger->min_sm);
+		}
+		for (ri = 1; ri < seed->rds->size; ri++) {
+			rd = ref_readv(seed->rds, ri);
+			add_read2ef(merger->ef, rd->seq, rd->seq_id, rd->rd_len, rd->rank);
+		}
+		for (mi = 1; mi < seed->m_rds->size; mi++) {
+			member = get_contigv(merger->ctgs, get_u32list(seed->m_rds, mi));
+			for (ri = 0; ri < member->rds->size; ri++) {
+				rd = ref_readv(member->rds, ri);
+				add_read2ef(merger->ef, rd->seq, rd->seq_id, rd->rd_len, rd->rank);
+			}
+		}
+		align_reads_ef(merger->ef);
+		asm_ef_ctgs(merger->ef);
+		output_ef_ctgs(merger->ef, out);
+	}
+}
+
+/*
+ * Write "<cluster id>\t<read id>" lines instead of assembling. For every
+ * open contig whose read count lies within [min_read, max_read], the
+ * contig's reads 1.. and all reads of the contigs listed in m_rds[1..] are
+ * printed under the current merger->cid, which is then incremented. The
+ * stream is flushed after every line and merger->flag is raised.
+ */
+void print_asm2(merge_t *merger, FILE *out) {
+	uint32_t ci, mi, ri;
+	contig_t *seed, *member;
+	read_t *rd;
+
+	for (ci = 0; ci < merger->ctgs->size; ci++) {
+		seed = get_contigv(merger->ctgs, ci);
+		if (seed->closed)
+			continue;
+		if (seed->rds->size < merger->min_read || seed->rds->size > merger->max_read)
+			continue;
+
+		merger->flag = 1;
+		for (ri = 1; ri < seed->rds->size; ri++) {
+			rd = ref_readv(seed->rds, ri);
+			fprintf(out, "%u\t%u\n", merger->cid, rd->seq_id);
+			fflush(out);
+		}
+		for (mi = 1; mi < seed->m_rds->size; mi++) {
+			member = get_contigv(merger->ctgs, get_u32list(seed->m_rds, mi));
+			for (ri = 0; ri < member->rds->size; ri++) {
+				rd = ref_readv(member->rds, ri);
+				fprintf(out, "%u\t%u\n", merger->cid, rd->seq_id);
+				fflush(out);
+			}
+		}
+		merger->cid++;
+	}
+}
+
+/*
+ * Insert the k-mers of the contig's reads into ctg->index, with
+ * k = merger->RD_KMER_SIZE and two bits per base. Read 0 is skipped, as are
+ * reads shorter than k. Newly inserted entries get kpos = 1; k-mers already
+ * present are left untouched.
+ *
+ * Bases are converted to unsigned char before indexing base_bit_table so a
+ * byte >= 0x80 cannot become a negative index where char is signed. The
+ * unused position accumulator of the original has been dropped.
+ */
+void index_rds(merge_t *merger, contig_t *ctg) {
+	uint64_t kmask = 0xFFFFFFFFFFFFFFFFLLU >> ((32-merger->RD_KMER_SIZE)*2);
+	read_t *rd; uint32_t i, j, len; int exists;
+	rd_kmer_t K, *t;
+
+	K.kmer = 0;
+	K.kpos = 0;
+	for (i = 1; i < ctg->rds->size; i++) {
+		rd = ref_readv(ctg->rds, i);
+		len = rd->rd_len;
+		if (len < merger->RD_KMER_SIZE) continue;
+		// prime the rolling k-mer with the first k-1 bases
+		for (j = 0; j < merger->RD_KMER_SIZE-1; j++)
+			K.kmer = (K.kmer << 2) | base_bit_table[(unsigned char)rd->seq[j]];
+		for (j = 0; j <= len-merger->RD_KMER_SIZE; j++) {
+			K.kmer = ((K.kmer << 2) | base_bit_table[(unsigned char)rd->seq[j+merger->RD_KMER_SIZE-1]]) & kmask;
+			t = prepare_rdkhash(ctg->index, K, &exists);
+			if (!exists) {
+				t->kmer = K.kmer;
+				t->kpos = 1;
+			}
+		}
+	}
+}
+
+/*
+ * Build the read k-mer index of every contig currently held by the merger
+ * and register that index as the first entry of the contig's m_idx list.
+ */
+void update_ctg2merge(merge_t *merger) {
+	uint32_t pos = 0;
+	contig_t *cur;
+
+	while (pos < merger->ctgs->size) {
+		cur = get_contigv(merger->ctgs, pos);
+		index_rds(merger, cur);
+		push_idxv(cur->m_idx, cur->index);
+		pos++;
+	}
+}
+/*
+static void print_leaf(merge_t *merger, pathtree_t *tree, FILE *out) {
+	contig_t *ctg;
+	if (tree->tid) {
+		ctg = get_contigv(merger->ctgs, tree->tid-1);
+		fprintf(out, ">%d\n", ctg->id);
+		fflush(out);
+	} else {
+		print_leaf(merger, tree->left, out);
+		print_leaf(merger, tree->right, out); 
+	}
+}
+
+static void printnode(int c, int h) {
+	int i;
+	for (i = 0; i < h; i++)
+		printf(" ");
+	printf("%d\n", c);
+}
+
+static void show(pathtree_t *tree, int h) {
+	if (tree == NULL) 
+	{ printnode('*', h); return;}
+	show(tree->right, h+1);
+	printnode(tree->tid, h);
+	show(tree->left, h+1);
+}
+*/
+
+/*
+ * Merge the contigs at indices id1 and id2 of merger->ctgs. The contig with
+ * the smaller index survives (id2's contig on a tie) and the other one is
+ * marked closed. The survivor inherits the other contig's m_rds and m_idx
+ * entries, and its path is replaced by the common prefix of both paths.
+ *
+ * NOTE(review): the m_rds source is chosen by testing c2->closed, not by
+ * which contig survives; the two agree only while neither contig was closed
+ * before the call.
+ * NOTE(review): the path is re-appended with the length of the longer
+ * original path, i.e. the NUL padding after the prefix is appended too.
+ */
+void merge_leaves(merge_t *merger, uint32_t id1, uint32_t id2) {
+	contig_t *c1 = get_contigv(merger->ctgs, id1);
+	contig_t *c2 = get_contigv(merger->ctgs, id2);
+	contig_t *keep, *rds_src, *idx_src;
+	char *common;
+	uint32_t pos;
+	int len1, len2, cap;
+
+	if (id1 < id2) {
+		keep = c1;
+		c2->closed = 1;
+	} else {
+		keep = c2;
+		c1->closed = 1;
+	}
+
+	len1 = strlen(c1->path->string);
+	len2 = strlen(c2->path->string);
+	cap = (len1 >= len2) ? len1 : len2;
+	common = (char*)malloc(sizeof(char)*(cap+1));
+	memset(common, 0, cap+1);
+	prefix_path(c1->path->string, c2->path->string, cap, common);
+
+	rds_src = c2->closed ? c2 : c1;
+	for (pos = 0; pos < rds_src->m_rds->size; pos++)
+		push_u32list(keep->m_rds, get_u32list(rds_src->m_rds, pos));
+
+	clear_string(keep->path);
+	append_string(keep->path, common, cap);
+
+	idx_src = (keep == c1) ? c2 : c1;
+	for (pos = 0; pos < idx_src->m_idx->size; pos++)
+		push_idxv(keep->m_idx, get_idxv(idx_src->m_idx, pos));
+
+	free(common);
+}
+
+/*
+ * Decide whether two contigs (each together with the contigs already merged
+ * into it) share enough reads to be merged.
+ *
+ * The side with fewer reads is the query, the other one the database. For
+ * every query read (read 0 of the query contig itself is skipped) k-mers are
+ * sampled every fourth position and looked up in all k-mer indexes of the
+ * database side; a read counts as soon as one sampled k-mer is found.
+ * Returns 1 when counted_reads / (query_reads - 1) >= merger->het, else 0.
+ *
+ * Fixes over the original: the sampling loop bound is written as
+ * j + k + 3 <= len, so reads with k <= len < k+3 no longer wrap the unsigned
+ * bound and scan past the read; bases are cast to unsigned char before
+ * indexing base_bit_table.
+ */
+int is_similar_enough(merge_t *merger, contig_t *c1, contig_t *c2) {
+	uint64_t kmask = 0xFFFFFFFFFFFFFFFFLLU >> ((32-merger->RD_KMER_SIZE)*2);
+	uint32_t ksize = merger->RD_KMER_SIZE;
+	read_t *rd; uint32_t i, j, k, s, len, m, n, n1, n2, cnt; contig_t *cdb, *cq, *c; int found;
+	rd_kmer_t K;
+
+	n1 = n2 = 0;
+	for (m = 0; m < c1->m_rds->size; m++) {
+		c = get_contigv(merger->ctgs, get_u32list(c1->m_rds, m));
+		n1 += c->rds->size;
+	}
+	for (m = 0; m < c2->m_rds->size; m++) {
+		c = get_contigv(merger->ctgs, get_u32list(c2->m_rds, m));
+		n2 += c->rds->size;
+	}
+	if (n1 >= n2) {
+		cdb = c1;
+		cq = c2;
+		n = n2-1;
+	} else {
+		cdb = c2;
+		cq = c1;
+		n = n1-1;
+	}
+
+	cnt = 0;
+	for (m = 0; m < cq->m_rds->size; m++) {
+		c = get_contigv(merger->ctgs, get_u32list(cq->m_rds,m));
+		K.kmer = 0;
+		K.kpos = 0;
+		// read 0 is skipped only for the query contig itself (m == 0)
+		for (i = (m > 0) ? 0 : 1; i < c->rds->size; i++) {
+			rd = ref_readv(c->rds, i);
+			len = rd->rd_len;
+			if (len < ksize) continue;
+			for (j = 0; j < ksize-1; j++)
+				K.kmer = (K.kmer << 2) | base_bit_table[(unsigned char)rd->seq[j]];
+			found = 0;
+			// advance four bases per step, look up one k-mer per step
+			for (j = 0; !found && j + ksize + 3 <= len; j += 4) {
+				for (s = 0; s < 4; s++)
+					K.kmer = ((K.kmer << 2) | base_bit_table[(unsigned char)rd->seq[j+s+ksize-1]]) & kmask;
+				for (k = 0; k < cdb->m_idx->size; k++) {
+					if (get_rdkhash(get_idxv(cdb->m_idx, k), K)) {
+						cnt++;
+						found = 1;
+						break;
+					}
+				}
+			}
+		}
+	}
+	if ((float)cnt/n - merger->het >= 0)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Copy the longest common prefix of s1 and s2 into pre and NUL-terminate it.
+ *
+ * s1, s2 - NUL-terminated strings
+ * n      - upper bound on the prefix length; pre must hold n+1 bytes
+ * pre    - out: the common prefix
+ *
+ * The scan also stops at the end of the strings, so passing an n larger than
+ * both lengths no longer compares bytes beyond the terminators.
+ */
+void prefix_path(char *s1, char *s2, int n, char *pre) {
+	int i = 0;
+
+	while (i < n && s1[i] != '\0' && s1[i] == s2[i]) {
+		pre[i] = s1[i];
+		i++;
+	}
+	pre[i] = '\0';
+}
+
+/*
+ * Pairwise merge pass over all contigs of the merger. Every open contig i is
+ * compared with the later contigs j that are open and hold more than 5
+ * reads; the first similar one is merged into it and the scan moves on to
+ * the next i.
+ *
+ * The outer bound is written as i + 1 < size so that an empty contig list
+ * does not wrap the unsigned size - 1.
+ */
+void merge_core(merge_t *merger) {
+	contigv *ctgs = merger->ctgs;
+	contig_t *ctg1, *ctg2; uint32_t i, j;
+
+	for (i = 0; i + 1 < ctgs->size; i++) {
+		ctg1 = get_contigv(ctgs, i);
+		if (ctg1->closed) continue;
+		for (j = i+1; j < ctgs->size; j++) {
+			if (ctg1->closed) break;
+			ctg2 = get_contigv(ctgs, j);
+			if (ctg2->closed || ctg2->rds->size <= 5 ) continue;
+			if (is_similar_enough(merger, ctg1, ctg2)) {
+				merge_leaves(merger, i, j);
+				break;
+			}
+		}
+	}
+}
+
+/*
+ * Insert the path of every contig into the merger's binary path tree,
+ * creating the root on first use. A '0' in the path descends to the left
+ * child, any other character to the right child. A node created for the
+ * last character of a path records the contig's index + 1 in tid; nodes that
+ * already exist keep their tid.
+ */
+void build_tree(merge_t *merger) {
+	uint32_t idx, total;
+	contig_t *ctg;
+	pathtree_t *node, **slot;
+	char *path;
+	int depth, plen;
+
+	total = count_contigv(merger->ctgs);
+	if (merger->tree == NULL) {
+		merger->tree = (pathtree_t *) malloc(sizeof(pathtree_t));
+		merger->tree->tid = 0;
+		merger->tree->left = NULL;
+		merger->tree->right = NULL;
+	}
+	for (idx = 0; idx < total; idx++) {
+		ctg = get_contigv(merger->ctgs, idx);
+		path = ctg->path->string;
+		plen = strlen(path);
+		node = merger->tree;
+		for (depth = 0; depth < plen; depth++) {
+			slot = (path[depth] == '0') ? &node->left : &node->right;
+			if (*slot == NULL) {
+				*slot = (pathtree_t *) malloc(sizeof(pathtree_t));
+				(*slot)->left = NULL;
+				(*slot)->right = NULL;
+				(*slot)->tid = (depth == plen-1) ? idx+1 : 0;
+			}
+			node = *slot;
+		}
+	}
+}
+
+/* Free a path tree recursively, children first; a NULL tree is accepted. */
+void destroy_tree(pathtree_t *t) {
+	if (t == NULL)
+		return;
+	destroy_tree(t->left);
+	destroy_tree(t->right);
+	free(t);
+}
+
+/* Destroy the merger's path tree and reset the pointer so build_tree() starts over. */
+void free_tree(merge_t *merger) {
+	pathtree_t *root = merger->tree;
+
+	merger->tree = NULL;
+	destroy_tree(root);
+}
+
+/*
+ * Allocate a merger and store the given parameters in it. The contig list
+ * and the contig cache start empty, there is no tree and no assembler yet,
+ * and the counters (sim_pairs, cid, flag) start at 0. min_overlap is stored
+ * both as min_overlap and as the assembler's min_ol.
+ */
+merge_t* init_merger(uint32_t min_kmer, uint32_t min_overlap, float het, uint32_t kmersize, uint32_t max_cluster, uint32_t need_asm, float min_sm, uint32_t min_read, uint32_t max_read) {
+	merge_t *m = (merge_t *)malloc(sizeof(merge_t));
+
+	/* containers and state */
+	m->ctgs = init_contigv(2);
+	m->cache = init_contigsv(2);
+	m->tree = NULL;
+	m->ef = NULL;
+	m->flag = 0;
+	m->cid = 0;
+	m->sim_pairs = 0;
+
+	/* merging parameters */
+	m->min_kmer = min_kmer;
+	m->min_overlap = min_overlap;
+	m->het = het;
+	m->RD_KMER_SIZE = kmersize;
+	m->max_cluster = max_cluster;
+
+	/* assembly parameters */
+	m->need_asm = need_asm;
+	m->min_ol = min_overlap;
+	m->min_sm = min_sm;
+	m->min_read = min_read;
+	m->max_read = max_read;
+
+	return m;
+}
+
+/*
+ * Release the containers owned by a contig (path, merged-read list, k-mer
+ * index, index list and reads). The contig_t itself is left to the caller.
+ */
+void free_ctg(contig_t *ctg) {
+	contig_t *c = ctg;
+
+	free_string(c->path);
+	free_u32list(c->m_rds);
+	free_rdkhash(c->index);
+	free_idxv(c->m_idx);
+	free_readv(c->rds);
+}
+
+/*
+ * Empty all containers of a contig while keeping them allocated, so the
+ * object can be reused. The read list is cleared only when it is non-empty.
+ */
+void clear_ctg(contig_t *ctg) {
+	contig_t *c = ctg;
+
+	clear_string(c->path);
+	clear_u32list(c->m_rds);
+	clear_rdkhash(c->index);
+	clear_idxv(c->m_idx);
+	if (c->rds->size != 0)
+		clear_readv(c->rds);
+}
+
+/*
+ * Free every contig parked in merger->cache, including its containers, and
+ * empty the cache afterwards so that no dangling pointers remain in it
+ * (a second call, or a later pop from the cache, is then harmless).
+ */
+void free_ctgs(merge_t *merger) {
+	contig_t *ctg;
+	uint32_t i;
+
+	for (i = 0; i < merger->cache->size; i++) {
+		ctg = get_contigsv(merger->cache, i);
+		free_ctg(ctg);
+		free(ctg);
+	}
+	clear_contigsv(merger->cache);
+}
+
+/* Empty the contig's containers and park it in merger->cache (same steps as return_ctgv_merger). */
+void put_cache_ctgs(merge_t *merger, contig_t *ctg) {
+	contigsv *pool = merger->cache;
+
+	clear_ctg(ctg);
+	push_contigsv(pool, ctg);
+}
+
+/*
+ * Prepare the merger for the next cluster: every contig in use is cleared
+ * and moved to the cache, then the active contig list is emptied. The tree,
+ * the assembler and the parameters are not touched.
+ */
+void reset_merger(merge_t *merger) {
+	uint32_t pos = 0;
+
+	while (pos < merger->ctgs->size) {
+		put_cache_ctgs(merger, get_contigv(merger->ctgs, pos));
+		pos++;
+	}
+	clear_contigv(merger->ctgs);
+}
+
+/*
+ * Destroy a merger. Contigs still in use are moved to the cache first, so
+ * that free_ctgs() releases every contig exactly once; then the cache, the
+ * contig list, a path tree that is still alive, the assembler (if one was
+ * created) and the merger itself are freed.
+ */
+void free_merger(merge_t *merger) {
+	uint32_t i;
+
+	for (i = 0; i < merger->ctgs->size; i++) {
+		put_cache_ctgs(merger, get_contigv(merger->ctgs, i));
+	}
+
+	free_ctgs(merger);
+	free_contigsv(merger->cache);
+	free_contigv(merger->ctgs);
+	// a tree is normally freed per cluster; release it if one is left over
+	destroy_tree(merger->tree);
+	merger->tree = NULL;
+	if (merger->ef)
+		free_ef(merger->ef);
+
+	free(merger);
+}
+
+/*
+ * Walk the path tree and merge sibling leaves. When both children of a node
+ * are leaves that carry a contig (tid != 0) and the two contigs are similar
+ * enough, they are merged, merger->sim_pairs is incremented and the node's
+ * tid is set to the smaller of the two contig ids, which makes later passes
+ * skip the node. A child is descended into only when it has both of its own
+ * children.
+ *
+ * Unlike the original, a missing tree, a node with a single child and a
+ * leaf without a contig are tolerated instead of dereferencing NULL or
+ * using index -1.
+ */
+void merge_along_tree(merge_t *merger, pathtree_t *tree) {
+	contig_t *c1, *c2;
+	pathtree_t *l, *r;
+
+	if (tree == NULL || tree->tid)
+		return ;
+	l = tree->left;
+	r = tree->right;
+	if (l == NULL && r == NULL)
+		return ;
+	if (l && r && l->left == NULL && l->right == NULL && r->left == NULL && r->right == NULL
+			&& l->tid && r->tid) {
+		c1 = get_contigv(merger->ctgs, l->tid-1);
+		c2 = get_contigv(merger->ctgs, r->tid-1);
+		if (is_similar_enough(merger, c1, c2)) {
+			merger->sim_pairs++;
+			merge_leaves(merger, l->tid-1, r->tid-1);
+			tree->tid = c1->id<c2->id?c1->id:c2->id;
+		}
+	}
+	if (l && l->left && l->right)
+		merge_along_tree(merger, l);
+	if (r && r->left && r->right)
+		merge_along_tree(merger, r);
+}
+
+/*
+ * Process the contigs loaded for one cluster: build the path tree and the
+ * read indexes, merge, write the result and reset the merger.
+ * Clusters with at most max_cluster contigs repeat the tree merge until a
+ * pass merges nothing and, with 3 or more contigs, run the pairwise
+ * merge_core() as well; larger clusters get a single tree pass.
+ */
+static void finish_cluster(merge_t *merger, FILE *out) {
+	build_tree(merger);
+	update_ctg2merge(merger);
+	if (merger->ctgs->size <= merger->max_cluster) {
+		do {
+			merger->sim_pairs = 0;
+			merge_along_tree(merger, merger->tree);
+		} while (merger->sim_pairs);
+		if (merger->ctgs->size >= 3)
+			merge_core(merger);
+	} else {
+		merge_along_tree(merger, merger->tree);
+	}
+	if (merger->need_asm)
+		print_asm(merger, out);
+	else
+		print_asm2(merger, out);
+	free_tree(merger);
+	reset_merger(merger);
+}
+
+/*
+ * Read the input table cluster by cluster (cluster id in column 4). Each
+ * time the cluster id changes, the previous cluster is processed and
+ * written to out, the current row is pushed back and prepare_reads() loads
+ * the new cluster. The last cluster is processed after the input ends.
+ * A cluster id of 0 is never processed, because 0 doubles as the
+ * "no cluster yet" marker.
+ */
+void merge_ctgs(merge_t *merger, FileReader *in, FILE *out) {
+	uint32_t lastcid = 0, cid;
+	int n_col;
+
+	while ((n_col = fread_table(in)) != -1) {
+		if (n_col == 0)
+			continue;
+		cid = atoi(get_col_str(in, 4));
+		if (cid != lastcid) {
+			if (lastcid)
+				finish_cluster(merger, out);
+			lastcid = cid;
+			froll_back(in);
+		}
+		prepare_reads(merger, in, lastcid);
+	}
+	if (lastcid)
+		finish_cluster(merger, out);
+}
diff --git a/mergectg.h b/mergectg.h
new file mode 100644
index 0000000..fb63596
--- /dev/null
+++ b/mergectg.h
@@ -0,0 +1,139 @@
+#ifndef MERGECTG_H
+#define MERGECTG_H
+
+#include <stdint.h>
+#include "list.h"
+#include "file_reader.h"
+#include "hashset.h"
+#include "string.h"
+#include "stdaln.h"
+#include "asm_R2.h"
+#include "bloom_filter.h"
+
+/*
+ * One read held inline: seq stores up to MAX_RD_LEN characters plus the
+ * terminating NUL, rd_len is the stored length.
+ */
+typedef struct {
+	char seq[MAX_RD_LEN+1];
+	uint32_t seq_id;
+	uint32_t rd_len;
+	uint32_t rank;
+} read_t;
+
+/* readv: list of read_t stored by value. */
+define_list(readv, read_t);
+
+/* A read k-mer packed into 62 bits (two bits per base) plus a 2-bit kpos field. */
+typedef struct {
+	uint64_t kmer:62, kpos:2;
+} rd_kmer_t;
+
+/*
+ * Hashing and equality use the kmer field only.
+ * NOTE(review): the 62-bit kmer is passed to u32hashcode; confirm whether
+ * the upper bits take part in the hash.
+ */
+#define rd_kmer_code(r) u32hashcode((r).kmer)
+#define rd_kmer_eq(r1, r2) ((r1).kmer == (r2).kmer)
+define_hashset(rdkhash, rd_kmer_t, rd_kmer_code, rd_kmer_eq);
+/* idxv: list of pointers to k-mer hash sets. */
+define_list(idxv, rdkhash*);
+//define_list(idxv, BloomFilter*);
+
+/*
+ * A contig inside the merger. closed is set once the contig has been merged
+ * into another one; path holds its '0'/'1' path string used to build the
+ * path tree; rds holds its reads; index is the k-mer set of its own reads.
+ */
+typedef struct {
+//	uint32_t id, clsid, old_clsid, sz;
+	uint32_t id;
+	int closed;
+//	char *seq, *sec_seq;
+	String *path;
+	readv *rds;
+	u32list *m_rds;  // merged reads index
+	rdkhash *index;
+//	BloomFilter *index;
+	idxv *m_idx;  // merged multiple index
+//	Vector *efctgs;
+} contig_t; 
+
+/* Hash set of contigs keyed by id. */
+#define contig_code(c) u32hashcode((c).id)
+#define contig_eq(c1, c2) ((c1).id == (c2).id)
+define_hashset(ctgset, contig_t, contig_code, contig_eq);
+
+/* A contig id paired with a sequence string. */
+typedef struct {
+	uint32_t id;
+	char *seq;
+} contig_seq_t;
+
+/* contigv holds the contigs in use, contigsv the cached ones; both store contig_t pointers. */
+define_list(contigv, contig_t*);
+define_list(contigsv, contig_t*);
+
+/*
+ * Node of the binary path tree built by build_tree(): '0' in a contig path
+ * selects left, anything else right. tid is 0 for nodes without a contig;
+ * build_tree() stores contig index + 1 in nodes created at the end of a path.
+ */
+typedef struct pathtree_t pathtree_t;
+struct pathtree_t {
+	uint32_t tid; // leaf records contig ID
+	pathtree_t *left;
+	pathtree_t *right;
+};
+
+/* A contig k-mer with the contig it belongs to and two offsets. */
+typedef struct {
+	uint64_t kmer, kpos;
+	uint32_t id; // which contig
+	int offset;  // offset w.r.t. the current contig
+	int offset2; // offset of query contig
+} ctg_kmer_t;
+
+/*
+ * Hashing and equality use the kmer field only.
+ * NOTE(review): mergecontig.h defines kmer_code/kmer_eq with the same names.
+ */
+#define kmer_code(k) u64hashcode((k).kmer)
+#define kmer_eq(k1, k2) ((k1).kmer == (k2).kmer)
+define_hashset(ctgkhash, ctg_kmer_t, kmer_code, kmer_eq);
+
+/* Hash-set element keyed by key (the key itself serves as hash code); oldid and path are payload. */
+typedef struct {uint32_t key; uint32_t oldid; char *path;} uuchash_t;
+#define uuchash_code(e) (e).key
+#define uuchash_equals(e1, e2) ((e1).key == (e2).key)
+define_hashset(uuchash, uuchash_t, uuchash_code, uuchash_equals);
+
+/* ctgkmerv: list of ctg_kmer_t stored by value. */
+define_list(ctgkmerv, ctg_kmer_t);
+
+/*
+ * Link record: previous k-mer position plus the current offset.
+ * NOTE(review): mergecontig.h defines another link_t with an unsigned offset.
+ */
+typedef struct {
+	uint64_t last; //last kmer position
+	int offset; // current kmer offset
+} link_t;
+
+/* bisearch: search helper over uint64_t arrays generated by define_search_array. */
+define_search_array(bisearch, uint64_t, native_number_cmp);
+
+/*
+ * State of the contig merger: the contigs of the cluster being processed
+ * (ctgs), a pool of cleared contigs for reuse (cache), the path tree of the
+ * current cluster, the user parameters and the assembler handle.
+ */
+typedef struct {
+	contigv *ctgs;
+	contigsv *cache;
+	pathtree_t *tree;
+	uint32_t min_kmer; // parameter: # kmers to define two similar contigs
+	uint32_t min_overlap; // parameter
+	float het; // parameter
+	uint32_t RD_KMER_SIZE; // parameter
+	uint32_t min_ol; //parameter for asm
+	float min_sm; // parameter for asm
+	uint32_t min_read; // parameter for asm
+	uint32_t max_read; // parameter for asm
+	uint32_t sim_pairs; // merges done by the current merge_along_tree pass
+	uint32_t max_cluster; //parameter
+	uint32_t need_asm; // parameter
+	uint32_t cid; // next cluster id written by print_asm2
+	EF *ef;
+	int flag;  // if == 0 first use, init; else reset
+} merge_t;
+
+
+/*
+ * Public interface of mergectg.c. The linkage guard tests __cplusplus, the
+ * macro C++ compilers actually define (the upper-case spelling is never
+ * defined, so the extern "C" block was dead).
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+merge_t* init_merger(uint32_t min_kmer, uint32_t min_overlap, float het, uint32_t kmersize, uint32_t max_cluster, uint32_t need_asm, float min_sm, uint32_t min_read, uint32_t max_read);
+//void merge_ctgs(merge_t *merger, FileReader *asmd, FileReader *divd, FILE *out);
+void merge_ctgs(merge_t *merger, FileReader *in, FILE *out);
+void merge_along_tree(merge_t *merger, pathtree_t *tree);
+void merge_core(merge_t *merger);
+void free_index(merge_t *merger);
+void free_ctg(contig_t *ctg);
+void free_ctgs(merge_t *merger);
+void build_tree(merge_t *merger);
+void update_ctg2merge(merge_t *merger);
+int is_similar_enough(merge_t *merger, contig_t *c1, contig_t *c2);
+void merge_2ctg(merge_t *merger, contig_t *ctg1, contig_t *ctg2);
+void update_merger(merge_t *merger, contig_t *ctg1, contig_t *ctg2);
+void prefix_path(char *s1, char *s2, int n, char *pre);
+void destroy_tree(pathtree_t *t);
+void free_tree(merge_t *merger);
+void reset_merger(merge_t *merger);
+void free_merger(merge_t *merger);
+void clear_ctg(contig_t *ctg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/mergetag.c b/mergetag.c
new file mode 100644
index 0000000..3dd0494
--- /dev/null
+++ b/mergetag.c
@@ -0,0 +1,203 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#include "file_reader.h"
+#include "dna.h"
+#include "list.h"
+#include "sort.h"
+#include <unistd.h>
+
+/*
+ * One input row. main() fills rid, cid and gid from columns 0, 1 and 2;
+ * off1/len1 and off2/len2 locate the two sequences (columns 3 and 4) inside
+ * the shared seqs string.
+ */
+typedef struct {
+	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;
+} REC;
+
+/* recv: list of REC stored by value. */
+define_list(recv, REC);
+
+/*
+ * A run of consecutive records sharing one gid: off/len give the record
+ * range, cns_off/cns_len locate the run's consensus inside seqs.
+ */
+typedef struct {
+	uint32_t gid, off, len;
+	uint32_t cns_off, cns_len;
+} Block;
+
+/* blockv: list of Block stored by value. */
+define_list(blockv, Block);
+
+/*
+ * Build the majority consensus of the first sequences of records
+ * [beg, end) and append it, NUL-terminated, to seqs.
+ *
+ * cns_off - out: offset of the consensus inside seqs
+ * cns_len - out: consensus length (the longest len1 in the range)
+ *
+ * At every column the most frequent of the four bases wins; ties go to the
+ * lower base code. Records shorter than the column do not vote. The
+ * sequence byte is cast to unsigned char before indexing base_bit_table so
+ * bytes >= 0x80 cannot become a negative index where char is signed.
+ */
+void consensus(recv *divs, String *seqs, uint32_t beg, uint32_t end, uint32_t *cns_off, uint32_t *cns_len){
+	REC *r;
+	uint32_t i, j, len, acgtn[5], ref;
+	len = 0;
+	for(i=beg;i<end;i++){
+		r = ref_recv(divs, i);
+		if(r->len1 > len) len = r->len1;
+	}
+	*cns_off = seqs->size;
+	*cns_len = len;
+	for(i=0;i<len;i++){
+		for(j=0;j<5;j++) acgtn[j] = 0;
+		for(j=beg;j<end;j++){
+			r = ref_recv(divs, j);
+			if(r->len1 <= i) continue;
+			// NOTE(review): assumes base_bit_table yields codes 0..4 only
+			acgtn[base_bit_table[(unsigned char)seqs->string[r->off1 + i]]] ++;
+		}
+		ref = 0;
+		for(j=1;j<4;j++){
+			if(acgtn[j] > acgtn[ref]) ref = j;
+		}
+		add_char_string(seqs, bit_base_table[ref]);
+	}
+	add_char_string(seqs, '\0');
+}
+
+/*
+ * Distance between the consensus sequences of two blocks: the number of
+ * differing characters over the shorter length plus the length difference.
+ */
+uint32_t cal_mm(String *seqs, Block *b1, Block *b2){
+	uint32_t shorter, longer, pos, dist;
+	if(b1->cns_len <= b2->cns_len){
+		shorter = b1->cns_len;
+		longer  = b2->cns_len;
+	} else {
+		shorter = b2->cns_len;
+		longer  = b1->cns_len;
+	}
+	dist = longer - shorter;
+	for(pos=0;pos<shorter;pos++){
+		if(seqs->string[b1->cns_off + pos] != seqs->string[b2->cns_off + pos]) dist ++;
+	}
+	return dist;
+}
+
+/*
+ * Process one batch of records.
+ * 1. Split divs into runs of consecutive records with equal gid; each run
+ *    becomes a Block with its own consensus. With task == 1 the consensus
+ *    is printed as "<gid>\t<consensus>" and nothing else is done.
+ * 2. Otherwise every later block whose consensus is within max_mm of an
+ *    earlier block takes over that block's gid, the blocks are sorted by
+ *    gid and all records are printed with their block's gid.
+ */
+void merge_core(recv *divs, String *seqs, uint32_t max_mm, int task, blockv *blocks, FILE *out){
+	Block *b;
+	REC *r;
+	uint32_t i, j, gid, beg, mm, total;
+	beg = 0;
+	gid = 0;
+	total = count_recv(divs);
+	clear_blockv(blocks);
+	i = 0;
+	while(1){
+		if(i < total && ref_recv(divs, i)->gid == gid){ i ++; continue; }
+		if(i > beg){
+			// records [beg, i) form one run
+			b = next_ref_blockv(blocks);
+			b->gid = gid;
+			b->off = beg;
+			b->len = i - beg;
+			consensus(divs, seqs, beg, i, &b->cns_off, &b->cns_len);
+			if(task == 1){
+				fprintf(out, "%u\t%s\n", gid, seqs->string + b->cns_off);
+			}
+		}
+		if(i == total) break;
+		beg = i;
+		gid = ref_recv(divs, i)->gid;
+		i ++;
+	}
+	if(task == 1) return;
+	for(i=0;i+1<count_blockv(blocks);i++){
+		for(j=i+1;j<count_blockv(blocks);j++){
+			mm = cal_mm(seqs, ref_blockv(blocks, i), ref_blockv(blocks, j));
+			if(mm > max_mm) continue;
+			ref_blockv(blocks, j)->gid = ref_blockv(blocks, i)->gid;
+		}
+	}
+	sort_array(blocks->buffer, blocks->size, Block, ((a.gid == b.gid)? 0 : ((a.gid < b.gid)? -1 : 1)));
+	for(i=0;i<blocks->size;i++){
+		b = ref_blockv(blocks, i);
+		for(j=0;j<b->len;j++){
+			r = ref_recv(divs, b->off + j);
+			fprintf(out, "%u\t%u\t%s\t%s\t%u\n", r->rid, b->gid, seqs->string + r->off1, seqs->string + r->off2, r->cid);
+		}
+	}
+}
+
+/* Print the command-line help to stdout; always returns 1 so callers can "return usage();". */
+int usage(){
+	static const char help[] =
+			"Usage: rbmergetag [options]\n"
+			"Options:\n"
+			" -i <string>    Input file name [stdin]\n"
+			" -o <string>    Output file name [stdout]\n"
+			" -j <cns|merge> Job type, cns: consensus, merge: merging, [merge]\n"
+			" -m <int>       Maximum mismatches to merge two groups [1]\n"
+			" -h             Show this document\n";
+	fputs(help, stdout);
+	return 1;
+}
+
+/*
+ * rbmergetag entry point. Parses the options, then reads the input table
+ * row by row. Consecutive rows sharing the value of column 2 form a batch;
+ * whenever that value changes (or the input ends) the batch is handed to
+ * merge_core() and the buffers are cleared.
+ *
+ * Rows with fewer than five columns (blank lines included) are skipped
+ * before any column is accessed; the original read columns 2..4 from them.
+ *
+ * NOTE(review): batches are delimited by column 2, which is also stored as
+ * each record's gid, so all records of a batch carry the same gid - confirm
+ * that this is the intended grouping column.
+ */
+int main(int argc, char **argv){
+	FileReader *fr;
+	recv *divs;
+	REC *r;
+	blockv *blocks;
+	FILE *out;
+	String *seqs;
+	char *inf, *ouf;
+	uint32_t cid, max_mm;
+	int n, c, task;
+	max_mm = 1;
+	task = 2;
+	inf = NULL;
+	ouf = NULL;
+	while((c = getopt(argc, argv, "hi:o:j:m:")) != -1){
+		switch(c){
+			case 'i': inf = optarg; break;
+			case 'o': ouf = optarg; break;
+			case 'j': task = (strcasecmp(optarg, "cns") == 0)? 1 : 2; break;
+			case 'm': max_mm = atoi(optarg); break;
+			default: return usage();
+		}
+	}
+	if(inf == NULL){ fr = stdin_filereader(); }
+	else if((fr = fopen_filereader(inf)) == NULL){
+		fprintf(stderr, "Cannot read '%s'\n", inf);
+		return 1;
+	}
+	if(ouf == NULL){ out = stdout; }
+	else if((out = fopen(ouf, "w")) == NULL){
+		fprintf(stderr, "Cannot write'%s'\n", ouf);
+		return 1;
+	}
+	divs = init_recv(1024);
+	seqs = init_string(1024);
+	blocks = init_blockv(12);
+	cid = 0;
+	while(1){
+		n = fread_table(fr);
+		// columns 0..4 are required below; ignore incomplete rows
+		if(n != -1 && n < 5) continue;
+		if(n == -1 || (uint32_t)atoll(get_col_str(fr, 2)) != cid){
+			if(count_recv(divs)){ merge_core(divs, seqs, max_mm, task, blocks, out); }
+			clear_string(seqs);
+			clear_recv(divs);
+			if(n == -1) break;
+			cid = atoll(get_col_str(fr, 2));
+		}
+		{
+			r = next_ref_recv(divs);
+			r->rid  = atoll(get_col_str(fr, 0));
+			r->rank = 1;
+			r->cid  = atoll(get_col_str(fr, 1));
+			r->gid  = atoll(get_col_str(fr, 2));
+			// both sequences are stored NUL-terminated in the shared buffer
+			r->off1 = seqs->size;
+			r->len1 = get_col_len(fr, 3);
+			append_string(seqs, get_col_str(fr, 3), get_col_len(fr, 3));
+			add_char_string(seqs, '\0');
+			r->off2 = seqs->size;
+			r->len2 = get_col_len(fr, 4);
+			append_string(seqs, get_col_str(fr, 4), get_col_len(fr, 4));
+			add_char_string(seqs, '\0');
+		}
+	}
+	free_recv(divs);
+	free_string(seqs);
+	free_blockv(blocks);
+	fclose_filereader(fr);
+	if(ouf) fclose(out);
+	return 0;
+}
diff --git a/rainbow.h b/rainbow.h
new file mode 100644
index 0000000..403af99
--- /dev/null
+++ b/rainbow.h
@@ -0,0 +1,127 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __RAINBOW_RJ_H
+#define __RAINBOW_RJ_H
+
+#include <stdint.h>
+#include <time.h>
+#include <unistd.h>
+#include "bitvec.h"
+#include "hashset.h"
+#include "list.h"
+#include "sort.h"
+#include "dna.h"
+#include "file_reader.h"
+#include "string.h"
+//#include "mergecontig.h"
+#include "mergectg.h"
+
+
+//static uint32_t KMER_SIZE = 15;
+//static uint32_t KMER_NUM = 6;
+//#define KMER_NUM	6
+
+typedef struct { /* Element type of the khash set defined below: a two-word key plus a seqid. */
+	uint32_t kmer1, kmer2, seqid;
+} kmer_t;
+
+#define kmer_hashcode(k) u64hashcode((((uint64_t)(k).kmer1) << 32) | (k).kmer2) /* hashes kmer1 (high 32 bits) joined with kmer2 (low 32 bits) */
+#define kmer_equals(k1, k2) (((k1).kmer1 == (k2).kmer1) && ((k1).kmer2 == (k2).kmer2)) /* seqid takes no part in equality */
+define_hashset(khash, kmer_t, kmer_hashcode, kmer_equals);
+
+typedef struct { /* Sequence store; Cluster below holds two of these (sdb, sdb2). */
+	uint64_t *seqs; /* presumably bit-packed bases -- TODO confirm against cluster.c */
+	uint32_t n_rd;
+	uint8_t  rd_len, max_rd_len; /* 8-bit fields: values above 255 cannot be represented */
+	u64list *seqoffs;
+	u8list  *seqlens;
+} SeqDB;
+
+typedef struct { /* Element type of the sbtv list defined below. */
+	uint32_t bt;
+	uint32_t len;
+	uint64_t seq[8]; /* eight 64-bit words */
+} SBT;
+
+define_list(sbtv, SBT);
+
+typedef struct { /* State shared by the clustering functions declared below. */
+	SeqDB    *sdb, *sdb2;
+	uint64_t seq1[10], seq2[10]; /* two fixed buffers of ten 64-bit words each */
+	uint32_t gidoff;
+	uint32_t max_seqid;
+	uint32_t max_pair_len;
+	uint32_t max_mm; /* same name as an init_cluster() parameter; presumably stored from it -- TODO confirm */
+	uint32_t exact_limit; /* same name as an init_cluster() parameter; presumably stored from it -- TODO confirm */
+	uint32_t idxs[2];
+	uint32_t KMER_SIZE; /* per-instance value; the file-level statics of this name are commented out above */
+	uint32_t KMER_NUM; /* per-instance value; the file-level statics of this name are commented out above */
+	khash *index; /* hash set of kmer_t entries */
+	u32list *links;
+	BitVec  *flags;
+	//uuhash *gid_map;
+	u32list *gid_map; /* a list here; the hash-based declaration is left commented out on the previous line */
+	u32list *gids;
+	u32list *bts;
+	sbtv    *sbts;
+} Cluster;
+
+Cluster* init_cluster(uint32_t max_mm, uint32_t exact_limit, uint32_t KMER_SIZE, uint32_t KMER_NUM);
+void indexing_cluster(Cluster *cluster, FileReader *fr1, int is_fq, int fix_rd_len);
+void clustering(Cluster *cluster, FileReader *fr2, int is_fq, int fix_rd_len, FILE *out);
+void free_cluster(Cluster *cluster);
+
+typedef struct { /* Element type of rilist; the four bit-fields add up to 32 bits. */
+	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6; /* 10-bit lengths hold at most 1023, 6-bit fields at most 63 */
+} ReadInfo;
+
+define_list(rilist, ReadInfo);
+
+define_list(u32slist, u32list*); /* list whose elements are u32list pointers */
+
+typedef struct { /* Element type of cbv. */
+	uint32_t col, cnt, base;
+} col_base_t;
+
+define_list(cbv, col_base_t);
+
+typedef struct { /* State shared by the div functions declared below. */
+	uint32_t gidoff;
+	rilist *rds; /* list of ReadInfo */
+	u8list *seqs;
+	u32slist *grps, *cache; /* lists of u32list pointers */
+	u64list *markers[4]; /* four lists; presumably one per base -- TODO confirm against divide.c */
+	u32list *deps;
+	u32list *gids;
+	cbv *cbs; /* list of col_base_t */
+	u32list *ps1;
+	u32list *ps2;
+	uint32_t n_col;
+	uint32_t k_allele, K_allele; /* same names as init_div() parameters; presumably stored from them -- TODO confirm */
+	float min_freq; /* same name as an init_div() parameter; presumably stored from it -- TODO confirm */
+} Div;
+
+Div* init_div(uint32_t k_allele, uint32_t K_allele, float min_freq);
+uint32_t div_reads(Div *div, FileReader *fr, FILE *out);
+void reset_div(Div *div);
+void free_div(Div *div);
+
+
+#endif
diff --git a/rbasm_main.c b/rbasm_main.c
new file mode 100644
index 0000000..ac308aa
--- /dev/null
+++ b/rbasm_main.c
@@ -0,0 +1,41 @@
+#include "asm_R2.h"
+
+int main(int argc, char **argv){ /* rbasm entry point: parse options, open the input/output streams and run asm_ef(); returns 0 on success. */
+	FileReader *in;
+	FILE *out;
+	uint32_t min_ol, min_read, max_read;
+	float min_sm;
+	char *infile, *outfile;
+	int c;
+	infile = NULL; /* NULL means read stdin */
+	outfile = NULL; /* NULL means write stdout */
+	min_ol = 5; /* default for -l */
+	min_sm = 0.9; /* default for -s */
+	min_read = 5; /* default for -r */
+	max_read = 200; /* default for -R */
+	while((c = getopt(argc, argv, "hi:o:r:R:l:s:")) != -1){
+		switch(c){
+			case 'i': infile = optarg; break;
+			case 'o': outfile = optarg; break;
+			case 'l': min_ol = atoi(optarg); break;
+			case 's': min_sm = atof(optarg); break;
+			case 'r': min_read = atoi(optarg); break;
+			case 'R': max_read = atoi(optarg); break;
+			default: return ef_usage(); /* -h and unrecognised options ('?') both show the usage text instead of being ignored */
+		}
+	}
+	if(infile == NULL) in = stdin_filereader();
+	else if((in = fopen_filereader(infile)) == NULL){
+		fprintf(stderr, " -- Cannot open %s in %s -- %s:%d --\n", infile, __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	if(outfile == NULL) out = stdout;
+	else if((out = fopen(outfile, "w")) == NULL){
+		fprintf(stderr, " -- Cannot write %s in %s -- %s:%d --\n", outfile, __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	asm_ef(in, out, min_ol, min_sm, min_read, max_read);
+	fclose_filereader(in);
+	if(outfile) fclose(out); /* stdout is left open */
+	return 0;
+}
diff --git a/select_all_rbcontig.pl b/select_all_rbcontig.pl
new file mode 100755
index 0000000..437b65d
--- /dev/null
+++ b/select_all_rbcontig.pl
@@ -0,0 +1,28 @@
+#!/usr/bin/perl -w
+#
+
+use strict;
+use warnings;
+
+my $file = shift or die "Usage: $0 <rbasmed file>\n"; # Print every S-line sequence of the given file as a FASTA record.
+
+my $len = 0;
+my $name = "";
+my $seq = "";
+open my $in, '<', $file or die $!; # three-argument open: the file name is never parsed as a mode or a pipe
+while (<$in>) {
+	if (/^E/) { # E line: the record name is its first two fields joined
+		my @e = split /\s+/, $_;
+		$name = $e[0].$e[1];
+		$len = 0;
+		$seq = "";
+	} elsif (/^S/) { # S line: field 1 is the sequence; emit it immediately
+		my @e = split /\s+/, $_;
+		$seq = $e[1];
+		$len = length $e[1];
+		print ">$name"."_L"."$len\n";
+		print $seq, "\n";
+	}
+}
+close $in;
+
diff --git a/select_best_rbcontig.pl b/select_best_rbcontig.pl
new file mode 100755
index 0000000..328d95d
--- /dev/null
+++ b/select_best_rbcontig.pl
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+#
+
+use strict;
+use warnings;
+
+my $file = shift or die "Usage: $0 <rbasmed file>\n"; # Print, per E record, the longest S-line sequence as a FASTA record.
+
+my $len = 0;
+my $name = "";
+my $seq = "";
+open my $in, '<', $file or die $!; # three-argument open: the file name is never parsed as a mode or a pipe
+while (<$in>) {
+	if (/^E/) { # a new E line closes the previous record
+		my @e = split /\s+/, $_;
+		if ($len != 0) {
+			print ">$name"."_L"."$len\n";
+			print $seq, "\n";
+		}
+		$name = $e[0].$e[1];
+		$len = 0;
+		$seq = "";
+	} elsif (/^S/) {
+		my @e = split /\s+/, $_;
+		if ($len < length($e[1])){ # strictly longer: the first of equally long sequences wins
+			$seq = $e[1];
+			$len = length $e[1];
+		}
+	}
+}
+close $in;
+
+print ">$name"."_L"."$len\n" if $len != 0; # flush the last record with the same guard as inside the loop
+print $seq, "\n" if $len != 0; # (no ">_L0" record for empty input or a trailing E line without S lines)
diff --git a/select_best_rbcontig_plus_read1.pl b/select_best_rbcontig_plus_read1.pl
new file mode 100755
index 0000000..cbeee28
--- /dev/null
+++ b/select_best_rbcontig_plus_read1.pl
@@ -0,0 +1,88 @@
+#!/usr/bin/perl -w
+#
+
+use strict;
+use warnings;
+
+my $file = shift or die "Usage: $0 <rbasmed file> <rbdiv output file>\n"; # first argument: rbasm output
+my $div = shift or die "Usage: $0 <rbasmed file> <rbdiv output file>\n"; # second argument: rbdiv output
+
+my $len = 0;
+my $name = "";
+my $seq = "";
+my $reads = "";
+
+open DIV, $div or die $!;
+open IN, $file or die $!;
+my %cls = ();
+$_ = <DIV>; # the first rbdiv line supplies @de for the first record
+my $dseq = $_;
+my @de = split /\s+/, $dseq;
+while (<IN>) {
+	if (/^E/) { # a new E line closes the previous record
+		my @e = split /\s+/, $_;
+		if ($len != 0) {
+			if (&isoverlap($reads)) { # print the stored sequence unchanged
+				print ">$name"."_L"."$len\n";
+				print $seq, "\n"
+			} else { # otherwise print the mock contig built from it and field 2 of the current rbdiv line
+				print ">$name"."_L"."$len\n";
+				print &gen_mock_ctg($seq, $de[2]), "\n";
+			}
+			while (<DIV>) { # advance to the first rbdiv line whose field 1 equals field 1 of this E line
+				my @e2 = split /\s+/, $_;
+				if ($e2[1] eq $e[1]) { # thanks to Jonathan Puritz for pointing this
+					$dseq = $_;
+					@de = @e2;
+					last;
+				}
+			}
+		}
+		$name = $e[0].$e[1];
+		$len = 0;
+		$seq = "";
+		$reads = "";
+	} elsif (/^S/) {
+		my @e = split /\s+/, $_;
+		if ($len < length($e[1])) { # strictly longer than the best sequence seen so far in this record
+			$seq = $e[1];
+			$len = length $e[1];
+			<IN>; # one line is read and discarded
+			$_ = <IN>; # the line after it is kept as $reads for isoverlap()
+			$reads = $_;
+		}
+	} 
+}
+close IN;
+close DIV;
+if (&isoverlap($reads)) { # flush the last record; unlike the in-loop flush this is not guarded by $len
+	print ">$name"."_L"."$len\n";
+	print $seq, "\n"
+} else {
+	print ">$name"."_L"."$len\n";
+	print &gen_mock_ctg($seq, $de[2]), "\n";
+}
+
+1;
+
+# Returns 1 when some whitespace-separated token of the argument that
+# does not contain "R" has a third ":"-separated field equal to 0;
+# returns 0 otherwise.
+sub isoverlap {
+	my ($line) = @_;
+	my @tokens = split /\s+/, $line;
+	for my $tok (@tokens) {
+		if ($tok =~ /R/) {
+			next;
+		}
+		my @parts = split /:/, $tok;
+		return 1 if $parts[2] == 0;
+	}
+	return 0;
+}
+
+sub gen_mock_ctg { # first argument + ten "N" + reversed, base-complemented second argument
+	my ($left, $right) = @_;
+	$right =~ tr/ACGTacgt/TGCAtgca/;
+	my $revcomp = scalar reverse $right;
+	return $left . ("N" x 10) . $revcomp;
+}
diff --git a/select_sec_rbcontig.pl b/select_sec_rbcontig.pl
new file mode 100755
index 0000000..1e61803
--- /dev/null
+++ b/select_sec_rbcontig.pl
@@ -0,0 +1,49 @@
+#!/usr/bin/perl -w
+#
+
+use strict;
+use warnings;
+
+my $file = shift or die "Usage: $0 <rbasmed file>\n"; # Print, per E record, the longest and second-longest S-line sequences as FASTA.
+
+my $len = 0;
+my $name = "";
+my $seq = "";
+my $seclen = 0;
+my $secseq = "";
+open my $in, '<', $file or die $!; # three-argument open: the file name is never parsed as a mode or a pipe
+while (<$in>) {
+	if (/^E/) { # a new E line closes the previous record
+		my @e = split /\s+/, $_;
+		if ($len) {
+			print ">$name"."_L"."$len\n";
+			print $seq, "\n";
+		}
+		if ($seclen) {
+			print ">$name"."_L"."$seclen\n";
+			print $secseq, "\n";
+		}
+		$name = $e[0].$e[1];
+		$len = $seclen = 0;
+		$seq = $secseq = "";
+	} elsif (/^S/) {
+		my @e = split /\s+/, $_;
+		if ($len < length($e[1])){ # new longest: the previous longest becomes the runner-up
+			$secseq = $seq;
+			$seq = $e[1];
+			$seclen = length $secseq;
+			$len = length $e[1];
+		} elsif (length($e[1]) > $seclen) { # not the longest, but longer than the current runner-up
+			$secseq = $e[1];
+			$seclen = length $secseq;
+		}
+	}
+}
+close $in;
+
+print ">$name"."_L"."$len\n" if $len; # flush the last record with the same guard as inside the loop
+print $seq, "\n" if $len; # (no ">_L0" record for empty input or a trailing E line without S lines)
+if ($seclen) {
+	print ">$name"."_L"."$seclen\n";
+	print $secseq, "\n";
+}
diff --git a/simp_asm.h b/simp_asm.h
new file mode 100644
index 0000000..e7a78b5
--- /dev/null
+++ b/simp_asm.h
@@ -0,0 +1,287 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __SIMPLE_ASM_RJ_H
+#define __SIMPLE_ASM_RJ_H
+
+#include "string.h"
+#include "sr_aln.h"
+#include "list.h"
+#include "dna.h"
+#include "stdaln.h"
+
+typedef struct { /* Per-read record (element type of seqv); filled in by push_simpasm(). */
+	uint32_t seqid, len;
+	uint32_t ctg_id, ctg_dir:1, ctg_off:20, used:1, rank:10; /* bit-fields total 32 bits; ctg_off holds at most 2^20-1, rank at most 1023 */
+} SimpSeqInfo;
+
+define_list(seqv, SimpSeqInfo);
+
+typedef struct { /* Per-contig record (element type of ctgv). */
+	uint32_t len:31, closed:1; /* closed is set to 1 by simple_join_contigs() on the contig that was merged away; iter_simpasm() skips closed contigs */
+	String   *seq;
+	u32list  *sids; /* indices into SimpAssembler.rds of the reads placed on this contig */
+} SimpContigInfo;
+
+define_list(ctgv, SimpContigInfo);
+
+typedef struct { /* Assembler state created by init_simpasm() and released by free_simpasm(). */
+	SR_SeqDB *sra;
+	u32list  *rids; /* not assigned or freed by any function in this header */
+	seqv     *rds; /* one SimpSeqInfo per pushed read */
+	ctgv     *ctgs; /* one SimpContigInfo per pushed read; merged-away entries are flagged closed */
+	u64hash  *r2r; /* read-pair keys already seen in simple_assemble() */
+	sr_hitv  *ols; /* hits kept by simple_assemble(), one per read pair */
+	uint32_t iter_idx; /* cursor used by begin_iter_simpasm()/iter_simpasm() */
+	AlnParam ap;
+	int      aln_ext_size; /* set to 10 by init_simpasm(); subtracted from read offsets in simple_join_contigs() */
+} SimpAssembler;
+
+static inline SimpAssembler* init_simpasm(uint32_t n_cpu, uint32_t kmer_size, uint32_t rd_len, uint32_t strand, uint32_t min_ol, float min_sm, uint32_t max_mm, int allow_gap){ /* Create an assembler; the arguments are forwarded to sr_init_sdb(). Returns NULL when the allocation fails. */
+	SimpAssembler *sa = calloc(1, sizeof(SimpAssembler)); /* zero-filled so rids and iter_idx, which are not set below, never hold indeterminate values */
+	if(sa == NULL) return NULL;
+	sa->sra  = sr_init_sdb(NULL, n_cpu, kmer_size, rd_len, strand, min_ol, min_sm, max_mm, allow_gap);
+	sa->rds  = init_seqv(16);
+	sa->ctgs = init_ctgv(16);
+	sa->r2r  = init_u64hash(13);
+	sa->ols  = init_sr_hitv(64);
+	sa->ap   = (AlnParam){10, 2, 0, aln_sm_nt, 16, 75}; //change 2 to 0 at para pos 3
+	sa->aln_ext_size = 10;
+	return sa;
+}
+
+static inline void free_simpasm(SimpAssembler *sa){ /* Release the assembler and everything it owns. */
+	uint32_t idx;
+	/* Order: read index, read table, per-contig buffers, then the containers and sa itself. */
+	sr_free_sdb(sa->sra);
+	free_seqv(sa->rds);
+	for(idx = 0; idx != count_ctgv(sa->ctgs); idx ++){
+		SimpContigInfo *entry = ref_ctgv(sa->ctgs, idx);
+		free_string(entry->seq);
+		free_u32list(entry->sids);
+	}
+	free_ctgv(sa->ctgs);
+	free_u64hash(sa->r2r);
+	free_sr_hitv(sa->ols);
+	free(sa);
+}
+
+static inline void reset_simpasm(SimpAssembler *sa){ /* Drop all reads and contigs so the assembler can be reused. */
+	uint32_t pos = 0;
+	clear_seqv(sa->rds);
+	while(pos < count_ctgv(sa->ctgs)){
+		SimpContigInfo *item = ref_ctgv(sa->ctgs, pos);
+		free_string(item->seq);
+		free_u32list(item->sids);
+		pos ++;
+	}
+	clear_ctgv(sa->ctgs);
+	sr_reset_sdb(sa->sra);
+}
+
+static inline void push_simpasm(SimpAssembler *sa, uint32_t seqid, char *seq, uint32_t seqlen, uint8_t rank){
+	/* Register one read: it gets a read record plus a new single-read contig, and is handed to the read index. */
+	uint32_t ctg_index = count_ctgv(sa->ctgs);
+	SimpSeqInfo *rec = next_ref_seqv(sa->rds);
+	SimpContigInfo *singleton = next_ref_ctgv(sa->ctgs);
+	rec->seqid   = seqid;
+	rec->len     = seqlen;
+	rec->ctg_id  = ctg_index;
+	rec->ctg_dir = 0;
+	rec->ctg_off = 0;
+	rec->used    = 0;
+	rec->rank    = rank;
+	singleton->len    = seqlen;
+	singleton->closed = 0;
+	singleton->seq    = init_string(seqlen);
+	append_string(singleton->seq, seq, seqlen);
+	uc_string(singleton->seq);
+	singleton->sids   = init_u32list(2);
+	push_u32list(singleton->sids, count_seqv(sa->rds) - 1);
+	sr_push_sdb(sa->sra, seq, seqlen, 1); /* last argument 1: the read takes part in alignment */
+}
+
+static inline int cmp_sr_alnhit(const void *e1, const void *e2){
+	/* qsort() comparator for SR_AlnHit: larger n_ol sorts first; on equal n_ol, larger n_mm sorts first. */
+	/* NOTE(review): the tie-break puts the hit with MORE mismatches first -- confirm this is intended. */
+	const SR_AlnHit *lhs = (const SR_AlnHit*)e1;
+	const SR_AlnHit *rhs = (const SR_AlnHit*)e2;
+	if(lhs->n_ol > rhs->n_ol) return -1;
+	if(lhs->n_ol < rhs->n_ol) return 1;
+	if(lhs->n_mm > rhs->n_mm) return -1;
+	if(lhs->n_mm < rhs->n_mm) return 1;
+	return 0;
+}
+
+static inline void simple_reverse_contig(SimpAssembler *sa, uint32_t ctg_id){
+	/* Flip an open contig in place: its sequence goes through reverse_dna() and every read on it */
+	/* has its direction toggled and its offset mirrored to the other end. Closed contigs are untouched. */
+	SimpContigInfo *target = ref_ctgv(sa->ctgs, ctg_id);
+	uint32_t k;
+	if(target->closed) return;
+	reverse_dna(target->seq->string, target->len);
+	for(k = 0; k < count_u32list(target->sids); k ++){
+		SimpSeqInfo *member = ref_seqv(sa->rds, get_u32list(target->sids, k));
+		member->ctg_off = target->len - (member->ctg_off + member->len);
+		member->ctg_dir ^= 1;
+	}
+}
+
+static inline void simple_move_rids(SimpAssembler *sa, uint32_t dst, uint32_t src, int off){ /* Reassign every read of contig src to contig dst, shifting read offsets by off. */
+	SimpContigInfo *ctg1, *ctg2;
+	SimpSeqInfo *rd;
+	int i;
+	ctg1 = ref_ctgv(sa->ctgs, dst);
+	ctg2 = ref_ctgv(sa->ctgs, src);
+	for(i=0;i<(int)count_u32list(ctg2->sids);i++){
+		rd = ref_seqv(sa->rds, get_u32list(ctg2->sids, i));
+		rd->ctg_id = dst;
+		rd->ctg_off += off; /* ctg_off is a 20-bit unsigned field, so a negative off wraps here; the loop below undoes it */
+		push_u32list(ctg1->sids, get_u32list(ctg2->sids, i)); /* src's own sids list is left as it was */
+	}
+	if(off < 0){ /* shift every read now on dst (moved ones included) by -off: moved reads regain their old offsets, the others move right */
+		for(i=0;i<(int)count_u32list(ctg1->sids);i++){
+			rd = ref_seqv(sa->rds, get_u32list(ctg1->sids, i));
+			rd->ctg_off -= off;
+		}
+	}
+}
+
+static inline int simple_join_contigs(SimpAssembler *sa, SR_AlnHit *hit){ /* Try to merge the contigs holding the two reads of hit; returns 1 when they were merged, 0 otherwise. */
+	SimpContigInfo *ctg1, *ctg2;
+	SimpSeqInfo *rd1, *rd2;
+	AlnAln *aa;
+	int i, dir1, dir2, off1, off2, off3, off4, mm, mn, ret;
+	ret = 0;
+	rd1 = ref_seqv(sa->rds, hit->rid1);
+	rd2 = ref_seqv(sa->rds, hit->rid2);
+	dir1 = rd1->ctg_dir ^ hit->dir1; /* 1 when read 1 lies on its contig in the opposite direction to the hit */
+	dir2 = rd2->ctg_dir ^ hit->dir2; /* same for read 2, using read 2's own direction (was rd1->ctg_dir) */
+	if(dir1 == dir2) dir1 = dir2 = 0; /* both or neither differ: the contigs already agree with each other */
+	if(dir1) simple_reverse_contig(sa, rd1->ctg_id);
+	if(dir2) simple_reverse_contig(sa, rd2->ctg_id);
+	ctg1 = ref_ctgv(sa->ctgs, rd1->ctg_id);
+	ctg2 = ref_ctgv(sa->ctgs, rd2->ctg_id);
+	dir1 = rd1->ctg_dir; /* not read again below */
+	dir2 = rd2->ctg_dir; /* not read again below */
+	off1 = rd1->ctg_off - sa->aln_ext_size + (int)hit->off; /* alignment window starts aln_ext_size before each read, clamped at 0 */
+	off2 = rd2->ctg_off - sa->aln_ext_size;
+	if(off1 < 0) off1 = 0;
+	if(off2 < 0) off2 = 0;
+	if(off1 < off2){ off2 -= off1; off1 = 0; } /* remove the common part so at least one window starts at 0 */
+	else {off1 -= off2; off2 = 0; }
+	aa = aln_stdaln_aux(ctg1->seq->string + off1, ctg2->seq->string + off2, &sa->ap, 1, 1, ctg1->len - off1, ctg2->len - off2);
+	mn = mm = 0; /* mn: FROM_M steps on the path; mm: those whose two contig characters differ */
+	off3 = off4 = -1; /* -1 until the first qualifying path step has been seen */
+	for(i=aa->path_len-1;i>=0;i--){ /* the path is walked from its last element down to its first */
+		if(aa->path[i].ctype == FROM_M){
+			if(off3 == -1) off3 = aa->path_len - 1 - i;
+			if(off4 == -1) off4 = aa->path_len - 1 - i;
+			mn ++;
+			if(ctg1->seq->string[aa->path[i].i-1+off1] != ctg2->seq->string[aa->path[i].j-1+off2]) mm ++;
+		} else if(aa->path[i].ctype == FROM_D){
+			if(off3 == -1) off3 = aa->path_len - 1 - i;
+		} else {
+			if(off4 == -1) off4 = aa->path_len - 1 - i;
+		}
+	}
+	//fprintf(stdout, "\n\nHIT %d %d %d, %d %d %d + %d\n", hit->rid1, rd1->ctg_dir, rd1->ctg_off, hit->rid2, rd2->ctg_dir, rd2->ctg_off, hit->off);
+	//sr_output_hit(sa->sra, hit, stdout, 1);
+	//fprintf(stdout, "CTG %s\nCTG %s\n", ctg1->seq->string, ctg2->seq->string);
+	//fprintf(stdout, "%s\n%s\n", ctg1->seq->string + off1, ctg2->seq->string + off2);
+	//fprintf(stdout, "%d %d, %d %d\n", off1, off3, off2, off4);
+	//fprintf(stdout, "%d/%d\n", mm, mn);
+	//fprintf(stdout, "%s\n%s\n%s\n\n", aa->out1, aa->outm, aa->out2);
+	//fflush(stdout);
+	if(mn >= (int)sa->sra->min_overlap && mm <= mn * (1 - sa->sra->min_similarity)){ /* accept: enough matched columns and few enough mismatches */
+		off3 = off3 - off1;
+		off4 = off4 - off2;
+		simple_move_rids(sa, rd1->ctg_id, rd2->ctg_id, off4 - off3);
+		if(off1){
+			trunc_string(ctg1->seq, off1); /* keep ctg1's prefix that lies before the alignment window */
+		} else {
+			clear_string(ctg1->seq);
+			if(off2) append_string(ctg1->seq, ctg2->seq->string, off2); /* otherwise the prefix comes from ctg2 */
+		}
+		for(i=0;i<aa->path_len;i++){ /* merged column: ctg1's character, or ctg2's where ctg1 has a gap */
+			add_char_string(ctg1->seq, (aa->out1[i] == '-')? aa->out2[i] : aa->out1[i]);
+		}
+		ctg1->len = ctg1->seq->size;
+		ctg2->closed = 1; /* ctg2 is kept in the list but skipped from now on */
+		//fprintf(stdout, "## %s\n", ctg1->seq->string);
+		//fflush(stdout);
+		ret = 1;
+	} else {
+		//fprintf(stdout, "EE\n");
+		//fflush(stdout);
+	}
+	aln_free_AlnAln(aa);
+	return ret;
+}
+
+static inline void simple_assemble(SimpAssembler *sa){ /* Align the pushed reads, keep one hit per read pair, then join contigs in three passes ordered by rank distance. */
+	SimpSeqInfo *rd1, *rd2;
+	SR_AlnHit *hit;
+	uint64_t key, *k;
+	uint32_t i, rank_type, rtype;
+	int exists;
+	sr_ready_sdb(sa->sra);
+	sr_aln_sdb(sa->sra);
+	//for(i=0;i<count_sr_hitv(sa->sra->hits);i++){
+		//hit = ref_sr_hitv(sa->sra->hits, i);
+		//sr_output_hit(sa->sra, hit, stdout, 1);
+	//}
+	qsort(as_array_sr_hitv(sa->sra->hits), count_sr_hitv(sa->sra->hits), sizeof(SR_AlnHit), cmp_sr_alnhit); /* order is defined by cmp_sr_alnhit */
+	clear_u64hash(sa->r2r);
+	clear_sr_hitv(sa->ols);
+	for(i=0;i<count_sr_hitv(sa->sra->hits);i++){
+		hit = ref_sr_hitv(sa->sra->hits, i);
+		key = (hit->rid1 < hit->rid2)? ((((uint64_t)hit->rid1) << 32) | hit->rid2) : ((((uint64_t)hit->rid2) << 32) | hit->rid1); /* unordered pair key: smaller read id in the high 32 bits */
+		k = prepare_u64hash(sa->r2r, key, &exists);
+		if(exists) continue; /* a hit for this pair was already kept (the earliest in sorted order) */
+		*k = key;
+		push_sr_hitv(sa->ols, *hit);
+	}
+	for(rank_type=0;rank_type<3;rank_type++){ /* pass 0: equal ranks; pass 1: ranks differing by one; pass 2: everything else */
+		for(i=0;i<count_sr_hitv(sa->ols);i++){
+			hit = ref_sr_hitv(sa->ols, i);
+			rd1 = ref_seqv(sa->rds, hit->rid1);
+			rd2 = ref_seqv(sa->rds, hit->rid2);
+			if(rd1->ctg_id == rd2->ctg_id) continue; /* both reads already sit on the same contig */
+			if(rd1->rank == rd2->rank) rtype = 0;
+			else if(rd1->rank + 1 == rd2->rank || rd1->rank == rd2->rank + 1) rtype = 1;
+			else rtype = 2;
+			if(rtype != rank_type) continue;
+			simple_join_contigs(sa, hit); /* result ignored: a rejected join leaves both contigs open */
+		}
+	}
+}
+
+static inline void begin_iter_simpasm(SimpAssembler *sa){ sa->iter_idx = 0; } /* rewind the cursor that iter_simpasm() advances */
+
+static inline SimpContigInfo* iter_simpasm(SimpAssembler *sa){ /* Return the next contig that is not closed, advancing the cursor; NULL once the list is exhausted. */
+	SimpContigInfo *cur;
+	for(;;){
+		if(sa->iter_idx >= count_ctgv(sa->ctgs)) return NULL;
+		cur = ref_ctgv(sa->ctgs, sa->iter_idx ++);
+		if(!cur->closed) return cur;
+	}
+}
+
+#endif
diff --git a/sort.h b/sort.h
new file mode 100644
index 0000000..a04851c
--- /dev/null
+++ b/sort.h
@@ -0,0 +1,259 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __SORT_RJ_H
+#define __SORT_RJ_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define cmp_2nums_proc(a, b) if((a) < (b)) return -1; else if((a) > (b)) return 1; /* statement macro: returns from the enclosing function unless a == b, in which case control falls through */
+#define num_cmp_script(e1, e2, obj, val_macro) ((val_macro(e1, obj) == val_macro(e2, obj))? 0 : ((val_macro(e1, obj) < val_macro(e2, obj))? -1 : 1)) /* expression macro: -1, 0 or 1 from comparing val_macro(e, obj) of both elements; val_macro may be expanded up to twice per element */
+
+#define define_bubble_sort(name, e_type, cmp_func)	\
+static inline void name(e_type* list, size_t size, void *ref){ /* defines name(): in-place ascending bubble sort; cmp_func(a, b, ref) > 0 means a goes after b */	\
+	size_t i, j, n;	\
+	e_type t;	\
+	i = 0;	\
+	while(i < size){	\
+		n = 0; /* set to 1 when this pass swaps anything */	\
+		for(j=size-1;j>i;j--){ /* each pass moves the smallest remaining element down to index i */	\
+			if(cmp_func(list[j-1], list[j], ref) > 0){	\
+				t = list[j-1]; list[j-1] = list[j]; list[j] = t;	\
+				n = 1;	\
+			}	\
+		}	\
+		if(n == 0) break; /* no swap in a full pass: already sorted */	\
+		i ++;	\
+	}	\
+	if(ref == ref) return; /* no effect beyond referencing ref when cmp_func ignores it */	\
+}
+
+#define sort_array(rs, n, e_type, expression)	\
+do { /* in-place ascending sort of rs[0..n-1]; expression compares the locals a and b and yields <0, 0 or >0 */	\
+	size_t s, e, i, j, m, stack[64][2], x; /* NOTE(review): the range stack has 64 slots and x is never bounds-checked */	\
+	e_type p, t, a, b;	\
+	if(n < 2) break;	\
+	x = 0;	\
+	stack[x][0] = 0; stack[x][1] = n - 1; x ++;	\
+	while(x){ /* quicksort phase driven by an explicit stack of [s, e] ranges */	\
+		x --; s = stack[x][0]; e = stack[x][1];	\
+		m = s + (e - s) / 2;	\
+		a = rs[s]; b = rs[m]; /* next lines: order rs[s], rs[m], rs[e] so rs[m] is their median */	\
+		if((int)(expression) > 0){ t = rs[s]; rs[s] = rs[m]; rs[m] = t; }	\
+		a = rs[m]; b = rs[e];	\
+		if((int)(expression) > 0){	\
+			t = rs[e]; rs[e] = rs[m]; rs[m] = t;	\
+			a = rs[s]; b = rs[m];	\
+			if((int)(expression) > 0){ t = rs[s]; rs[s] = rs[m]; rs[m] = t; }	\
+		}	\
+		p = rs[m]; /* pivot value */	\
+		i = s + 1; j = e - 1;	\
+		while(1){	\
+			b = p;	\
+			while(a = rs[i], (int)(expression) < 0) i ++; /* skip elements that compare below the pivot */	\
+			a = p;	\
+			while(b = rs[j], (int)(expression) < 0) j --; /* skip elements the pivot compares below */	\
+			if(i < j){	\
+				t = rs[i]; rs[i] = rs[j]; rs[j] = t;	\
+				i ++; j --;	\
+			} else break;	\
+		}	\
+		if(i == j){ i ++; j --; }	\
+		if(s + 4 < j){ stack[x][0] = s; stack[x][1] = j; x ++; } /* sub-ranges of five or fewer elements are not pushed */	\
+		if(i + 4 < e){ stack[x][0] = i; stack[x][1] = e; x ++; }	\
+	}	\
+	for(i=0;i<n;i++){ /* final bubble passes put the elements of the unpushed small ranges in order */	\
+		x = 0;	\
+		for(j=n-1;j>i;j--){	\
+			a = rs[j - 1]; b = rs[j];	\
+			if((int)(expression) > 0){ t = rs[j - 1]; rs[j - 1] = rs[j]; rs[j] = t; x = 1; }	\
+		}	\
+		if(x == 0) break; /* a pass without swaps ends the sort */	\
+	}	\
+} while(0)
+
+#define apply_array(rs, rs_size, e_type, expression)	\
+do { /* evaluates expression once per element, with the local a holding a copy of the element */	\
+	size_t i, size;	\
+	e_type a;	\
+	size = rs_size; /* rs_size is evaluated once */	\
+	for(i=0;i<size;i++){	\
+		a = (rs)[i];	\
+		(expression);	\
+	}	\
+} while(0)
+
+#define ref_apply_array(rs, rs_size, e_type, expression)	\
+do { /* like apply_array, but the local a points at the element, so expression can modify it in place */	\
+	size_t i, size;	\
+	e_type *a;	\
+	size = rs_size; /* rs_size is evaluated once */	\
+	for(i=0;i<size;i++){	\
+		a = (rs) + i;	\
+		(expression);	\
+	}	\
+} while(0)
+
+#define define_quick_sort(name, e_type, cmp_func)	\
+static inline void name(e_type *rs, size_t n, void *obj){ /* defines name(): in-place ascending sort of rs[0..n-1] using cmp_func(a, b, obj) */	\
+	size_t s, e, i, j, m, stack[64][2], x; /* NOTE(review): the range stack has 64 slots and x is never bounds-checked */	\
+	e_type p, t;	\
+	if(n < 2) return;	\
+	x = 0;	\
+	stack[x][0] = 0; stack[x][1] = n - 1; x ++;	\
+	while(x){ /* quicksort phase driven by an explicit stack of [s, e] ranges */	\
+		x --; s = stack[x][0]; e = stack[x][1];	\
+		m = s + (e - s) / 2;	\
+		if(cmp_func(rs[s], rs[m], obj) > 0){ t = rs[s]; rs[s] = rs[m]; rs[m] = t; } /* this and the next lines order rs[s], rs[m], rs[e] so rs[m] is their median */	\
+		if(cmp_func(rs[m], rs[e], obj) > 0){	\
+			t = rs[e]; rs[e] = rs[m]; rs[m] = t;	\
+			if(cmp_func(rs[s], rs[m], obj) > 0){ t = rs[s]; rs[s] = rs[m]; rs[m] = t; }	\
+		}	\
+		p = rs[m]; /* pivot value */	\
+		i = s + 1; j = e - 1;	\
+		while(1){	\
+			while(cmp_func(rs[i], p, obj) < 0) i ++; /* skip elements that compare below the pivot */	\
+			while(cmp_func(p, rs[j], obj) < 0) j --; /* skip elements the pivot compares below */	\
+			if(i < j){	\
+				t = rs[i]; rs[i] = rs[j]; rs[j] = t;	\
+				i ++; j --;	\
+			} else break;	\
+		}	\
+		if(i == j){ i ++; j --; }	\
+		if(s + 4 < j){ stack[x][0] = s; stack[x][1] = j; x ++; } /* sub-ranges of five or fewer elements are not pushed */	\
+		if(i + 4 < e){ stack[x][0] = i; stack[x][1] = e; x ++; }	\
+	}	\
+	for(i=0;i<n;i++){ /* final bubble passes put the elements of the unpushed small ranges in order */	\
+		x = 0;	\
+		for(j=n-1;j>i;j--){	\
+			if(cmp_func(rs[j - 1], rs[j], obj) > 0){ t = rs[j - 1]; rs[j - 1] = rs[j]; rs[j] = t; x = 1; }	\
+		}	\
+		if(x == 0) break; /* a pass without swaps ends the sort */	\
+	}	\
+	if(obj == obj) return; /* no effect beyond referencing obj when cmp_func ignores it */	\
+}
+
+#define define_merge(name, e_type, cmp_func, output_func)	\
+static inline void name(e_type *list1, size_t size1, e_type *list2, size_t size2, void *ref){ /* defines name(): merge two sorted arrays, passing every element to output_func(elem, ref) in order */	\
+	size_t i, j;	\
+	i = j = 0;	\
+	while(i < size1 && j < size2){	\
+		if(cmp_func(list1[i], list2[j], ref) <= 0){ /* sign test: any positive result means list1[i] sorts after list2[j]; ties take list1 first */	\
+			output_func(list1[i], ref);	\
+			i ++;	\
+		} else {	\
+			output_func(list2[j], ref);	\
+			j ++;	\
+		}	\
+	}	\
+	while(i < size1){ output_func(list1[i++], ref); } /* one input is exhausted: drain the other */	\
+	while(j < size2){ output_func(list2[j++], ref); }	\
+}	\
+	\
+static inline size_t name##_files(FILE **files, int n, void *ref){ /* defines name_files(): n-way merge of binary e_type records read from files; returns the number of records output */	\
+	e_type *es;	\
+	int *flags, i, min;	\
+	size_t ret;	\
+	ret = 0;	\
+	es = malloc(sizeof(e_type) * n); /* current record of each file */	\
+	flags = malloc(sizeof(int) * n); /* per file: 0 = needs a read, 1 = record loaded, 2 = read failed or end of file */	\
+	for(i=0;i<n;i++) flags[i] = 0;	\
+	while(1){	\
+		min = -1;	\
+		for(i=0;i<n;i++){	\
+			if(flags[i] == 0){	\
+				flags[i] = (fread(es + i, sizeof(e_type), 1, files[i]) == 1)? 1 : 2;	\
+			}	\
+			if(flags[i] == 1){	\
+				if(min == -1){	\
+					min = i;	\
+				} else if(cmp_func(es[i], es[min], ref) <= 0){ /* sign test; on ties the later file wins */	\
+					min = i;	\
+				}	\
+			}	\
+		}	\
+		if(min == -1) break; /* every file is exhausted */	\
+		output_func(es[min], ref);	\
+		flags[min] = 0; /* refill this slot on the next round */	\
+		ret ++;	\
+	}	\
+	free(es);	\
+	free(flags);	\
+	return ret;	\
+}
+
+#define define_unique_merge(name, e_type, cmp_func, output_func)	\
+static inline void name(e_type *list1, size_t size1, e_type *list2, size_t size2, void *ref){ /* defines name(): merge two sorted arrays; when both heads compare equal only list1's element is output */	\
+	size_t i, j;	\
+	i = j = 0;	\
+	while(i < size1 && j < size2){	\
+		/* cmp_func is called with two arguments here; only the sign of its result is used */	\
+		int unique_merge_cmp_ = cmp_func(list1[i], list2[j]);	\
+		if(unique_merge_cmp_ == 0){ output_func(list1[i++], ref); j ++; }	\
+		else if(unique_merge_cmp_ < 0){ output_func(list1[i++], ref); }	\
+		else { output_func(list2[j++], ref); }	\
+	}	\
+	while(i < size1){ output_func(list1[i++], ref); } /* one input is exhausted: drain the other */	\
+	while(j < size2){ output_func(list2[j++], ref); }	\
+}
+
+#define define_reverse_array(name, e_type)	\
+static inline void name(e_type *list, size_t size){	\
+	/* defines name(): reverse list[0..size-1] in place by swapping from both ends toward the middle */	\
+	size_t head, tail;	\
+	e_type swap;	\
+	if(size == 0) return;	\
+	for(head = 0, tail = size - 1; head < tail; head ++, tail --){	\
+		swap = list[head];	\
+		list[head] = list[tail];	\
+		list[tail] = swap;	\
+	}	\
+}
+
+#define define_apply_array(name, e_type, apply_func)	\
+static inline size_t name(e_type *list, size_t size, void *ref){ /* defines name(): returns the sum of apply_func(elem, ref) over all elements */	\
+	size_t i, ret;	\
+	ret = 0;	\
+	for(i=0;i<size;i++){	\
+		ret += apply_func(list[i], ref);	\
+	}	\
+	return ret;	\
+	ref = NULL; /* never executed; only references ref when apply_func ignores it */	\
+}
+
+#define define_search_array(name, e_type, cmp_func)	\
+static inline long long name(e_type *array, long long size, e_type key, void *ref){ /* defines name(): binary search in a sorted array; returns the index of the first element equal to key, or -(insertion point + 1) when absent */	\
+	long long i, j, m;	\
+	i = 0;	\
+	j = size;	\
+	while(i < j){ /* narrows [i, j) to the first position whose element does not compare below key */	\
+		m = i + (j - i) / 2;	\
+		if(cmp_func(array[m], key, ref) < 0){	\
+			i = m + 1;	\
+		} else {	\
+			j = m;	\
+		}	\
+	}	\
+	if(i < size && cmp_func(array[i], key, ref) == 0) return i;	\
+	else return - (i + 1); /* always negative, so index 0 and "insert at 0" stay distinguishable */	\
+	if(ref) return 0; /* never executed; only references ref when cmp_func ignores it */	\
+}
+
+#endif
diff --git a/stdaln.c b/stdaln.c
new file mode 100644
index 0000000..37522d5
--- /dev/null
+++ b/stdaln.c
@@ -0,0 +1,797 @@
+/*
+ * stdaln.c -- standard alignment (local and banded global alignment)
+ *
+ * Copyright (c) 2003-2006, Li Heng <liheng at genomics.org.cn>
+ *                                  <lh3lh3 at gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "stdaln.h"
+
+/*
+ * char -> 17 (=16+1) nucleotide codes, indexed by the character's value
+ * (0..255); upper and lower case letters map to the same code.
+ *
+ * Codes 0..15 are the positions in aln_nt16_rev_table ("XAGRCMSVTWKDYHBN").
+ * With A=1, G=2, C=4 and T=8 every ambiguity letter is the OR of the bases it
+ * stands for (e.g. R = A|G = 3, N = 15).  'X' maps to 0, '-' maps to 16 and
+ * every other character maps to 15 (N).
+ *
+ * NOTE(review): code 16 ('-') has no row or column in the 16x16 matrices
+ * aln_sm_nt / aln_sm_read, so a gap character fed to the aligner would index
+ * past those matrices -- confirm callers never pass '-'.
+ */
+unsigned char aln_nt16_table[256] = {
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,16 /*'-'*/,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+	15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+	15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+	15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
+};
+/* code -> character, the inverse of aln_nt16_table (17 entries, 0..16) */
+char *aln_nt16_rev_table = "XAGRCMSVTWKDYHBN-";
+
+/*
+ * char -> nucleotide code for the 4-base alphabet, indexed by the character's
+ * value (0..255); upper and lower case letters map to the same code.
+ *
+ * A=0, G=1, C=2, T=3; every other character maps to 4 (N) except '-', which
+ * maps to 5.  The codes are the positions in aln_nt4_rev_table ("AGCTN-").
+ *
+ * NOTE(review): code 5 ('-') has no row or column in the 5x5 matrix
+ * aln_sm_hs -- confirm callers never pass '-' to the aligner.
+ */
+unsigned char aln_nt4_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /*'-'*/, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+/* code -> character, the inverse of aln_nt4_table (6 entries, 0..5) */
+char *aln_nt4_rev_table = "AGCTN-";
+
+/*
+ * char -> amino-acid code, indexed by the character's value (0..255); upper
+ * and lower case letters map to the same code.
+ *
+ * Codes 0..19 are the twenty amino acids in the order of aln_aa_rev_table
+ * ("ARNDCQEGHILKMFPSTWYV"), 20 is '*', 21 is the catch-all 'X' used for every
+ * unlisted character, and 22 is '-'.  That makes 23 distinct codes (0..22).
+ */
+unsigned char aln_aa_table[256] = {
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,20,21, 21,22 /*'-'*/,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
+	14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
+	21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
+	14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21
+};
+/* code -> character, the inverse of aln_aa_table (23 entries, 0..22) */
+char *aln_aa_rev_table = "ARNDCQEGHILKMFPSTWYV*X-";
+                       /* 01234567890123456789012 */
+
+/*
+ * Codon translation table.  It is not used anywhere in stdaln.c; it is kept
+ * here so that it does not have to be written again when it is needed.
+ *
+ * The values are aln_aa_table codes, and aln_trans_table_eu_char holds the
+ * same translation as letters.  Judging from the letters, entries 0..63
+ * appear to be indexed by 16*b1 + 4*b2 + b3 with the aln_nt4_table base codes
+ * (A=0, G=1, C=2, T=3): AAA=K, AAG=K, AAC=N, AAT=N, ... -- TODO confirm.
+ * Entry 64 is 21 ('X') and entry 65 is 22 ('-'); the character string has no
+ * counterpart for entry 65.
+ */
+unsigned char aln_trans_table_eu[66] = {
+	11,11, 2, 2,  1, 1,15,15, 16,16,16,16,  9,12, 9, 9,
+	 6, 6, 3, 3,  7, 7, 7, 7,  0, 0, 0, 0, 19,19,19,19,
+	 5, 5, 8, 8,  1, 1, 1, 1, 14,14,14,14, 10,10,10,10,
+	20,20,18,18, 20,17, 4, 4, 15,15,15,15, 10,10,13,13, 21, 22
+};
+char *aln_trans_table_eu_char = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFFX";
+                              /* 01234567890123456789012345678901234567890123456789012345678901234 */
+/*
+ * BLOSUM62 substitution scores as a flat, row-major 22x22 matrix.  Rows and
+ * columns follow the order in the header comment below, which is the order
+ * of codes 0..21 in aln_aa_rev_table; the score of codes (a, b) is at index
+ * a * 22 + b.  Used by aln_param_aa2aa.
+ */
+int aln_sm_blosum62[] = {
+/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
+	 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
+	-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
+	-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
+	-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
+	 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
+	-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
+	-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
+	 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
+	-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
+	-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
+	-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
+	-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
+	-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
+	-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
+	-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
+	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
+	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
+	-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
+	-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
+	 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
+	-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
+	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
+};
+
+/*
+ * BLOSUM45 substitution scores as a flat, row-major 22x22 matrix, laid out
+ * exactly like aln_sm_blosum62: rows and columns follow the header comment
+ * below (codes 0..21 of aln_aa_rev_table), score of (a, b) at a * 22 + b.
+ * No AlnParam defined in stdaln.c refers to it.
+ */
+int aln_sm_blosum45[] = {
+/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
+	 5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-2,-2, 0,-5, 0,
+	-2, 7, 0,-1,-3, 1, 0,-2, 0,-3,-2, 3,-1,-2,-2,-1,-1,-2,-1,-2,-5,-1,
+	-1, 0, 6, 2,-2, 0, 0, 0, 1,-2,-3, 0,-2,-2,-2, 1, 0,-4,-2,-3,-5,-1,
+	-2,-1, 2, 7,-3, 0, 2,-1, 0,-4,-3, 0,-3,-4,-1, 0,-1,-4,-2,-3,-5,-1,
+	-1,-3,-2,-3,12,-3,-3,-3,-3,-3,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1,-5,-2,
+	-1, 1, 0, 0,-3, 6, 2,-2, 1,-2,-2, 1, 0,-4,-1, 0,-1,-2,-1,-3,-5,-1,
+	-1, 0, 0, 2,-3, 2, 6,-2, 0,-3,-2, 1,-2,-3, 0, 0,-1,-3,-2,-3,-5,-1,
+	 0,-2, 0,-1,-3,-2,-2, 7,-2,-4,-3,-2,-2,-3,-2, 0,-2,-2,-3,-3,-5,-1,
+	-2, 0, 1, 0,-3, 1, 0,-2,10,-3,-2,-1, 0,-2,-2,-1,-2,-3, 2,-3,-5,-1,
+	-1,-3,-2,-4,-3,-2,-3,-4,-3, 5, 2,-3, 2, 0,-2,-2,-1,-2, 0, 3,-5,-1,
+	-1,-2,-3,-3,-2,-2,-2,-3,-2, 2, 5,-3, 2, 1,-3,-3,-1,-2, 0, 1,-5,-1,
+	-1, 3, 0, 0,-3, 1, 1,-2,-1,-3,-3, 5,-1,-3,-1,-1,-1,-2,-1,-2,-5,-1,
+	-1,-1,-2,-3,-2, 0,-2,-2, 0, 2, 2,-1, 6, 0,-2,-2,-1,-2, 0, 1,-5,-1,
+	-2,-2,-2,-4,-2,-4,-3,-3,-2, 0, 1,-3, 0, 8,-3,-2,-1, 1, 3, 0,-5,-1,
+	-1,-2,-2,-1,-4,-1, 0,-2,-2,-2,-3,-1,-2,-3, 9,-1,-1,-3,-3,-3,-5,-1,
+	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-3,-1,-2,-2,-1, 4, 2,-4,-2,-1,-5, 0,
+	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-1,-1, 2, 5,-3,-1, 0,-5, 0,
+	-2,-2,-4,-4,-5,-2,-3,-2,-3,-2,-2,-2,-2, 1,-3,-4,-3,15, 3,-3,-5,-2,
+	-2,-1,-2,-2,-3,-1,-2,-3, 2, 0, 0,-1, 0, 3,-3,-2,-1, 3, 8,-1,-5,-1,
+	 0,-2,-3,-3,-1,-3,-3,-3,-3, 3, 1,-2, 1, 0,-3,-1, 0,-3,-1, 5,-5,-1,
+	-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 1,-5,
+	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 0,-2,-1,-1,-5,-1
+};
+
+/*
+ * General nucleotide score matrix for the 16-code alphabet, stored as a flat,
+ * row-major 16x16 array.  Rows and columns follow the header comment below,
+ * i.e. codes 0..15 of aln_nt16_rev_table; the score of codes (a, b) is at
+ * index a * 16 + b.  Used by aln_param_nt2nt.
+ */
+int aln_sm_nt[] = {
+/*	 X  A  G  R  C  M  S  V  T  W  K  D  Y  H  B  N */
+	-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+	-2, 2,-1, 1,-2, 1,-2, 0,-2, 1,-2, 0,-2, 0,-2, 0,
+	-2,-1, 2, 1,-2,-2, 1, 0,-2,-2, 1, 0,-2,-2, 0, 0,
+	-2, 1, 1, 1,-2,-1,-1, 0,-2,-1,-1, 0,-2, 0, 0, 0,
+	-2,-2,-2,-2, 2, 1, 1, 0,-1,-2,-2,-2, 1, 0, 0, 0,
+	-2, 1,-2,-1, 1, 1,-1, 0,-2,-1,-2, 0,-1, 0, 0, 0,
+	-2,-2, 1,-1, 1,-1, 1, 0,-2,-2,-1, 0,-1, 0, 0, 0,
+	-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2,-2,-2,-1,-2,-2,-2, 2, 1, 1, 0, 1, 0, 0, 0,
+	-2, 1,-2,-1,-2,-1,-2, 0, 1, 1,-1, 0,-1, 0, 0, 0,
+	-2,-2, 1,-1,-2,-2,-1, 0, 1,-1, 1, 0,-1, 0, 0, 0,
+	-2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2,-2,-2, 1,-1,-1, 0, 1,-1,-1, 0, 1, 0, 0, 0,
+	-2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+/*
+ * Score matrix for read-to-read alignment over the 16-code alphabet, stored
+ * as a flat, row-major 16x16 array laid out like aln_sm_nt (codes 0..15 of
+ * aln_nt16_rev_table, score of (a, b) at a * 16 + b).  Mismatches between
+ * incompatible codes cost -17 instead of the -1/-2 used in aln_sm_nt.
+ * Used by aln_param_rd2rd.
+ */
+int aln_sm_read[] = {
+/*	  X   A   G   R   C   M   S   V   T   W   K   D   Y   H   B   N  */
+	-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,
+	-17,  2,-17,  1,-17,  1,-17,  0,-17,  1,-17,  0,-17,  0,-17,  0,
+	-17,-17,  2,  1,-17,-17,  1,  0,-17,-17,  1,  0,-17,-17,  0,  0,
+	-17,  1,  1,  1,-17,-17,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,-17,-17,-17,  2,  1,  1,  0,-17,-17,-17,-17,  1,  0,  0,  0,
+	-17,  1,-17,-17,  1,  1,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,-17,  1,-17,  1,-17,  1,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,  0,  0,  0,  0,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,-17,-17,-17,-17,-17,-17,  2,  1,  1,  0,  1,  0,  0,  0,
+	-17,  1,-17,-17,-17,-17,-17,  0,  1,  1,-17,  0,-17,  0,  0,  0,
+	-17,-17,  1,-17,-17,-17,-17,  0,  1,-17,  1,  0,-17,  0,  0,  0,
+	-17,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,-17,-17,  1,-17,-17,  0,  1,-17,-17,  0,  1,  0,  0,  0,
+	-17,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+};
+
+/*
+ * Score matrix for the 4-base alphabet (described in stdaln.h as the
+ * human-mouse matrix), stored as a flat, row-major 5x5 array.  Rows and
+ * columns follow the header comment below, i.e. codes 0..4 of
+ * aln_nt4_rev_table; the score of codes (a, b) is at index a * 5 + b.
+ * NOTE(review): the matrix is not symmetric (e.g. A/C is -114 but C/A is
+ * -123) -- confirm whether that is intended.
+ */
+int aln_sm_hs[] = {
+/*     A    G    C    T    N */
+	  91, -31,-114,-123, -44,
+	 -31, 100,-125,-114, -42,
+	-123,-125, 100, -31, -42,
+	-114,-114, -31,  91, -42,
+	 -44, -42, -42, -42, -43
+};
+
+/********************/
+/* START OF align.c */
+/********************/
+
+/*
+ * Ready-made parameter sets.  The initializers follow the field order of
+ * AlnParam: { gap_open, gap_ext, gap_end, matrix, row, band_width }.
+ *   nt2nt: general nucleotide alignment (aln_sm_nt, 16 codes)
+ *   rd2rd: read-to-read alignment       (aln_sm_read, 16 codes)
+ *   aa2aa: protein alignment            (aln_sm_blosum62, 22 codes)
+ */
+AlnParam aln_param_nt2nt   = { 10,  2,  2, aln_sm_nt, 16, 75 };
+AlnParam aln_param_rd2rd   = { 20, 19, 19, aln_sm_read, 16, 75 };
+AlnParam aln_param_aa2aa   = { 12,  2,  2, aln_sm_blosum62, 22, 50 };
+
+/*
+ * Allocate an empty alignment result.
+ *
+ * Every pointer member is set to 0 and every numeric member (path length,
+ * coordinates, score) to 0, so the object can be handed to aln_free_AlnAln()
+ * at any time.  Returns 0 if the allocation fails.
+ */
+AlnAln *aln_init_AlnAln()
+{
+	AlnAln *aa;
+	aa = (AlnAln*)MYALLOC(sizeof(AlnAln));
+	if (aa == 0) return 0; /* out of memory: report it instead of writing through NULL */
+	aa->path = 0;
+	aa->out1 = aa->out2 = aa->outm = 0;
+	aa->path_len = 0;
+	/* do not leave the remaining members indeterminate */
+	aa->start1 = aa->end1 = aa->start2 = aa->end2 = 0;
+	aa->score = 0;
+	return aa;
+}
+/*
+ * Release an alignment result together with the path and the three output
+ * strings it owns.  Passing 0 is allowed and does nothing, mirroring free().
+ */
+void aln_free_AlnAln(AlnAln *aa)
+{
+	if (aa == 0) return;
+	/* members that were never filled in are still 0, which MYFREE accepts */
+	MYFREE(aa->path);
+	MYFREE(aa->out1);
+	MYFREE(aa->out2);
+	MYFREE(aa->outm);
+	MYFREE(aa);
+}
+
+/***************************/
+/* START OF common_align.c */
+/***************************/
+
+/*
+ * Constants of aln_local_core().
+ *
+ * Each eh[] entry there is one NT_LOCAL_SCORE that packs two values: one is
+ * read with `>> NT_LOCAL_SHIFT`, the other with `& NT_LOCAL_MASK`.  Once the
+ * best score of a pass exceeds LOCAL_OVERFLOW_THRESHOLD, both halves of every
+ * entry are lowered by LOCAL_OVERFLOW_REDUCE (clamped at 0) before the next
+ * row, and the amount is added back to the final score.
+ */
+#define LOCAL_OVERFLOW_THRESHOLD 32000
+#define LOCAL_OVERFLOW_REDUCE 16000
+#define NT_LOCAL_SCORE int
+#define NT_LOCAL_SHIFT 16
+#define NT_LOCAL_MASK 0xffff
+
+#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;
+
+#define set_M(MM, cur, p, sc)							\
+{														\
+	if ((p)->M >= (p)->I) {								\
+		if ((p)->M >= (p)->D) {							\
+			(MM) = (p)->M + (sc); (cur)->Mt = FROM_M;	\
+		} else {										\
+			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D;	\
+		}												\
+	} else {											\
+		if ((p)->I > (p)->D) {							\
+			(MM) = (p)->I + (sc); (cur)->Mt = FROM_I;	\
+		} else {										\
+			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D;	\
+		}												\
+	}													\
+}
+#define set_I(II, cur, p)								\
+{														\
+	if ((p)->M - gap_open > (p)->I - gap_ext) {			\
+		(cur)->It = FROM_M;								\
+		(II) = (p)->M - gap_open;						\
+	} else {											\
+		(cur)->It = FROM_I;								\
+		(II) = (p)->I - gap_ext;						\
+	}													\
+}
+#define set_end_I(II, cur, p)							\
+{														\
+	if (gap_end >= 0) {									\
+		if ((p)->M > (p)->I) {							\
+			(cur)->It = FROM_M;							\
+			(II) = (p)->M - gap_end;					\
+		} else {										\
+			(cur)->It = FROM_I;							\
+			(II) = (p)->I - gap_end;					\
+		}												\
+	} else set_I(II, cur, p);							\
+}
+#define set_D(DD, cur, p)								\
+{														\
+	if ((p)->M - gap_open > (p)->D - gap_ext) {			\
+		(cur)->Dt = FROM_M;								\
+		(DD) = (p)->M - gap_open;						\
+	} else {											\
+		(cur)->Dt = FROM_D;								\
+		(DD) = (p)->D - gap_ext;						\
+	}													\
+}
+#define set_end_D(DD, cur, p)							\
+{														\
+	if (gap_end >= 0) {									\
+		if ((p)->M > (p)->D) {							\
+			(cur)->Dt = FROM_M;							\
+			(DD) = (p)->M - gap_end;					\
+		} else {										\
+			(cur)->Dt = FROM_D;							\
+			(DD) = (p)->D - gap_end;					\
+		}												\
+	} else set_D(DD, cur, p);							\
+}
+
+/*
+ * Traceback cell of aln_global_core(): for each of the three DP states of a
+ * matrix position it stores which predecessor state (FROM_M, FROM_I or
+ * FROM_D) produced the best score.  Packed into bit fields of one byte.
+ */
+typedef struct
+{
+	unsigned char Mt:3, It:2, Dt:2;
+} dpcell_t;
+
+/*
+ * Score cell of aln_global_core(): the best scores of the three DP states
+ * (M, I, D) at one position.  Only two rows of these are kept at a time.
+ */
+typedef struct
+{
+	int M, I, D;
+} dpscore_t;
+
+/*
+ * Build a score profile of `seq` (meant to speed up alignment, in theory).
+ *
+ * score_matrix is a flat row x row matrix.  For every code c in 0..row-1,
+ * s_array[c] receives len entries with
+ *     s_array[c][k] = score_matrix[c * row + seq[k]]
+ * so that the score of code c against position k of seq is a single lookup.
+ * Each s_array[c] must have room for len ints.
+ */
+void aln_init_score_array(unsigned char *seq, int len, int row, int *score_matrix, int **s_array)
+{
+	int code, pos;
+	for (code = 0; code != row; ++code) {
+		int *matrix_row = score_matrix + code * row;
+		int *profile = s_array[code];
+		for (pos = 0; pos != len; ++pos) {
+			profile[pos] = matrix_row[seq[pos]];
+		}
+	}
+}
+/***************************
+ * banded global alignment *
+ ***************************
+ *
+ * Aligns seq1[0..len1-1] against seq2[0..len2-1] end to end, evaluating only
+ * a band of cells around the diagonal.
+ *
+ * seq1, seq2 : sequences already converted to codes; each code is used as a
+ *              row/column index into ap->matrix (ap->row codes per row).
+ * ap         : gap_open, gap_ext, gap_end, band_width, matrix and row are
+ *              read from it.
+ * path       : output.  Entries are written from the END of the alignment
+ *              towards its start; each holds the state (FROM_M/FROM_I/FROM_D)
+ *              and the 1-based positions i (in seq1) and j (in seq2).  One
+ *              more entry than *path_len is written (the terminating (0,0)
+ *              position); aln_stdaln_aux() allocates len1 + len2 + 1 entries.
+ * path_len   : output, number of valid entries in path.
+ *
+ * Returns the best score found in the last cell.  If either sequence is
+ * empty, *path_len is set to 0 and 0 is returned.
+ */
+int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+		path_t *path, int *path_len)
+{
+	register int i, j;
+	dpcell_t **dpcell, *q;
+	dpscore_t *curr, *last, *s;
+	path_t *p;
+	int b1, b2, tmp_end;
+	int *mat, end, max;
+	unsigned char type, ctype;
+
+	int gap_open, gap_ext, gap_end, b;
+	int *score_matrix, N_MATRIX_ROW;
+
+	/* initialize some align-related parameters. just for compatibility */
+	/* (the set_* macros pick up gap_open, gap_ext and gap_end by name) */
+	gap_open = ap->gap_open;
+	gap_ext = ap->gap_ext;
+	gap_end = ap->gap_end;
+	b = ap->band_width;
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+	
+	if (len1 == 0 || len2 == 0) {
+		*path_len = 0;
+		return 0;
+	}
+	/* calculate b1 and b2 */
+	/* the band is widened on the side of the longer sequence by the length
+	 * difference and then clipped to the sequence lengths */
+	if (len1 > len2) {
+		b1 = len1 - len2 + b;
+		b2 = b;
+	} else {
+		b1 = b;
+		b2 = len2 - len1 + b;
+	}
+	if (b1 > len1) b1 = len1;
+	if (b2 > len2) b2 = len2;
+	/* from here on both sequences are addressed with 1-based indices */
+	--seq1; --seq2;
+
+	/* allocate memory */
+	/* every traceback row only stores `end` cells; for rows below b2 the row
+	 * pointer is shifted so that it can still be indexed with the column i
+	 * (the shift is undone before the rows are freed) */
+	end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);
+	dpcell = (dpcell_t**)MYALLOC(sizeof(dpcell_t*) * (len2 + 1));
+	for (j = 0; j <= len2; ++j)
+		dpcell[j] = (dpcell_t*)MYALLOC(sizeof(dpcell_t) * end);
+	for (j = b2 + 1; j <= len2; ++j)
+		dpcell[j] -= j - b2;
+	/* two score rows: the row being computed and the previous one */
+	curr = (dpscore_t*)MYALLOC(sizeof(dpscore_t) * (len1 + 1));
+	last = (dpscore_t*)MYALLOC(sizeof(dpscore_t) * (len1 + 1));
+	
+	/* set first row */
+	SET_INF(*curr); curr->M = 0;
+	for (i = 1, s = curr + 1; i < b1; ++i, ++s) {
+		SET_INF(*s);
+		set_end_D(s->D, dpcell[0] + i, s - 1);
+	}
+	s = curr; curr = last; last = s;
+
+	/* core dynamic programming, part 1 */
+	/* rows whose band still starts at column 0 */
+	tmp_end = (b2 < len2)? b2 : len2 - 1;
+	for (j = 1; j <= tmp_end; ++j) {
+		q = dpcell[j]; s = curr; SET_INF(*s);
+		set_end_I(s->I, q, last);
+		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		++s; ++q;
+		for (i = 1; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		/* last cell of the row (i == end) */
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_D(s->D, q, s - 1);
+		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
+			set_end_I(s->I, q, last + i);
+		} else s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+	}
+	/* last row for part 1, use set_end_D() instead of set_D() */
+	if (j == len2 && b2 != len2 - 1) {
+		q = dpcell[j]; s = curr; SET_INF(*s);
+		set_end_I(s->I, q, last);
+		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		++s; ++q;
+		for (i = 1; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
+			set_I(s->I, q, last + i);
+			set_end_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_end_D(s->D, q, s - 1);
+		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
+			set_end_I(s->I, q, last + i);
+		} else s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+		++j;
+	}
+
+	/* core dynamic programming, part 2 */
+	/* rows whose band starts at column j - b2 + 1 and ends at j + b1 - 1 */
+	for (; j <= len2 - b2 + 1; ++j) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		end = j + b1 - 1;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_D(s->D, q, s - 1);
+		s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+	}
+
+	/* core dynamic programming, part 3 */
+	/* rows whose band runs up to the last column len1 */
+	for (; j < len2; ++j) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
+		set_end_I(s->I, q, last + i);
+		set_D(s->D, q, s - 1);
+		s = curr; curr = last; last = s;
+	}
+	/* last row */
+	if (j == len2) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_end_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
+		set_end_I(s->I, q, last + i);
+		set_end_D(s->D, q, s - 1);
+		s = curr; curr = last; last = s;
+	}
+
+	/* backtrace */
+	/* start in the last cell with whichever of the three states scored best;
+	 * after the final row swap that row is reachable through `last` */
+	i = len1; j = len2;
+	q = dpcell[j] + i;
+	s = last + len1;
+	max = s->M; type = q->Mt; ctype = FROM_M;
+	if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }
+	if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }
+
+	p = path;
+	p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */
+	++p;
+	do {
+		/* step to the predecessor position implied by the current state */
+		switch (ctype) {
+			case FROM_M: --i; --j; break;
+			case FROM_I: --j; break;
+			case FROM_D: --i; break;
+		}
+		q = dpcell[j] + i;
+		/* the state we came from becomes the current state */
+		ctype = type;
+		switch (type) {
+			case FROM_M: type = q->Mt; break;
+			case FROM_I: type = q->It; break;
+			case FROM_D: type = q->Dt; break;
+		}
+		p->ctype = ctype; p->i = i; p->j = j;
+		++p;
+	} while (i || j);
+	/* the entry written for position (0,0) is not counted */
+	*path_len = p - path - 1;
+
+	/* free memory */
+	/* undo the row-pointer shift applied after allocation */
+	for (j = b2 + 1; j <= len2; ++j)
+		dpcell[j] += j - b2;
+	for (j = 0; j <= len2; ++j)
+		MYFREE(dpcell[j]);
+	MYFREE(dpcell);
+	MYFREE(curr); MYFREE(last);
+	
+	return max;
+}
+/*************************************************
+ * local alignment combined with banded strategy *
+ *************************************************
+ *
+ * Finds the best-scoring local alignment between seq1[0..len1-1] and
+ * seq2[0..len2-1] (both already converted to codes that index ap->matrix).
+ *
+ * A forward pass over the whole matrix finds the score and the end position
+ * (end_i, end_j).  Unless path is 0, a reverse pass starting from that end
+ * position finds the start position (start_i, start_j).  What is stored in
+ * path depends on the arguments:
+ *   - path == 0               : nothing is stored, only the score is returned;
+ *   - path_len == 0 (pointer) : path[0] = start, path[1] = end;
+ *   - do_align != 0           : aln_global_core() is run on the located
+ *                               region and its path is shifted to coordinates
+ *                               of the full sequences;
+ *   - otherwise               : *path_len = 2, path[0] = end, path[1] = start
+ *                               (note the order, opposite to the second case).
+ * If no local match exists, path and *path_len are left untouched.
+ *
+ * Returns the forward score (or, with do_align, the score reported by
+ * aln_global_core); returns -1 if either sequence is empty.
+ */
+int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+				   path_t *path, int *path_len, int do_align)
+{
+	register NT_LOCAL_SCORE *s;
+	register int i;
+	int q, r, qr, tmp_len, qr_shift;
+	int **s_array, *score_array;
+	int e, f;
+	int is_overflow, of_base;
+	NT_LOCAL_SCORE *eh, curr_h, last_h, curr_last_h;
+	int j, start_i, start_j, end_i, end_j;
+	path_t *p;
+	int score_f, score_r, score_g;
+	int start, end, max_score;
+
+	int gap_open, gap_ext;
+	int *score_matrix, N_MATRIX_ROW;
+
+	/* initialize some align-related parameters. just for compatibility */
+	gap_open = ap->gap_open;
+	gap_ext = ap->gap_ext;
+//	b = ap->band_width;
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+
+	if (len1 == 0 || len2 == 0) return -1;
+
+	/* allocate memory */
+	/* eh[] holds one packed entry per column 0..len1; s_array is the score
+	 * profile of seq1 with one row per code */
+	eh = (NT_LOCAL_SCORE*)MYALLOC(sizeof(NT_LOCAL_SCORE) * (len1 + 1));
+	s_array = (int**)MYALLOC(sizeof(int*) * N_MATRIX_ROW);
+	for (i = 0; i != N_MATRIX_ROW; ++i)
+		s_array[i] = (int*)MYALLOC(sizeof(int) * len1);
+	/* initialization */
+	aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array);
+	/* q + r == gap_open is the cost of opening a gap, r of extending it */
+	q = gap_open - gap_ext;
+	r = gap_ext;
+	qr = q + r;
+	qr_shift = (qr+1) << NT_LOCAL_SHIFT;
+	tmp_len = len1 + 1;
+	start_i = start_j = end_i = end_j = 0;
+	/* largest entry of the score matrix, used below to bound the band */
+	for (i = 0, max_score = 0; i != N_MATRIX_ROW * N_MATRIX_ROW; ++i)
+		if (max_score < score_matrix[i]) max_score = score_matrix[i];
+	/* convert the coordinate */
+	/* sequences and profile rows are addressed with 1-based indices from
+	 * here on; the profile pointers are restored before they are freed */
+	--seq1; --seq2;
+	for (i = 0; i != N_MATRIX_ROW; ++i) --s_array[i];
+
+	/* forward dynamic programming */
+	for (i = 0, s = eh; i != tmp_len; ++i, ++s) *s = 0;
+	score_f = 0;
+	is_overflow = of_base = 0;
+	for (j = 1; j <= len2; ++j) {
+		last_h = f = 0;
+		score_array = s_array[seq2[j]];
+		if (is_overflow) { /* adjust eh[] array if overflow occurs. */
+			/* If LOCAL_OVERFLOW_REDUCE is too small, optimal alignment might be missed.
+			 * If it is too large, this block will be executed frequently and therefore
+			 * slow down the whole program.
+			 * Actually, smaller LOCAL_OVERFLOW_REDUCE might also help to reduce the
+			 * number of assignments because it sets some cells to zero when overflow
+			 * happens. */
+			int tmp, tmp2;
+			score_f -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE;
+			is_overflow = 0;
+			for (i = 1, s = eh; i <= tmp_len; ++i, ++s) {
+				tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << NT_LOCAL_SHIFT) | tmp2;
+			}
+		}
+		for (i = 1, s = eh; i != tmp_len; ++i, ++s) {
+			/* prepare for calculate current h */
+			/* the upper part of *s still holds the previous row's score for
+			 * column i - 1, i.e. the diagonal predecessor */
+			curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
+			if (curr_h < 0) curr_h = 0;
+			if (last_h > qr) { /* initialize f */
+				f = (f > last_h - q)? f - r : last_h - qr;
+				if (curr_h < f) curr_h = f;
+			}
+			if (*(s+1) >= qr_shift) { /* initialize e */
+				curr_last_h = *(s+1) >> NT_LOCAL_SHIFT;
+				e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
+				if (curr_h < e) curr_h = e;
+				*s = (last_h << NT_LOCAL_SHIFT) | e;
+			} else *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */
+			last_h = curr_h;
+			/* remember the best cell; it becomes the end of the alignment */
+			if (score_f < curr_h) {
+				score_f = curr_h; end_i = i; end_j = j;
+				if (score_f > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
+			}
+		}
+		*s = last_h << NT_LOCAL_SHIFT;
+	}
+	/* add back what the overflow adjustments removed */
+	score_f += of_base;
+
+	if (path == 0) goto end_func; /* skip path-filling */
+
+	/* reverse dynamic programming */
+	for (i = end_i, s = eh + end_i; i >= 0; --i, --s) *s = 0;
+	if (end_i == 0 || end_j == 0) goto end_func; /* no local match */
+	score_r = score_matrix[seq1[end_i] * N_MATRIX_ROW + seq2[end_j]];
+	is_overflow = of_base = 0;
+	start_i = end_i; start_j = end_j;
+	/* the seed is stored with qr added; qr is subtracted from score_r again
+	 * once the pass is finished */
+	eh[end_i] = ((NT_LOCAL_SCORE)(qr + score_r)) << NT_LOCAL_SHIFT; /* in order to initialize f and e, 040408 */
+	start = end_i - 1;
+	end = end_i - 3;
+	if (end <= 0) end = 0;
+
+	/* second pass DP can be done in a band, speed will thus be enhanced */
+	for (j = end_j - 1; j != 0; --j) {
+		last_h = f = 0;
+		score_array = s_array[seq2[j]];
+		if (is_overflow) { /* adjust eh[] array if overflow occurs. */
+			int tmp, tmp2;
+			score_r -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE;
+			is_overflow = 0;
+			for (i = start, s = eh + start + 1; i >= end; --i, --s) {
+				tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << NT_LOCAL_SHIFT) | tmp2;
+			}
+		}
+		/* same recurrence as the forward pass, run right to left between the
+		 * band limits start and end */
+		for (i = start, s = eh + start + 1; i != end; --i, --s) {
+			/* prepare for calculate current h */
+			curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
+			if (curr_h < 0) curr_h = 0;
+			if (last_h > qr) { /* initialize f */
+				f = (f > last_h - q)? f - r : last_h - qr;
+				if (curr_h < f) curr_h = f;
+			}
+			if (*(s-1) >= qr_shift) { /* initialize e */
+				curr_last_h = *(s-1) >> NT_LOCAL_SHIFT;
+				e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
+				if (curr_h < e) curr_h = e;
+				*s = (last_h << NT_LOCAL_SHIFT) | e;
+			} else *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */
+			last_h = curr_h;
+			if (score_r < curr_h) {
+				score_r = curr_h; start_i = i; start_j = j;
+				/* the reverse score has reached the forward score: the
+				 * start is found, so leave both loops (j = 1 ends the
+				 * outer loop after its decrement) */
+				if (score_r + of_base - qr == score_f) {
+					j = 1; break;
+				}
+				if (score_r > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
+			}
+		}
+		*s = last_h << NT_LOCAL_SHIFT;
+		/* recalculate start and end, the boundaries of the band */
+		if ((eh[start] >> NT_LOCAL_SHIFT) <= qr) --start;
+		if (start <= 0) start = 0;
+		end = start_i - (start_j - j) - (score_r + of_base + (start_j - j) * max_score) / r - 1;
+		if (end <= 0) end = 0;
+	}
+
+	/* path_len is a pointer: a null pointer asks for start and end only */
+	if (path_len == 0) {
+		path[0].i = start_i; path[0].j = start_j;
+		path[1].i = end_i; path[1].j = end_j;
+		goto end_func;
+	}
+
+	score_r += of_base;
+	score_r -= qr;
+
+#ifdef DEBUG
+	/* this seems not a bug */
+	if (score_f != score_r)
+		fprintf(stderr, "[aln_local_core] unknown flaw occurs: score_f(%d) != score_r(%d)\n", score_f, score_r);
+#endif
+
+	if (do_align) { /* call global alignment to fill the path */
+		score_g = 0;
+		j = (end_i - start_i > end_j - start_j)? end_i - start_i : end_j - start_j;
+		++j; /* j is the maximum band_width */
+		/* double the band width until the global score matches one of the
+		 * local scores or the band exceeds the size of the region */
+		for (i = ap->band_width;; i <<= 1) {
+			AlnParam ap_real = *ap;
+			ap_real.gap_end = -1;
+			ap_real.band_width = i;
+			score_g = aln_global_core(seq1 + start_i, end_i - start_i + 1, seq2 + start_j,
+									  end_j - start_j + 1, &ap_real, path, path_len);
+			if (score_g == score_r || score_f == score_g) break;
+			if (i > j) break;
+		}
+		if (score_r > score_g && score_f > score_g)
+			fprintf(stderr, "[aln_local_core] Cannot find reasonable band width. Continue anyway.\n");
+		score_f = score_g;
+
+		/* convert coordinate */
+		/* from positions inside the region to positions in the full sequences */
+		for (p = path + *path_len - 1; p >= path; --p) {
+			p->i += start_i - 1;
+			p->j += start_j - 1;
+		}
+	} else { /* just store the start and end */
+		*path_len = 2;
+		path[1].i = start_i; path[1].j = start_j;
+		path->i = end_i; path->j = end_j;
+	}
+
+end_func:
+	/* free */
+	MYFREE(eh);
+	for (i = 0; i != N_MATRIX_ROW; ++i) {
+		/* undo the 1-based shift so the original pointer is freed */
+		++s_array[i];
+		MYFREE(s_array[i]);
+	}
+	MYFREE(s_array);
+	return score_f;
+}
+/*
+ * Align two plain-text sequences and return the result as a new AlnAln.
+ *
+ * seq1, seq2 : sequences as characters.
+ * ap         : alignment parameters.  ap->row also selects the alphabet:
+ *              < 10 uses aln_nt4_table, < 20 uses aln_nt16_table, anything
+ *              else uses aln_aa_table.
+ * is_global  : non-zero for banded global alignment (aln_global_core),
+ *              zero for local alignment (aln_local_core).
+ * do_align   : non-zero to also build the printable strings out1, out2 and
+ *              outm ('|' marks identical, non-gap codes).
+ * len1, len2 : sequence lengths; a negative value means "use strlen()".
+ *
+ * The result has to be released with aln_free_AlnAln().  start1/end1 and
+ * start2/end2 are taken from the last and the first path entry; if the path
+ * is empty (empty input or no local match) they are all 0.
+ * Returns 0 if memory cannot be allocated.
+ */
+AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
+					   int is_global, int do_align, int len1, int len2)
+{
+	unsigned char *seq11, *seq22;
+	const unsigned char *table;
+	int score;
+	int i, j, l;
+	path_t *p;
+	char *out1, *out2, *outm;
+	AlnAln *aa;
+
+	if (len1 < 0) len1 = (int)strlen(seq1);
+	if (len2 < 0) len2 = (int)strlen(seq2);
+
+	aa = aln_init_AlnAln();
+	if (aa == 0) return 0;
+	seq11 = (unsigned char*)MYALLOC(sizeof(unsigned char) * len1);
+	seq22 = (unsigned char*)MYALLOC(sizeof(unsigned char) * len2);
+	aa->path = (path_t*)MYALLOC(sizeof(path_t) * (len1 + len2 + 1));
+	/* a zero-byte request may legitimately yield 0, so the sequence buffers
+	 * are only checked when they are non-empty */
+	if ((len1 > 0 && seq11 == 0) || (len2 > 0 && seq22 == 0) || aa->path == 0)
+		goto fail;
+
+	if (ap->row < 10) table = aln_nt4_table; /* 4-nucleotide alignment */
+	else if (ap->row < 20) table = aln_nt16_table; /* 16-nucleotide alignment */
+	else table = aln_aa_table; /* amino acids */
+	/* go through unsigned char: plain char may be signed, and a negative
+	 * value would index in front of the 256-entry table */
+	for (i = 0; i < len1; ++i)
+		seq11[i] = table[(unsigned char)seq1[i]];
+	for (j = 0; j < len2; ++j)
+		seq22[j] = table[(unsigned char)seq2[j]];
+
+	if (is_global) score = aln_global_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len);
+	else score = aln_local_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len, do_align);
+	aa->score = score;
+
+	if (do_align) {
+		out1 = aa->out1 = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));
+		out2 = aa->out2 = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));
+		outm = aa->outm = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));
+		if (out1 == 0 || out2 == 0 || outm == 0) goto fail;
+
+		/* the path runs from the end of the alignment to its start, so it
+		 * is walked backwards; path positions are 1-based */
+		for (l = 0, i = aa->path_len - 1; i >= 0; --i, ++l) {
+			p = aa->path + i;
+			switch (p->ctype) {
+			case FROM_M: out1[l] = seq1[p->i - 1]; out2[l] = seq2[p->j - 1];
+				outm[l] = (seq11[p->i - 1] == seq22[p->j - 1] && seq11[p->i - 1] != ap->row)? '|' : ' ';
+				break;
+			case FROM_I: out1[l] = '-'; out2[l] = seq2[p->j - 1]; outm[l] = ' '; break;
+			case FROM_D: out1[l] = seq1[p->i - 1]; out2[l] = '-'; outm[l] = ' '; break;
+			}
+		}
+		out1[l] = out2[l] = outm[l] = '\0';
+	}
+
+	MYFREE(seq11);
+	MYFREE(seq22);
+
+	if (aa->path_len > 0) {
+		p = aa->path + aa->path_len - 1;
+		aa->start1 = p->i;
+		aa->end1 = aa->path->i;
+		aa->start2 = p->j;
+		aa->end2 = aa->path->j;
+	} else {
+		/* empty path: there is no entry to read the coordinates from */
+		aa->start1 = aa->end1 = aa->start2 = aa->end2 = 0;
+	}
+
+	return aa;
+
+fail:
+	MYFREE(seq11);
+	MYFREE(seq22);
+	aln_free_AlnAln(aa);
+	return 0;
+}
+/*
+ * Convenience wrapper around aln_stdaln_aux() for NUL-terminated sequences:
+ * both lengths are passed as -1, which makes aln_stdaln_aux() measure them
+ * with strlen().
+ */
+AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int do_align)
+{
+	return aln_stdaln_aux(seq1, seq2, ap, is_global, do_align, -1, -1);
+}
diff --git a/stdaln.h b/stdaln.h
new file mode 100644
index 0000000..312e0ca
--- /dev/null
+++ b/stdaln.h
@@ -0,0 +1,105 @@
+/*
+ * stdaln.h -- standard alignment (local and banded global alignment)
+ *
+ * Copyright (c) 2003-2006, Heng Li <liheng at genomics.org.cn>
+ *                                  <lh3lh3 at gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef LH3_STDALN_H_
+#define LH3_STDALN_H_
+
+
+/* Version of the library.  Note that 0.9.5 is not a valid numeric constant,
+ * so the macro can only be used where it is turned into text. */
+#define STDALN_VERSION 0.9.5
+
+
+/* Allocation hooks: define MYALLOC / MYFREE before including this header to
+ * replace malloc() / free(). */
+#ifndef MYALLOC
+#	define MYALLOC malloc
+#endif
+#ifndef MYFREE
+#	define MYFREE free
+#endif
+
+/* DP states, also used as path_t.ctype and as traceback values */
+#define FROM_M 0
+#define FROM_I 1
+#define FROM_D 2
+
+/* "Minus infinity" for alignment scores.  This is NOT the smallest int: the
+ * value is -(2^30 - 1), presumably chosen so that scores and gap penalties
+ * can still be added to it without overflowing a 32-bit int. */
+#define MINOR_INF -1073741823
+
+/* Alignment parameters. */
+typedef struct
+{
+	int gap_open; /* penalty subtracted when a gap is opened */
+	int gap_ext; /* penalty subtracted when a gap is extended */
+	int gap_end; /* penalty for gaps at the sequence ends in global alignment;
+	              * a negative value means "score them like any other gap" */
+
+	int *matrix; /* flat row x row score matrix, indexed a * row + b */
+	int row; /* number of codes per matrix row; also selects the alphabet */
+	int band_width; /* band width of the banded global alignment */
+} AlnParam;
+
+/* One step of an alignment path. */
+typedef struct
+{
+	int i, j; /* positions in the first and in the second sequence */
+	unsigned char ctype; /* state of the step: FROM_M, FROM_I or FROM_D */
+} path_t;
+
+/* Result of an alignment; release it with aln_free_AlnAln(). */
+typedef struct
+{
+	path_t *path; /* alignment path, stored from the end of the alignment to its start (for advanced users) */
+	int path_len; /* number of entries in path (for advanced users) */
+	int start1, end1; /* start and end in the first sequence, coordinates are 1-based */
+	int start2, end2; /* start and end in the second sequence, coordinates are 1-based */
+	int score; /* score */
+
+	char *out1, *out2; /* the two aligned sequences with '-' for gaps; only set when do_align was requested */
+	char *outm; /* match line between out1 and out2: '|' for identical codes, ' ' otherwise */
+} AlnAln;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/* Align seq1 against seq2.  is_global selects global (non-zero) or local
+	 * (zero) alignment, do_align requests the printable output strings, and
+	 * a negative len1 / len2 means "use strlen()". */
+	AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int do_align, int len1, int len2);
+	/* Same as aln_stdaln_aux() with both lengths taken from strlen(). */
+	AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int do_align);
+	/* Release a result returned by one of the functions above. */
+	void aln_free_AlnAln(AlnAln *aa);
+
+#ifdef __cplusplus
+}
+#endif
+
+/********************
+ * global variables *
+ ********************/
+
+/* predefined parameter sets; the comments repeat the initializers from
+ * stdaln.c: { gap_open, gap_ext, gap_end, matrix, row, band_width } */
+extern AlnParam aln_param_nt2nt; /* = { 10,  2,  2, aln_sm_nt, 16, 75 }; */
+extern AlnParam aln_param_aa2aa; /* = { 12,  2,  2, aln_sm_blosum62, 22, 50 }; */
+extern AlnParam aln_param_rd2rd; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
+
+/* common nucleotide score matrix for 16 bases */
+extern int           aln_sm_nt[];
+
+/* BLOSUM62 and BLOSUM45 */
+extern int           aln_sm_blosum62[], aln_sm_blosum45[];
+
+/* read-to-read score matrix for 16 bases. note that read alignment is quite different from common nucleotide alignment */
+extern int           aln_sm_read[];
+
+/* human-mouse score matrix for 4 bases */
+extern int           aln_sm_hs[];
+
+#endif
diff --git a/string.h b/string.h
new file mode 100644
index 0000000..8d4ac73
--- /dev/null
+++ b/string.h
@@ -0,0 +1,217 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __STRING_RJ_H
+#define __STRING_RJ_H
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include "vector.h"
+
+/**
+ * String
+ */
+
+#ifndef SWAP_TMP /* swap_tmp is only defined here if SWAP_TMP has not been defined already */
+#define SWAP_TMP
+#define swap_tmp(a, b, t) t = a; a = b; b = t /* swaps a and b through scratch variable t; expands to three statements, so use it inside braces */
+#endif
+
+typedef struct {
+	char *string; /* heap buffer of capacity + 1 bytes (see init_string) */
+	int size; /* number of characters stored, terminator not counted */
+	int capacity; /* characters the buffer can hold besides the terminator */
+} String;
+
+typedef struct {
+	char *string; /* start of the slice; split_string/split_vstring point it into the source buffer */
+	int  size; /* slice length in chars; the slice need not be NUL-terminated */
+} VirtualString;
+
+static inline String* init_string(int cap){ /* allocate an empty String with room for at least cap characters */
+	String *str;
+	str = (String*)malloc(sizeof(String)); /* NOTE: malloc results are not checked in this function */
+	str->size = 0;
+	str->capacity = (cap&0x1)? cap:cap+1; /* an even cap is bumped to the next odd value */
+	str->string = (char*)malloc(sizeof(char) * (str->capacity + 1)); /* +1 leaves room for the terminator */
+	str->string[0] = 0;
+	return str; /* release with free_string() */
+}
+
+static inline char* substr(char *string, int start, int end, char *dst){ /* copy string[start, end) into dst, or into a newly malloc'ed buffer when dst is NULL */
+	int i, size;
+	char *str;
+	size = strlen(string);
+	if(start > size) start = size; /* clamp start into [0, strlen(string)] */
+	else if(start < 0) start = 0;
+	if(end > size) end = size; /* clamp end into [0, strlen(string)] */
+	else if(end < 0) end = 0;
+	size = end - start;
+	if(size < 0) size = 0; /* end before start gives an empty result */
+	if(dst != NULL) str = dst; /* a caller-supplied dst must have room for size + 1 bytes */
+	else str = (char*)malloc(sizeof(char) * (size + 1)); /* malloc result is not checked */
+	for(i=start;i<end;i++){
+		str[i-start] = string[i];
+	}
+	str[size] = '\0';
+	return str; /* dst itself, or the new buffer, which the caller must free */
+}
+
+static inline char* catstr(int n_str, ...){ /* concatenate n_str C strings, passed as char* varargs, into one heap-allocated string */
+	char *str, *s;
+	int i, len;
+	va_list params;
+	
+	len = 0;
+	str = NULL;
+	va_start(params, n_str);
+	for(i=0;i<n_str;i++){
+		s = va_arg(params, char*);
+		len += strlen(s);
+		str = realloc(str, len + 1); /* grow to hold everything so far plus the terminator; result is not checked */
+		if(i == 0) str[0] = 0; /* turn the fresh buffer into an empty string so strcat can append */
+		strcat(str, s);
+	}
+	va_end(params);
+	return str; /* NULL when n_str <= 0; otherwise the caller frees the result */
+}
+
+static inline void chomp_string(String *str){ /* remove a single trailing '\n', if there is one */
+	if(str->size && str->string[str->size - 1] == '\n'){
+		str->size --;
+		str->string[str->size] = 0; /* re-terminate at the new length */
+	}
+}
+
+static inline void trim_string(String *str){ /* strip leading and trailing '\n', '\t' and ' ' characters in place */
+	int i, j;
+	i = str->size - 1;
+	while(i >= 0 && (str->string[i] == '\n' || str->string[i] == '\t' || str->string[i] == ' ')) i--; /* walk back over trailing whitespace */
+	str->size = i + 1;
+	i = 0;
+	while(i < str->size && (str->string[i] == '\n' || str->string[i] == '\t' || str->string[i] == ' ')) i++; /* count leading whitespace */
+	if(i){
+		for(j=i;j<str->size;j++){ str->string[j-i] = str->string[j]; } /* shift the remaining text to the front */
+		str->size -= i;
+	}
+	str->string[str->size] = 0;
+}
+
+static inline void append_string(String *str, char *src, int offlen){ /* append the first offlen bytes of src to str */
+	int i;
+	if(offlen + str->size >= str->capacity){ /* not enough room: enlarge the buffer before copying */
+		if(offlen < str->size) str->capacity = str->size * 2 + 1; /* new capacity is 2 * max(size, offlen) + 1 */
+		else str->capacity = offlen * 2 + 1;
+		str->string = (char*)realloc(str->string, str->capacity + 1); /* realloc result is not checked */
+	}
+	for(i=0;i<offlen;i++) str->string[str->size + i] = src[i]; /* only src[0..offlen) is read, so src need not be NUL-terminated */
+	str->size += offlen;
+	str->string[str->size] = 0;
+}
+
+static inline String* as_string(char *chs){ /* build a new String holding a copy of the C string chs */
+	int len;
+	String *str;
+	len = strlen(chs);
+	str = init_string(len);
+	append_string(str, chs, len);
+	return str; /* release with free_string() */
+}
+
+static inline void add_char_string(String *str, char ch){ /* append the single character ch */
+	if(str->size == str->capacity){ /* buffer full: grow to 2 * size + 1 */
+		str->capacity = str->size * 2 + 1;
+		str->string = (char*)realloc(str->string, str->capacity + 1); /* realloc result is not checked */
+	}
+	str->string[str->size] = ch;
+	str->size ++;
+	str->string[str->size] = 0; /* keep the buffer NUL-terminated */
+}
+
+static inline void clear_string(String *str){ str->size = 0; if(str->string) str->string[0] = 0; } /* empty str but keep its buffer; the terminator is reset so str->string reads as "" (skipped when the buffer is NULL) */
+
+static inline int split_string(String *str, char separator, Vector *virtual_strings){ /* split str on separator; each field is stored as a VirtualString obtained from virtual_strings */
+	VirtualString *vstr;
+	int n_tab, i, s;
+	n_tab = 0; /* fields found so far */
+	i = 0;
+	s = 0; /* start offset of the current field */
+	while(i <= str->size){ /* i == size is treated like a final separator, so an empty str yields one empty field */
+		if(i == str->size || str->string[i] == separator){
+			vstr = get_next_vec_ref(virtual_strings); /* NOTE(review): presumably returns a fresh slot at the end of the vector -- see vector.h */
+			vstr->string = str->string + s; /* points into str's buffer; nothing is copied */
+			n_tab ++;
+			vstr->size = i - s;
+			s = i + 1;
+		}
+		i ++;
+	}
+	return n_tab; /* number of fields */
+}
+
+static inline int split_vstring(VirtualString *str, char separator, Vector *virtual_strings, int cut){ /* like split_string, but for a VirtualString; a non-zero cut also NUL-terminates every field */
+	VirtualString *vstr;
+	int n_tab, i, s;
+	n_tab = 0; /* fields found so far */
+	i = 0;
+	s = 0; /* start offset of the current field */
+	while(i <= str->size){ /* i == size is treated like a final separator */
+		if(i == str->size || str->string[i] == separator){
+			if(cut) str->string[i] = '\0'; /* overwrites the separator; when i == size this writes string[size], one past the slice */
+			vstr = get_next_vec_ref(virtual_strings); /* NOTE(review): presumably returns a fresh slot at the end of the vector -- see vector.h */
+			vstr->string = str->string + s; /* points into the source buffer; nothing is copied */
+			n_tab ++;
+			vstr->size = i - s;
+			s = i + 1;
+		}
+		i ++;
+	}
+	return n_tab; /* number of fields */
+}
+
+static inline void reverse_string(String *str){ /* reverse the characters of str in place */
+	int i, j;
+	char c;
+	i = 0;
+	j = str->size - 1;
+	while(i < j){ /* swap from both ends towards the middle */
+		swap_tmp(str->string[i], str->string[j], c);
+		i ++;
+		j --;
+	}
+}
+
+static inline void trunc_string(String *str, int size){ /* shorten str to size characters; never lengthens it */
+	if(size >= str->size) return; /* already short enough */
+	str->size = size; /* NOTE(review): a negative size is not rejected here -- callers must pass size >= 0 */
+	str->string[size] = 0;
+}
+
+static inline String* clone_string(String *str){ /* return a new String with a copy of str's first size bytes */
+	String *clone;
+	clone = init_string(str->size);
+	append_string(clone, str->string, str->size);
+	return clone; /* release with free_string() */
+}
+
+static inline void free_string(String *str){ free(str->string); free(str); } /* frees the character buffer and then the String struct itself; str must not be NULL */
+
+#endif
diff --git a/tags b/tags
new file mode 100644
index 0000000..6c88495
--- /dev/null
+++ b/tags
@@ -0,0 +1,821 @@
+!_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert at users.sourceforge.net/
+!_TAG_PROGRAM_NAME	Exuberant Ctags	//
+!_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
+!_TAG_PROGRAM_VERSION	5.8	//
+ALN_CIGAR_MAX_LEN	aln_cigar.h	26;"	d
+ALN_CIGAR_TYPE_CLIP1	aln_cigar.h	33;"	d
+ALN_CIGAR_TYPE_CLIP2	aln_cigar.h	34;"	d
+ALN_CIGAR_TYPE_DEL	aln_cigar.h	31;"	d
+ALN_CIGAR_TYPE_INS	aln_cigar.h	30;"	d
+ALN_CIGAR_TYPE_MAT	aln_cigar.h	29;"	d
+ALN_CIGAR_TYPE_NULL	aln_cigar.h	28;"	d
+ALN_CIGAR_TYPE_SKIP	aln_cigar.h	32;"	d
+ASM_KMER_MASK	asm_R2.h	32;"	d
+ASM_KMER_SIZE	asm_R2.h	31;"	d
+ASM_R2_H	asm_R2.h	20;"	d
+AlnAln	stdaln.h	/^} AlnAln;$/;"	t	typeref:struct:__anon16
+AlnCigar	aln_cigar.h	/^} AlnCigar;$/;"	t	typeref:struct:__anon56
+AlnParam	stdaln.h	/^} AlnParam;$/;"	t	typeref:struct:__anon14
+BaseCnt	divide.c	/^} BaseCnt;$/;"	t	typeref:struct:__anon55	file:
+BitVec	bitvec.h	/^} BitVec;$/;"	t	typeref:struct:__anon54
+Block	mergetag.c	/^} Block;$/;"	t	typeref:struct:__anon13	file:
+BloomFilter	bloom_filter.h	/^} BloomFilter;$/;"	t	typeref:struct:__anon34
+BufferedInputFile	file_reader.h	/^} BufferedInputFile;$/;"	t	typeref:struct:__anon33
+CC	Makefile	/^CC=gcc$/;"	m
+CFLAGS	Makefile	/^CFLAGS= -W -O2 -Wall -finline-functions -D_FILE_OFFSET_BITS=64$/;"	m
+C_N_2	divide.c	/^static inline uint32_t C_N_2(uint32_t n){$/;"	f	file:
+Cluster	rainbow.h	/^} Cluster;$/;"	t	typeref:struct:__anon20
+Ctg	mergecontig.h	/^} Ctg;$/;"	t	typeref:struct:__anon45
+CtgDB	mergecontig.h	/^} CtgDB;$/;"	t	typeref:struct:__anon46
+D	stdaln.c	/^	int M, I, D;$/;"	m	struct:__anon10	file:
+DELETE	ezmsim.c	/^enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000};$/;"	e	enum:muttype_t	file:
+DEPTH	ezmsim.c	/^static double DEPTH = 10.0;$/;"	v	file:
+Div	rainbow.h	/^} Div;$/;"	t	typeref:struct:__anon23
+Dt	stdaln.c	/^	unsigned char Mt:3, It:2, Dt:2;$/;"	m	struct:__anon9	file:
+EF	asm_R2.h	/^} EF;$/;"	t	typeref:struct:__anon40
+EF_main	ezmsim.c	/^int EF_main(int argc, char *argv[])$/;"	f
+EF_usage	ezmsim.c	/^int EF_usage()$/;"	f
+ERR_RATE	ezmsim.c	/^static double ERR_RATE = 0.02;$/;"	v	file:
+FASTA_FLAG_NORMAL	file_reader.h	101;"	d
+FASTA_FLAG_NO_NAME	file_reader.h	102;"	d
+FASTA_FLAG_NO_SEQ	file_reader.h	103;"	d
+FASTQ_FLAG_NORMAL	file_reader.h	109;"	d
+FASTQ_FLAG_NO_NAME	file_reader.h	110;"	d
+FASTQ_FLAG_NO_QUAL	file_reader.h	112;"	d
+FASTQ_FLAG_NO_SEQ	file_reader.h	111;"	d
+FContig	asm_R2.h	/^} FContig;$/;"	t	typeref:struct:__anon38
+FROM_D	stdaln.h	39;"	d
+FROM_I	stdaln.h	38;"	d
+FROM_M	stdaln.h	37;"	d
+FRead	asm_R2.h	/^} FRead;$/;"	t	typeref:struct:__anon37
+FileReader	file_reader.h	/^} FileReader;$/;"	t	typeref:struct:__anon31
+GENERIC_SRC	Makefile	/^GENERIC_SRC= string.h bitvec.h file_reader.h hashset.h sort.h list.h dna.h heap.h stdaln.h vector.h$/;"	m
+GLIBS	Makefile	/^GLIBS=-lm$/;"	m
+Gpush_vec	vector.h	112;"	d
+HASH_FLAG_MACROS	hashset.h	53;"	d
+HOM_RATE	ezmsim.c	/^static double HOM_RATE = 0.0;$/;"	v	file:
+Heap	heap.h	/^} Heap;$/;"	t	typeref:struct:__anon11
+I	stdaln.c	/^	int M, I, D;$/;"	m	struct:__anon10	file:
+INDEL_EXTEND	ezmsim.c	/^static double INDEL_EXTEND = 0.3;$/;"	v	file:
+INDEL_FRAC	ezmsim.c	/^static double INDEL_FRAC = 0.1;$/;"	v	file:
+INIT_IDX	ezmsim.c	52;"	d	file:
+INIT_SEQ	ezmsim.c	51;"	d	file:
+INSERT	ezmsim.c	/^enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000};$/;"	e	enum:muttype_t	file:
+It	stdaln.c	/^	unsigned char Mt:3, It:2, Dt:2;$/;"	m	struct:__anon9	file:
+KMER_NUM	rainbow.h	/^	uint32_t KMER_NUM;$/;"	m	struct:__anon20
+KMER_SIZE	rainbow.h	/^	uint32_t KMER_SIZE;$/;"	m	struct:__anon20
+KMER_SIZE_CTG	mergecontig.h	13;"	d
+K_allele	rainbow.h	/^	uint32_t k_allele, K_allele;$/;"	m	struct:__anon23
+LH3_STDALN_H_	stdaln.h	24;"	d
+LOCAL_OVERFLOW_REDUCE	stdaln.c	233;"	d	file:
+LOCAL_OVERFLOW_THRESHOLD	stdaln.c	232;"	d	file:
+LR_main	ezmsim.c	/^int LR_main(int argc, char *argv[])$/;"	f
+LR_usage	ezmsim.c	/^int LR_usage() {$/;"	f
+M	stdaln.c	/^	int M, I, D;$/;"	m	struct:__anon10	file:
+MAX_RD_LEN	asm_R2.h	30;"	d
+MERGECTG_H	mergectg.h	2;"	d
+MINOR_INF	stdaln.h	42;"	d
+MUT_RATE	ezmsim.c	/^static double MUT_RATE = 0.001;$/;"	v	file:
+MYALLOC	stdaln.h	31;"	d
+MYFREE	stdaln.h	34;"	d
+Mt	stdaln.c	/^	unsigned char Mt:3, It:2, Dt:2;$/;"	m	struct:__anon9	file:
+MurmurHash64A	hashset.h	/^static inline uint64_t MurmurHash64A(const void * key, int len, uint32_t seed){$/;"	f
+NOCHANGE	ezmsim.c	/^enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000};$/;"	e	enum:muttype_t	file:
+NT_LOCAL_MASK	stdaln.c	236;"	d	file:
+NT_LOCAL_SCORE	stdaln.c	234;"	d	file:
+NT_LOCAL_SHIFT	stdaln.c	235;"	d	file:
+ONES_STEP_4	bitvec.h	92;"	d
+ONES_STEP_8	bitvec.h	93;"	d
+Overlap	asm_R2.h	/^} Overlap;$/;"	t	typeref:struct:__anon39
+PACKAGE_VERSION	ezmsim.c	11;"	d	file:
+PWDB	mergecontig.h	/^} PWDB;$/;"	t	typeref:struct:__anon48
+PWcontig	mergecontig.h	/^} PWcontig;$/;"	t	typeref:struct:__anon47
+RD_KMER_SIZE	mergectg.h	/^	uint32_t RD_KMER_SIZE; \/\/ parameter$/;"	m	struct:__anon8
+REC	mergetag.c	/^} REC;$/;"	t	typeref:struct:__anon12	file:
+ReadInfo	rainbow.h	/^} ReadInfo;$/;"	t	typeref:struct:__anon21
+SBT	rainbow.h	/^} SBT;$/;"	t	typeref:struct:__anon19
+SEQ_BLOCK_SIZE	ezmsim.c	/^static int SEQ_BLOCK_SIZE = 512;$/;"	v	file:
+SET_INF	stdaln.c	238;"	d	file:
+STDALN_VERSION	stdaln.h	27;"	d
+SUBSTITUTE	ezmsim.c	/^enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000};$/;"	e	enum:muttype_t	file:
+SWAP_TMP	string.h	34;"	d
+SeqDB	rainbow.h	/^} SeqDB;$/;"	t	typeref:struct:__anon18
+SeqFileAttr	file_reader.h	/^} SeqFileAttr;$/;"	t	typeref:struct:__anon32
+Sequence	file_reader.h	/^} Sequence;$/;"	t	typeref:struct:__anon29
+SimpAssembler	simp_asm.h	/^} SimpAssembler;$/;"	t	typeref:struct:__anon51
+SimpContigInfo	simp_asm.h	/^} SimpContigInfo;$/;"	t	typeref:struct:__anon50
+SimpSeqInfo	simp_asm.h	/^} SimpSeqInfo;$/;"	t	typeref:struct:__anon49
+String	string.h	/^} String;$/;"	t	typeref:struct:__anon52
+Vector	vector.h	/^typedef struct Vector {$/;"	s
+Vector	vector.h	/^} Vector;$/;"	t	typeref:struct:Vector
+VirtualString	string.h	/^} VirtualString;$/;"	t	typeref:struct:__anon53
+__ALN_CIGAR_RJ_H	aln_cigar.h	21;"	d
+__BIT_VEC_RJ_H	bitvec.h	21;"	d
+__BLOOM_FILTER_RJ_H	bloom_filter.h	22;"	d
+__DNA_RJ_H	dna.h	21;"	d
+__FILE_READER_RJ_H	file_reader.h	21;"	d
+__HASH_SET_RJ	hashset.h	21;"	d
+__HEAP_RJ_H	heap.h	21;"	d
+__LIST_RJ_H	list.h	21;"	d
+__MERGECONTIG_H	mergecontig.h	2;"	d
+__RAINBOW_RJ_H	rainbow.h	21;"	d
+__SIMPLE_ASM_RJ_H	simp_asm.h	21;"	d
+__SORT_RJ_H	sort.h	21;"	d
+__STRING_RJ_H	string.h	21;"	d
+__VECTOR_H_RJ	vector.h	21;"	d
+__lh3_Jenkins_hash_64	hashset.h	/^static inline uint64_t __lh3_Jenkins_hash_64(uint64_t key){$/;"	f
+__lh3_Jenkins_hash_int	hashset.h	/^static inline uint32_t __lh3_Jenkins_hash_int(uint32_t key){$/;"	f
+__string_hashcode	hashset.h	/^static inline uint32_t __string_hashcode(const char *s){$/;"	f
+_aln_cigar_add_cigar	aln_cigar.h	/^static inline int _aln_cigar_add_cigar(AlnCigar *cs, int n_cigar, int len, int type){$/;"	f
+_aln_cigar_h_num_str_len	aln_cigar.h	/^static inline int _aln_cigar_h_num_str_len(int n){$/;"	f
+_call_key_col	divide.c	/^uint32_t _call_key_col(Div *div, uint32_t gid){$/;"	f
+_rj_hashset_find_prime	hashset.h	/^static inline uint64_t _rj_hashset_find_prime(uint64_t n){$/;"	f
+add_char_string	string.h	/^static inline void add_char_string(String *str, char ch){$/;"	f
+add_hashset_macro	hashset.h	172;"	d
+add_read2ef	asm_R2.c	/^void add_read2ef(EF *ef, char *seq, uint32_t seq_id, uint32_t rd_len, uint32_t rank){ add_read2ef_core(ef, seq, seq_id, rd_len, (rank == 0)? 1 : rank); }$/;"	f
+add_read2ef_core	asm_R2.c	/^void add_read2ef_core(EF *ef, char *seq, uint32_t seq_id, uint32_t rd_len, uint32_t rank){$/;"	f
+add_vec_size	vector.h	/^static inline int add_vec_size(Vector *vec, size_t add_size){$/;"	f
+align_reads_ef	asm_R2.c	/^void align_reads_ef(EF *ef){$/;"	f
+aln_aa_rev_table	stdaln.c	/^char *aln_aa_rev_table = "ARNDCQEGHILKMFPSTWYV*X-";$/;"	v
+aln_aa_table	stdaln.c	/^unsigned char aln_aa_table[256] = {$/;"	v
+aln_cigar_string	aln_cigar.h	/^static const char aln_cigar_string[8] = "?IDM?SHN";$/;"	v
+aln_cmp	mergecontig.c	/^int aln_cmp(const void *p0, const void *p1, void *ref) {$/;"	f
+aln_ext_size	simp_asm.h	/^	int      aln_ext_size;$/;"	m	struct:__anon51
+aln_free_AlnAln	stdaln.c	/^void aln_free_AlnAln(AlnAln *aa)$/;"	f
+aln_global_core	stdaln.c	/^int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,$/;"	f
+aln_init_AlnAln	stdaln.c	/^AlnAln *aln_init_AlnAln()$/;"	f
+aln_init_score_array	stdaln.c	/^void aln_init_score_array(unsigned char *seq, int len, int row, int *score_matrix, int **s_array)$/;"	f
+aln_local_core	stdaln.c	/^int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,$/;"	f
+aln_nt16_rev_table	stdaln.c	/^char *aln_nt16_rev_table = "XAGRCMSVTWKDYHBN-";$/;"	v
+aln_nt16_table	mergecontig.h	/^static unsigned char aln_nt16_table[256] = {$/;"	v
+aln_nt16_table	stdaln.c	/^unsigned char aln_nt16_table[256] = {$/;"	v
+aln_nt4_rev_table	stdaln.c	/^char *aln_nt4_rev_table = "AGCTN-";$/;"	v
+aln_nt4_table	stdaln.c	/^unsigned char aln_nt4_table[256] = {$/;"	v
+aln_param_aa2aa	stdaln.c	/^AlnParam aln_param_aa2aa   = { 12,  2,  2, aln_sm_blosum62, 22, 50 };$/;"	v
+aln_param_nt2nt	stdaln.c	/^AlnParam aln_param_nt2nt   = { 10,  2,  2, aln_sm_nt, 16, 75 };$/;"	v
+aln_param_rd2rd	stdaln.c	/^AlnParam aln_param_rd2rd   = { 20, 19, 19, aln_sm_read, 16, 75 };$/;"	v
+aln_sm_blosum45	stdaln.c	/^int aln_sm_blosum45[] = {$/;"	v
+aln_sm_blosum62	stdaln.c	/^int aln_sm_blosum62[] = {$/;"	v
+aln_sm_hs	stdaln.c	/^int aln_sm_hs[] = {$/;"	v
+aln_sm_nt	stdaln.c	/^int aln_sm_nt[] = {$/;"	v
+aln_sm_read	stdaln.c	/^int aln_sm_read[] = {$/;"	v
+aln_stdaln	stdaln.c	/^AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int do_align)$/;"	f
+aln_stdaln_aux	stdaln.c	/^AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,$/;"	f
+aln_str	mergecontig.h	/^static inline void aln_str(char *s1, char *s2, int *mm, int *mn, int *score) {$/;"	f
+aln_trans_table_eu	stdaln.c	/^unsigned char aln_trans_table_eu[66] = {$/;"	v
+aln_trans_table_eu_char	stdaln.c	/^char *aln_trans_table_eu_char = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFFX";$/;"	v
+alning_core	cluster.c	/^uint32_t alning_core(Cluster *cluster){$/;"	f
+ap	simp_asm.h	/^	AlnParam ap;$/;"	m	struct:__anon51
+append_cigars	aln_cigar.h	/^static inline int append_cigars(AlnCigar *cs, int n, int type, int len){$/;"	f
+append_string	string.h	/^static inline void append_string(String *str, char *src, int offlen){$/;"	f
+apply_array	sort.h	92;"	d
+apply_cigars	aln_cigar.h	/^static inline int apply_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2){$/;"	f
+as_string	string.h	/^static inline String* as_string(char *chs){$/;"	f
+asm_ef	asm_R2.c	/^uint32_t asm_ef(FileReader *in, FILE *out, uint32_t min_ol, float min_sm, uint32_t min_read, uint32_t max_read){$/;"	f
+asm_ef_ctgs	asm_R2.c	/^void asm_ef_ctgs(EF *ef){$/;"	f
+avg_seq_len	file_reader.h	/^	int avg_seq_len;$/;"	m	struct:__anon32
+band_width	stdaln.h	/^	int band_width;$/;"	m	struct:__anon14
+base	divide.c	/^	uint32_t base;$/;"	m	struct:__anon55	file:
+base	rainbow.h	/^	uint32_t col, cnt, base;$/;"	m	struct:__anon22
+base_bit4_table	dna.h	/^static const uint8_t base_bit4_table[256] = {$/;"	v
+base_bit_table	dna.h	/^static const uint8_t base_bit_table[256] = {$/;"	v
+beg_seq2kmers	dna.h	107;"	d
+beg_seq2revkmers	dna.h	114;"	d
+begin_iter_bitvec	bitvec.h	/^static inline void begin_iter_bitvec(BitVec *bitv){ bitv->iter_idx = 0; }$/;"	f
+begin_iter_simpasm	simp_asm.h	/^static inline void begin_iter_simpasm(SimpAssembler *sa){ sa->iter_idx = 0; }$/;"	f
+bit2bits	dna.h	144;"	d
+bit4_base_table	dna.h	/^static const char bit4_base_table[16] = "-ACMGRSVTWYHKDBN";$/;"	v
+bit4_bit_table	dna.h	/^static const uint8_t bit4_bit_table[16] = { 4, 0, 1, 4,  2, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4 };$/;"	v
+bit_base_table	dna.h	/^static const char bit_base_table[6] = "ACGTN-";$/;"	v
+bits	bitvec.h	/^	uint64_t *bits;$/;"	m	struct:__anon54
+bits	bloom_filter.h	/^	BitVec *bits;$/;"	m	struct:__anon34
+bits2bit	dna.h	165;"	d
+bits2revseq	dna.h	/^static inline void bits2revseq(char *seq, uint64_t *bits, uint64_t off, uint32_t len){$/;"	f
+bits2seq	dna.h	/^static inline void bits2seq(char *seq, uint64_t *bits, uint64_t off, uint32_t len){$/;"	f
+bloom_filter_total_seeds	bloom_filter.h	/^static const uint32_t bloom_filter_total_seeds = 20;$/;"	v
+bsearch_vec	vector.h	/^static inline void* bsearch_vec(Vector *vec, void *q, cmp_vec_fun fun){$/;"	f
+bt	rainbow.h	/^	uint32_t bt;$/;"	m	struct:__anon19
+bts	rainbow.h	/^	u32list *bts;$/;"	m	struct:__anon20
+buf_cap	file_reader.h	/^	int buf_off, buf_size, buf_cap;$/;"	m	struct:__anon33
+buf_off	file_reader.h	/^	int buf_off, buf_size, buf_cap;$/;"	m	struct:__anon33
+buf_size	file_reader.h	/^	int buf_off, buf_size, buf_cap;$/;"	m	struct:__anon33
+buffer	file_reader.h	/^	char *buffer;$/;"	m	struct:__anon31
+buffer	file_reader.h	/^	void *buffer;$/;"	m	struct:__anon33
+buffer	vector.h	/^	void *buffer;$/;"	m	struct:Vector
+build_tree	mergectg.c	/^void build_tree(merge_t *merger) {$/;"	f
+cache	mergectg.h	/^	contigsv *cache;$/;"	m	struct:__anon8
+cache	rainbow.h	/^	u32slist *grps, *cache;$/;"	m	struct:__anon23
+cal_2seq_mm_core	cluster.c	/^uint8_t cal_2seq_mm_core(uint64_t *seq1, uint64_t *seq2, uint8_t len1, uint8_t len2){$/;"	f
+cal_mm	mergetag.c	/^uint32_t cal_mm(String *seqs, Block *b1, Block *b2){$/;"	f
+call_key_col	divide.c	/^uint32_t call_key_col(Div *div, uint32_t gid){$/;"	f
+cap	vector.h	/^	size_t cap;$/;"	m	struct:Vector
+capacity	file_reader.h	/^	int capacity;$/;"	m	struct:__anon31
+capacity	string.h	/^	int capacity;$/;"	m	struct:__anon52
+cat_cigars	aln_cigar.h	/^static inline int cat_cigars(AlnCigar *cigars1, int n_cigar1, AlnCigar *cigars2, int n_cigar2){$/;"	f
+cat_vec	vector.h	/^static inline int cat_vec(Vector *dst, Vector *src){$/;"	f
+catstr	string.h	/^static inline char* catstr(int n_str, ...){$/;"	f
+cbs	rainbow.h	/^	cbv *cbs;$/;"	m	struct:__anon23
+change_seeds_bloomfilter	bloom_filter.h	/^static inline void change_seeds_bloomfilter(BloomFilter *bf){ bf->seed_off = (bf->seed_off + bf->n_seed) % bloom_filter_total_seeds; }$/;"	f
+chash_code	hashset.h	499;"	d
+chash_equals	hashset.h	500;"	d
+chomp_string	string.h	/^static inline void chomp_string(String *str){$/;"	f
+cid	mergectg.h	/^	uint32_t cid; \/\/$/;"	m	struct:__anon8
+cid	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+cigars2string	aln_cigar.h	/^static inline char* cigars2string(AlnCigar *cigars, int n_cigar, char *str){$/;"	f
+cigars_lengths	aln_cigar.h	/^static inline void cigars_lengths(AlnCigar *cigars, int n_cigar, int *aln_size, int *seq1_size, int *seq2_size){$/;"	f
+cigars_seq2aln	aln_cigar.h	/^static inline int cigars_seq2aln(char *dst, AlnCigar *c, int n, int seq_idx, char *seq){$/;"	f
+clear_bitvec	bitvec.h	/^static inline void clear_bitvec(BitVec *bitv){ bitv->n_bit = 0; }$/;"	f
+clear_bloomfilter	bloom_filter.h	/^static inline void clear_bloomfilter(BloomFilter *bf){ zeros_bitvec(bf->bits); }$/;"	f
+clear_ctg	mergectg.c	/^void clear_ctg(contig_t *ctg) {$/;"	f
+clear_entity_del	hashset.h	60;"	d
+clear_entity_null	hashset.h	59;"	d
+clear_hashset_macro	hashset.h	265;"	d
+clear_heap	heap.h	/^static inline void clear_heap(Heap *heap){ clear_rjheapv(heap->ptrs); }$/;"	f
+clear_string	string.h	/^static inline void clear_string(String *str){ str->size = 0; }$/;"	f
+clear_vec	vector.h	217;"	d
+clone_string	string.h	/^static inline String* clone_string(String *str){ $/;"	f
+close_bif	file_reader.h	/^static inline void close_bif(BufferedInputFile *bif){$/;"	f
+closed	asm_R2.h	/^	uint32_t len:31, closed:1;$/;"	m	struct:__anon38
+closed	mergectg.h	/^	int closed;$/;"	m	struct:__anon3
+closed	simp_asm.h	/^	uint32_t len:31, closed:1;$/;"	m	struct:__anon50
+cls_id	mergecontig.h	/^	uint32_t cls_id;$/;"	m	struct:__anon45
+cluster_invoker	main.c	/^int cluster_invoker(int argc, char **argv){$/;"	f
+clustering	cluster.c	/^void clustering(Cluster *cluster, FileReader *fr2, int is_fq2, int fix_rd_len, FILE *out){$/;"	f
+clustering_ctg	mergecontig.c	/^PWDB* clustering_ctg(PWDB *db, uint32_t min_overlap, float het) {$/;"	f
+cmp	heap.h	/^	heap_comp_func cmp;$/;"	m	struct:__anon11
+cmp_2nums_proc	sort.h	26;"	d
+cmp_ctg_clsid	mergecontig.c	/^int cmp_ctg_clsid(const void *p0, const void *p1) {$/;"	f
+cmp_ids	mergecontig.h	/^static inline int cmp_ids(const void *e1, const void *e2) {$/;"	f
+cmp_kmer_pos	mergecontig.h	/^static inline int cmp_kmer_pos (const void *e1, const void *e2) {$/;"	f
+cmp_ol_func	asm_R2.c	/^int cmp_ol_func(const void *e1, const void *e2){$/;"	f
+cmp_sbt	cluster.c	/^inline int cmp_sbt(const void *e1, const void *e2){$/;"	f
+cmp_sr_alnhit	simp_asm.h	/^static inline int cmp_sr_alnhit(const void *e1, const void *e2){$/;"	f
+cmp_vec_fun	vector.h	/^typedef int (*cmp_vec_fun)(const void *k1, const void *k2);$/;"	t
+cns_len	mergetag.c	/^	uint32_t cns_off, cns_len;$/;"	m	struct:__anon13	file:
+cns_off	mergetag.c	/^	uint32_t cns_off, cns_len;$/;"	m	struct:__anon13	file:
+cnt	divide.c	/^	uint32_t cnt;$/;"	m	struct:__anon55	file:
+cnt	rainbow.h	/^	uint32_t col, cnt, base;$/;"	m	struct:__anon22
+col	rainbow.h	/^	uint32_t col, cnt, base;$/;"	m	struct:__anon22
+col_base_t	rainbow.h	/^} col_base_t;$/;"	t	typeref:struct:__anon22
+comment	file_reader.h	/^	String comment;$/;"	m	struct:__anon29
+compile_cigars	aln_cigar.h	/^static inline int compile_cigars(AlnCigar *dst, AlnCigar *c1, int n1, AlnCigar *c2, int n2, int seq_idx){$/;"	f
+consensus	mergetag.c	/^void consensus(recv *divs, String *seqs, uint32_t beg, uint32_t end, uint32_t *cns_off, uint32_t *cns_len){$/;"	f
+contig_code	mergectg.h	46;"	d
+contig_eq	mergectg.h	47;"	d
+contig_seq_t	mergectg.h	/^} contig_seq_t;$/;"	t	typeref:struct:__anon4
+contig_t	mergectg.h	/^} contig_t; $/;"	t	typeref:struct:__anon3
+count_hashset_macro	hashset.h	263;"	d
+count_heap	heap.h	/^static inline size_t count_heap(Heap *heap){ return count_rjheapv(heap->ptrs); }$/;"	f
+count_ones_bit32	bitvec.h	/^static inline uint32_t count_ones_bit32(uint32_t v){$/;"	f
+count_ones_bit64	bitvec.h	/^static inline int count_ones_bit64(const uint64_t x){$/;"	f
+ctg_dir	simp_asm.h	/^	uint32_t ctg_id, ctg_dir:1, ctg_off:20, used:1, rank:10;$/;"	m	struct:__anon49
+ctg_id	asm_R2.h	/^	uint32_t ctg_id:24, ctg_off:19, used:1;$/;"	m	struct:__anon37
+ctg_id	simp_asm.h	/^	uint32_t ctg_id, ctg_dir:1, ctg_off:20, used:1, rank:10;$/;"	m	struct:__anon49
+ctg_kmer_t	mergectg.h	/^} ctg_kmer_t;$/;"	t	typeref:struct:__anon5
+ctg_off	asm_R2.h	/^	uint32_t ctg_id:24, ctg_off:19, used:1;$/;"	m	struct:__anon37
+ctg_off	simp_asm.h	/^	uint32_t ctg_id, ctg_dir:1, ctg_off:20, used:1, rank:10;$/;"	m	struct:__anon49
+ctgnum	mergecontig.h	/^	uint32_t ctgnum;$/;"	m	struct:__anon46
+ctgs	asm_R2.h	/^	Vector   *ctgs;$/;"	m	struct:__anon40
+ctgs	mergecontig.h	/^	ctglist *ctgs;$/;"	m	struct:__anon46
+ctgs	mergectg.h	/^	contigv *ctgs;$/;"	m	struct:__anon8
+ctgs	simp_asm.h	/^	ctgv     *ctgs;$/;"	m	struct:__anon51
+ctgv	mergecontig.h	/^	ctglist *ctgv;$/;"	m	struct:__anon48
+ctype	stdaln.h	/^	unsigned char ctype;$/;"	m	struct:__anon15
+cuhash_code	hashset.h	509;"	d
+cuhash_equals	hashset.h	510;"	d
+cuhash_t	hashset.h	/^typedef struct { char *key; uint32_t val; } cuhash_t;$/;"	t	typeref:struct:__anon28
+define_apply_array	sort.h	230;"	d
+define_bubble_sort	sort.h	29;"	d
+define_hashset	hashset.h	371;"	d
+define_list	list.h	214;"	d
+define_list_core	list.h	39;"	d
+define_list_ext	list.h	166;"	d
+define_merge	sort.h	153;"	d
+define_native_list	list.h	218;"	d
+define_quick_sort	sort.h	114;"	d
+define_reverse_array	sort.h	217;"	d
+define_revsere_vec	vector.h	177;"	d
+define_search_array	sort.h	241;"	d
+define_unique_merge	sort.h	202;"	d
+delimiter	file_reader.h	/^	char delimiter;$/;"	m	struct:__anon31
+deps	rainbow.h	/^	u32list *deps;$/;"	m	struct:__anon23
+destroy_tree	mergectg.c	/^void destroy_tree(pathtree_t *t) {$/;"	f
+div_invoker	main.c	/^int div_invoker(int argc, char **argv){$/;"	f
+div_reads	divide.c	/^uint32_t div_reads(Div *div, FileReader *fr, FILE *out){$/;"	f
+dividing	divide.c	/^void dividing(Div *div, uint32_t old_gid, FILE *out){$/;"	f
+dividing_core	divide.c	/^void dividing_core(Div *div, uint32_t gid, int dep){$/;"	f
+dna_rev_seq	dna.h	/^static inline uint64_t dna_rev_seq(uint64_t seq, uint8_t seq_size){$/;"	f
+dna_xor2ones	dna.h	/^static inline uint64_t dna_xor2ones(uint64_t seq){$/;"	f
+dpcell_t	stdaln.c	/^} dpcell_t;$/;"	t	typeref:struct:__anon9	file:
+dpscore_t	stdaln.c	/^} dpscore_t;$/;"	t	typeref:struct:__anon10	file:
+dump_hashset_macro	hashset.h	277;"	d
+dump_vec	vector.h	/^static inline size_t dump_vec(Vector *vec, FILE *out){$/;"	f
+e_size	vector.h	/^	unsigned int e_size;$/;"	m	struct:Vector
+ef	mergectg.h	/^	EF *ef;$/;"	m	struct:__anon8
+ef_id	asm_R2.h	/^	uint32_t ef_id;$/;"	m	struct:__anon40
+ef_usage	asm_R2.c	/^int ef_usage(){$/;"	f
+encap_bitvec	bitvec.h	/^static inline void encap_bitvec(BitVec *bitv, uint64_t num){$/;"	f
+encap_hashset_macro	hashset.h	317;"	d
+encap_vec	vector.h	/^static inline int encap_vec(Vector *vec, unsigned int add_size){$/;"	f
+end1	stdaln.h	/^	int start1, end1; \/* start and end of the first sequence, coordinations are 1-based *\/$/;"	m	struct:__anon16
+end2	stdaln.h	/^	int start2, end2; \/* start and end of the second sequence, coordinations are 1-based *\/$/;"	m	struct:__anon16
+end_seq2kmers	dna.h	112;"	d
+end_seq2kmers	dna.h	119;"	d
+err_xopen_core	ezmsim.c	/^FILE *err_xopen_core(const char *func, const char *fn, const char *mode)$/;"	f
+eseq	asm_R2.h	/^	char     eseq[MAX_RD_LEN];$/;"	m	struct:__anon40
+exact_limit	rainbow.h	/^	uint32_t exact_limit;$/;"	m	struct:__anon20
+execute_pwaln	mergecontig.c	/^void execute_pwaln(CtgDB *db, uint32_t min_overlap, float het, uint32_t max_nctg) {$/;"	f
+exists_entity	hashset.h	56;"	d
+exists_hashset_macro	hashset.h	153;"	d
+ezmsim_EF_core	ezmsim.c	/^void ezmsim_EF_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, unsigned int size_l, unsigned int size_r, unsigned char *cut, int pos, int distance, int ovlp, int stp, int reverse, int is_hap)$/;"	f
+ezmsim_LR_core	ezmsim.c	/^void ezmsim_LR_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int size_l, int size_r, unsigned char *cut, int pos)$/;"	f
+fclose_filereader	file_reader.c	/^void fclose_filereader(FileReader *fr){$/;"	f
+ffread	hashset.h	275;"	d
+ffwrite	hashset.h	274;"	d
+fidx	file_reader.h	/^	uint32_t fidx;$/;"	m	struct:__anon31
+file	file_reader.h	/^	FILE *file;$/;"	m	struct:__anon30
+file	file_reader.h	/^	FILE *file;$/;"	m	struct:__anon33
+filename	file_reader.h	/^	char *filename;$/;"	m	struct:__anon30
+files	file_reader.h	/^	Vector *files;$/;"	m	struct:__anon31
+find_overlap	asm_R2.c	/^void find_overlap(char *seq1, uint32_t len1, uint32_t off1, char *seq2, uint32_t len2, uint32_t off2, uint32_t *l_ol, uint32_t *r_ol, uint32_t *n_mm){$/;"	f
+flag	mergectg.h	/^	int flag;  \/\/ if == 0 first use, init; else reset$/;"	m	struct:__anon8
+flags	rainbow.h	/^	BitVec  *flags;$/;"	m	struct:__anon20
+flip_bitvec	bitvec.h	/^static inline void flip_bitvec(BitVec *bitv, uint64_t idx){ bitv->bits[idx>>6] ^= 1LLU << (idx&0x3FU); }$/;"	f
+flip_cigars	aln_cigar.h	/^static inline void flip_cigars(AlnCigar *cigars, int n_cigar){$/;"	f
+fopen_filereader	file_reader.c	/^FileReader* fopen_filereader(char *filename){$/;"	f
+fopen_filereader2	file_reader.c	/^FileReader* fopen_filereader2(char *prefix, char *postfix){$/;"	f
+fopen_m_filereader	file_reader.c	/^FileReader* fopen_m_filereader(int n_file, char **filenames){$/;"	f
+fr_file_t	file_reader.h	/^} fr_file_t;$/;"	t	typeref:struct:__anon30
+fr_fread	file_reader.c	/^static inline int fr_fread(void *buf, size_t e_size, size_t size, FILE *in){$/;"	f	file:
+fread_all	file_reader.c	/^char *fread_all(FileReader *fr){$/;"	f
+fread_fasta	file_reader.h	107;"	d
+fread_fasta_adv	file_reader.c	/^int fread_fasta_adv(Sequence **seq_ptr, FileReader *fr, int fasta_flag){$/;"	f
+fread_fastq	file_reader.h	116;"	d
+fread_fastq_adv	file_reader.c	/^int fread_fastq_adv(Sequence **seq_ptr, FileReader *fr, int fastq_flag){$/;"	f
+fread_line	file_reader.c	/^int fread_line(String *line, FileReader *fr){$/;"	f
+fread_line2	file_reader.c	/^int fread_line2(String *line, FileReader *fr){$/;"	f
+fread_table	file_reader.c	/^int fread_table(FileReader *fr){$/;"	f
+free_bitvec	bitvec.h	/^static inline void free_bitvec(BitVec *bitv){$/;"	f
+free_bloomfilter	bloom_filter.h	/^static inline void free_bloomfilter(BloomFilter *bf){$/;"	f
+free_cluster	cluster.c	/^void free_cluster(Cluster *cluster){$/;"	f
+free_ctg	mergectg.c	/^void free_ctg(contig_t *ctg) {$/;"	f
+free_ctgdb	mergecontig.c	/^void free_ctgdb(CtgDB *db) {$/;"	f
+free_ctgs	mergectg.c	/^void free_ctgs(merge_t *merger) {$/;"	f
+free_div	divide.c	/^void free_div(Div *div){$/;"	f
+free_ef	asm_R2.c	/^void free_ef(EF *ef){$/;"	f
+free_hashset_macro	hashset.h	310;"	d
+free_heap	heap.h	/^static inline void free_heap(Heap *heap){ free_rjheapv(heap->ptrs); free(heap); }$/;"	f
+free_load_ctgdb	mergecontig.c	/^void free_load_ctgdb(CtgDB *db) {$/;"	f
+free_merger	mergectg.c	/^void free_merger(merge_t *merger) {$/;"	f
+free_pwdb	mergecontig.c	/^void free_pwdb(PWDB *db) {$/;"	f
+free_sequence	file_reader.h	59;"	d
+free_simpasm	simp_asm.h	/^static inline void free_simpasm(SimpAssembler *sa){$/;"	f
+free_string	string.h	/^static inline void free_string(String *str){ free(str->string); free(str); }$/;"	f
+free_tree	mergectg.c	/^void free_tree(merge_t *merger) {$/;"	f
+free_vec	vector.h	/^static inline void free_vec(Vector *vec){$/;"	f
+froll_back	file_reader.c	/^int froll_back(FileReader *fr){$/;"	f
+gap_end	stdaln.h	/^	int gap_end;$/;"	m	struct:__anon14
+gap_ext	stdaln.h	/^	int gap_ext;$/;"	m	struct:__anon14
+gap_open	stdaln.h	/^	int gap_open;$/;"	m	struct:__anon14
+get_2bit16	bitvec.h	33;"	d
+get_2bit32	bitvec.h	34;"	d
+get_2bit64	bitvec.h	35;"	d
+get_2bit8	bitvec.h	32;"	d
+get_bit16	bitvec.h	28;"	d
+get_bit32	bitvec.h	29;"	d
+get_bit64	bitvec.h	30;"	d
+get_bit8	bitvec.h	27;"	d
+get_bitvec	bitvec.h	/^static inline uint64_t get_bitvec(BitVec *bitv, uint64_t idx){ return (bitv->bits[idx>>6] >> (idx&0x3FU)) & 0x01LLU; }$/;"	f
+get_bloomfilter	bloom_filter.h	/^static inline int  get_bloomfilter(BloomFilter *bf, const void *key, uint32_t len){$/;"	f
+get_col_len	file_reader.h	89;"	d
+get_col_str	file_reader.h	88;"	d
+get_col_vstr	file_reader.h	87;"	d
+get_hashset_macro	hashset.h	94;"	d
+get_last_vec_ref	vector.h	/^static inline void* get_last_vec_ref(Vector *vec){$/;"	f
+get_next_vec_ref	vector.h	/^static inline void* get_next_vec_ref(Vector *vec){$/;"	f
+get_pool_ctg	asm_R2.c	/^FContig* get_pool_ctg(EF *ef){$/;"	f
+get_pool_vec	asm_R2.c	/^Vector* get_pool_vec(EF *ef){$/;"	f
+get_vec	vector.h	/^static inline int get_vec(Vector *vec, size_t idx, void *e){$/;"	f
+get_vec_ref	vector.h	/^static inline void* get_vec_ref(Vector *vec, size_t idx){$/;"	f
+gget_vec	vector.h	133;"	d
+gid	mergetag.c	/^	uint32_t gid, off, len;$/;"	m	struct:__anon13	file:
+gid	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+gid_map	rainbow.h	/^	u32list *gid_map;$/;"	m	struct:__anon20
+gidoff	rainbow.h	/^	uint32_t gidoff;$/;"	m	struct:__anon20
+gidoff	rainbow.h	/^	uint32_t gidoff;$/;"	m	struct:__anon23
+gids	rainbow.h	/^	u32list *gids;$/;"	m	struct:__anon20
+gids	rainbow.h	/^	u32list *gids;$/;"	m	struct:__anon23
+gpeer_vec	vector.h	139;"	d
+gpop_vec	vector.h	120;"	d
+gpush_vec	vector.h	111;"	d
+grps	rainbow.h	/^	u32slist *grps, *cache;$/;"	m	struct:__anon23
+gset_vec	vector.h	126;"	d
+guess_seq_file	file_reader.c	/^void guess_seq_file(FileReader *fr, SeqFileAttr *attr){$/;"	f
+guess_seq_file_type	file_reader.c	/^int guess_seq_file_type(FileReader *fr){$/;"	f
+hash64shift	hashset.h	/^static inline uint64_t hash64shift(uint64_t key){$/;"	f
+heap_comp_func	heap.h	/^typedef int (*heap_comp_func)(const void *e1, const void *e2, void *ref);$/;"	t
+het	mergecontig.h	/^	float het;$/;"	m	struct:__anon47
+het	mergectg.h	/^	float het; \/\/ parameter$/;"	m	struct:__anon8
+hp	mergecontig.h	/^	Heap *hp;$/;"	m	struct:__anon48
+i	stdaln.h	/^	int i, j;$/;"	m	struct:__anon15
+i32hash_code	hashset.h	495;"	d
+i32hash_equals	hashset.h	496;"	d
+id	mergecontig.h	/^	uint32_t id;$/;"	m	struct:__anon43
+id	mergecontig.h	/^	uint32_t id;$/;"	m	struct:__anon45
+id	mergectg.h	/^	uint32_t id; \/\/ which contig$/;"	m	struct:__anon5
+id	mergectg.h	/^	uint32_t id;$/;"	m	struct:__anon3
+id	mergectg.h	/^	uint32_t id;$/;"	m	struct:__anon4
+id0	mergecontig.h	/^	uint32_t id0;$/;"	m	struct:__anon47
+id1	mergecontig.h	/^	uint32_t id1;$/;"	m	struct:__anon47
+id_tt	mergecontig.h	/^} id_tt;$/;"	t	typeref:struct:__anon43
+idx	ezmsim.c	/^	uint64_t *idx;$/;"	m	struct:__anon26	file:
+idx_t	ezmsim.c	/^} idx_t;$/;"	t	typeref:struct:__anon26	file:
+idxs	rainbow.h	/^	uint32_t idxs[2];$/;"	m	struct:__anon20
+inc_tag	asm_R2.h	/^	uint32_t inc_tag;$/;"	m	struct:__anon40
+index	asm_R2.h	/^	rhash    *index;$/;"	m	struct:__anon40
+index	mergectg.h	/^	rdkhash *index;$/;"	m	struct:__anon3
+index	rainbow.h	/^	khash *index;$/;"	m	struct:__anon20
+index_bitvec	bitvec.h	/^static inline void index_bitvec(BitVec *bitv){$/;"	f
+index_rds	mergectg.c	/^void index_rds(merge_t *merger, contig_t *ctg) {$/;"	f
+indexing_cluster	cluster.c	/^void indexing_cluster(Cluster *cluster, FileReader *fr, int is_fq, int fix_rd_len){$/;"	f
+init_bif	file_reader.h	/^static inline BufferedInputFile* init_bif(FILE *file, int buf_size){$/;"	f
+init_bitvec	bitvec.h	/^static inline BitVec* init_bitvec(uint64_t n_bit){$/;"	f
+init_bloomfilter	bloom_filter.h	/^static inline BloomFilter* init_bloomfilter(size_t size, uint32_t n_seed){$/;"	f
+init_cluster	cluster.c	/^Cluster* init_cluster(uint32_t max_mm, uint32_t exact_limit, uint32_t KMER_SIZE, uint32_t KMER_NUM){$/;"	f
+init_ctgdb	mergecontig.c	/^CtgDB* init_ctgdb(void ) {$/;"	f
+init_delimiters	file_reader.c	/^int* init_delimiters(char *expr){$/;"	f
+init_div	divide.c	/^Div* init_div(uint32_t k_allele, uint32_t K_allele, float min_freq){$/;"	f
+init_ef	asm_R2.c	/^EF* init_ef(uint32_t ef_id, char *eseq, uint32_t rd_len, uint32_t min_ol, float min_sm){$/;"	f
+init_hashset_macro	hashset.h	63;"	d
+init_heap	heap.h	/^static inline Heap* init_heap(heap_comp_func cmp, void *ref){$/;"	f
+init_memvec	vector.h	/^static inline void init_memvec(Vector *vec, unsigned int e_size, unsigned int init_size){$/;"	f
+init_merger	mergectg.c	/^merge_t* init_merger(uint32_t min_kmer, uint32_t min_overlap, float het, uint32_t kmersize, uint32_t max_cluster, uint32_t need_asm) {$/;"	f
+init_simpasm	simp_asm.h	/^static inline SimpAssembler* init_simpasm(uint32_t n_cpu, uint32_t kmer_size, uint32_t rd_len, uint32_t strand, uint32_t min_ol, float min_sm, uint32_t max_mm, int allow_gap){$/;"	f
+init_string	string.h	/^static inline String* init_string(int cap){$/;"	f
+init_vec	vector.h	/^static inline Vector* init_vec(unsigned int e_size, unsigned int init_size){$/;"	f
+is_entity_del	hashset.h	55;"	d
+is_entity_null	hashset.h	54;"	d
+is_fq	file_reader.h	/^	int is_fq;$/;"	m	struct:__anon32
+is_similar_enough	mergectg.c	/^int is_similar_enough(merge_t *merger, contig_t *c1, contig_t *c2) {$/;"	f
+iter_bitvec	bitvec.h	/^static inline uint64_t iter_bitvec(BitVec *bitv){$/;"	f
+iter_hashset_macro	hashset.h	237;"	d
+iter_idx	bitvec.h	/^	uint64_t iter_idx;$/;"	m	struct:__anon54
+iter_idx	simp_asm.h	/^	uint32_t iter_idx;$/;"	m	struct:__anon51
+iter_simpasm	simp_asm.h	/^static inline SimpContigInfo* iter_simpasm(SimpAssembler *sa){$/;"	f
+j	stdaln.h	/^	int i, j;$/;"	m	struct:__anon15
+jenkins_one_at_a_time_hash	hashset.h	/^static inline uint32_t jenkins_one_at_a_time_hash(char *key, size_t len){$/;"	f
+k_allele	rainbow.h	/^	uint32_t k_allele, K_allele;$/;"	m	struct:__anon23
+key	hashset.h	/^typedef struct { char *key; uint32_t val; } cuhash_t;$/;"	m	struct:__anon28
+key	hashset.h	/^typedef struct { uint32_t key, val; } uuhash_t;$/;"	m	struct:__anon27
+key	mergectg.h	/^typedef struct {uint32_t key; uint32_t oldid; char *path;} uuchash_t;$/;"	m	struct:__anon6
+kmer	asm_R2.h	/^	uint32_t kmer:10, rps_idx:22;$/;"	m	struct:__anon36
+kmer	mergecontig.h	/^	uint64_t kmer;$/;"	m	struct:__anon41
+kmer	mergectg.h	/^	uint64_t kmer, kpos;$/;"	m	struct:__anon5
+kmer	mergectg.h	/^	uint64_t kmer:62, kpos:2;$/;"	m	struct:__anon2
+kmer1	rainbow.h	/^	uint32_t kmer1, kmer2, seqid;$/;"	m	struct:__anon17
+kmer2	rainbow.h	/^	uint32_t kmer1, kmer2, seqid;$/;"	m	struct:__anon17
+kmer_code	mergecontig.h	41;"	d
+kmer_code	mergectg.h	72;"	d
+kmer_eq	mergecontig.h	42;"	d
+kmer_eq	mergectg.h	73;"	d
+kmer_equals	rainbow.h	46;"	d
+kmer_hashcode	rainbow.h	45;"	d
+kmer_mask	dna.h	105;"	d
+kmer_pos_t	mergecontig.h	/^} kmer_pos_t;$/;"	t	typeref:struct:__anon42
+kmer_t	rainbow.h	/^} kmer_t;$/;"	t	typeref:struct:__anon17
+kmer_tt	mergecontig.h	/^} kmer_tt;$/;"	t	typeref:struct:__anon41
+kpos	mergecontig.h	/^	uint64_t kpos;$/;"	m	struct:__anon41
+kpos	mergectg.h	/^	uint64_t kmer, kpos;$/;"	m	struct:__anon5
+kpos	mergectg.h	/^	uint64_t kmer:62, kpos:2;$/;"	m	struct:__anon2
+l	ezmsim.c	/^	int l, m; \/* length and maximum buffer size *\/$/;"	m	struct:__anon24	file:
+l	ezmsim.c	/^	int l, m; \/* length and maximum buffer size *\/$/;"	m	struct:__anon25	file:
+l	ezmsim.c	/^	uint64_t l, m;$/;"	m	struct:__anon26	file:
+l_ol	asm_R2.h	/^	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;$/;"	m	struct:__anon39
+l_rid	asm_R2.h	/^	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;$/;"	m	struct:__anon39
+last	mergecontig.h	/^	uint64_t last; \/\/last kmer position$/;"	m	struct:__anon44
+last	mergectg.h	/^	uint64_t last; \/\/last kmer position$/;"	m	struct:__anon7
+last_brk	file_reader.h	/^	int last_brk;$/;"	m	struct:__anon31
+lastoffset	mergecontig.h	/^	uint32_t lastoffset;$/;"	m	struct:__anon42
+lastoffset	mergecontig.h	/^	uint32_t lastoffset;$/;"	m	struct:__anon43
+left	mergectg.h	/^	pathtree_t *left;$/;"	m	struct:pathtree_t
+len	aln_cigar.h	/^	uint16_t len:13, type:3;$/;"	m	struct:__anon56
+len	asm_R2.h	/^	uint32_t len:31, closed:1;$/;"	m	struct:__anon38
+len	mergetag.c	/^	uint32_t gid, off, len;$/;"	m	struct:__anon13	file:
+len	rainbow.h	/^	uint32_t len;$/;"	m	struct:__anon19
+len	simp_asm.h	/^	uint32_t len:31, closed:1;$/;"	m	struct:__anon50
+len	simp_asm.h	/^	uint32_t seqid, len;$/;"	m	struct:__anon49
+len1	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+len2	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+lend_ctgv_merger	mergectg.c	/^contig_t* lend_ctgv_merger(merge_t *merger) {$/;"	f
+lend_ulist_div	divide.c	/^u32list* lend_ulist_div(Div *div){$/;"	f
+line	file_reader.h	/^	String *line;$/;"	m	struct:__anon31
+line_breaker	file_reader.h	/^	char line_breaker;$/;"	m	struct:__anon31
+link_t	mergecontig.h	/^} link_t;$/;"	t	typeref:struct:__anon44
+link_t	mergectg.h	/^} link_t;$/;"	t	typeref:struct:__anon7
+linking_core	cluster.c	/^uint32_t linking_core(Cluster *cluster, uint32_t seqid, uint64_t *seq, uint32_t seqlen){$/;"	f
+links	rainbow.h	/^	u32list *links;$/;"	m	struct:__anon20
+load_ctgdb	mergecontig.c	/^CtgDB* load_ctgdb(FileReader *fr1, FileReader *fr2) {$/;"	f
+load_hashset_macro	hashset.h	293;"	d
+load_seqdb	cluster.c	/^SeqDB* load_seqdb(FileReader *fr, int is_fq, int fix_rd_len){$/;"	f
+m	ezmsim.c	/^	int l, m; \/* length and maximum buffer size *\/$/;"	m	struct:__anon24	file:
+m	ezmsim.c	/^	int l, m; \/* length and maximum buffer size *\/$/;"	m	struct:__anon25	file:
+m	ezmsim.c	/^	uint64_t l, m;$/;"	m	struct:__anon26	file:
+m_idx	mergectg.h	/^	idxv *m_idx;  \/\/ merged multiple index$/;"	m	struct:__anon3
+m_rds	mergectg.h	/^	u32list *m_rds;  \/\/ merged reads index$/;"	m	struct:__anon3
+main	ezmsim.c	/^int main (int argc, char *argv[])$/;"	f
+main	main.c	/^int main(int argc, char **argv){$/;"	f
+main	mergetag.c	/^int main(int argc, char **argv){$/;"	f
+main	rbasm_main.c	/^int main(int argc, char **argv){$/;"	f
+maq_mut_diref	ezmsim.c	/^void maq_mut_diref(const seq_t *seq, int is_hap, mutseq_t *hap1, mutseq_t *hap2)$/;"	f
+maq_print_mutref	ezmsim.c	/^void maq_print_mutref(const char *name, const seq_t *seq, mutseq_t *hap1, mutseq_t *hap2)$/;"	f
+markers	rainbow.h	/^	u64list *markers[4];$/;"	m	struct:__anon23
+matrix	stdaln.h	/^	int *matrix;$/;"	m	struct:__anon14
+max_cluster	mergectg.h	/^	uint32_t max_cluster; \/\/parameter$/;"	m	struct:__anon8
+max_mm	rainbow.h	/^	uint32_t max_mm;$/;"	m	struct:__anon20
+max_pair_len	rainbow.h	/^	uint32_t max_pair_len;$/;"	m	struct:__anon20
+max_rd_len	rainbow.h	/^	uint8_t  rd_len, max_rd_len;$/;"	m	struct:__anon18
+max_read	mergectg.h	/^	uint32_t max_read; \/\/ parameter for asm$/;"	m	struct:__anon8
+max_seq_len	file_reader.h	/^	int max_seq_len;$/;"	m	struct:__anon32
+max_seqid	rainbow.h	/^	uint32_t max_seqid;$/;"	m	struct:__anon20
+merge_along_tree	mergectg.c	/^void merge_along_tree(merge_t *merger, pathtree_t *tree) {$/;"	f
+merge_core	mergectg.c	/^void merge_core(merge_t *merger) {$/;"	f
+merge_core	mergetag.c	/^void merge_core(recv *divs, String *seqs, uint32_t max_mm, int task, blockv *blocks, FILE *out){$/;"	f
+merge_ctgs	mergectg.c	/^void merge_ctgs(merge_t *merger, FileReader *in, FILE *out) {$/;"	f
+merge_invoker	main.c	/^int merge_invoker(int argc, char **argv) {$/;"	f
+merge_leaves	mergectg.c	/^void merge_leaves(merge_t *merger, uint32_t id1, uint32_t id2) {$/;"	f
+merge_t	mergectg.h	/^} merge_t;$/;"	t	typeref:struct:__anon8
+min_freq	rainbow.h	/^	float min_freq;$/;"	m	struct:__anon23
+min_kmer	mergectg.h	/^	uint32_t min_kmer; \/\/ parameter: # kmers to define two similar contigs$/;"	m	struct:__anon8
+min_ol	asm_R2.h	/^	uint32_t min_ol;$/;"	m	struct:__anon40
+min_ol	mergectg.h	/^	uint32_t min_ol; \/\/parameter for asm$/;"	m	struct:__anon8
+min_overlap	mergectg.h	/^	uint32_t min_overlap; \/\/ parameter$/;"	m	struct:__anon8
+min_read	mergectg.h	/^	uint32_t min_read; \/\/ parameter for asm$/;"	m	struct:__anon8
+min_seq_len	file_reader.h	/^	int min_seq_len;$/;"	m	struct:__anon32
+min_sm	asm_R2.h	/^	float    min_sm;$/;"	m	struct:__anon40
+min_sm	mergectg.h	/^	float min_sm; \/\/ parameter for asm$/;"	m	struct:__anon8
+mut_t	ezmsim.c	/^typedef unsigned short mut_t;$/;"	t	file:
+mutmsk	ezmsim.c	/^static mut_t mutmsk = (mut_t)0xf000;$/;"	v	file:
+mutseq_t	ezmsim.c	/^} mutseq_t;$/;"	t	typeref:struct:__anon25	file:
+muttype_t	ezmsim.c	/^enum muttype_t {NOCHANGE = 0, INSERT = 0x1000, SUBSTITUTE = 0xe000, DELETE = 0xf000};$/;"	g	file:
+n_bit	bitvec.h	/^	uint64_t n_bit;$/;"	m	struct:__anon54
+n_cap	bitvec.h	/^	uint64_t n_cap;$/;"	m	struct:__anon54
+n_col	rainbow.h	/^	uint32_t n_col;$/;"	m	struct:__anon23
+n_mm	asm_R2.h	/^	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;$/;"	m	struct:__anon39
+n_rd	rainbow.h	/^	uint32_t n_rd;$/;"	m	struct:__anon18
+n_seed	bloom_filter.h	/^	uint32_t n_seed, seed_off;$/;"	m	struct:__anon34
+name	file_reader.h	/^	String name;$/;"	m	struct:__anon29
+native_number_cmp	list.h	216;"	d
+need_asm	mergectg.h	/^	uint32_t need_asm; \/\/ parameter$/;"	m	struct:__anon8
+nst_nt4_table	ezmsim.c	/^uint8_t nst_nt4_table[256] = {$/;"	v
+num_cmp_script	sort.h	27;"	d
+num_max	list.h	34;"	d
+num_min	list.h	33;"	d
+off	mergetag.c	/^	uint32_t gid, off, len;$/;"	m	struct:__anon13	file:
+off1	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+off2	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+offset	mergecontig.h	/^	uint32_t offset;$/;"	m	struct:__anon42
+offset	mergecontig.h	/^	uint32_t offset;$/;"	m	struct:__anon43
+offset	mergecontig.h	/^	uint32_t offset;$/;"	m	struct:__anon44
+offset	mergectg.h	/^	int offset;  \/\/ offset w.r.t. the current contig$/;"	m	struct:__anon5
+offset	mergectg.h	/^	int offset; \/\/ current kmer offset$/;"	m	struct:__anon7
+offset2	mergectg.h	/^	int offset2; \/\/ offset of query contig$/;"	m	struct:__anon5
+olbisearch	mergecontig.c	/^static inline int olbisearch(uint64_t a[], uint64_t q, int i, int j) {$/;"	f	file:
+old_clsid	mergecontig.h	/^	uint32_t old_clsid;$/;"	m	struct:__anon45
+oldid	mergectg.h	/^typedef struct {uint32_t key; uint32_t oldid; char *path;} uuchash_t;$/;"	m	struct:__anon6
+ols	asm_R2.h	/^	Vector   *ols;$/;"	m	struct:__anon40
+ols	simp_asm.h	/^	sr_hitv  *ols;$/;"	m	struct:__anon51
+one2bitvec	bitvec.h	/^static inline void one2bitvec(BitVec *bitv){ encap_bitvec(bitv, 1); one_bitvec(bitv, bitv->n_bit); bitv->n_bit ++; }$/;"	f
+one_bitvec	bitvec.h	/^static inline void one_bitvec(BitVec *bitv, uint64_t idx){ bitv->bits[idx>>6] |= 1LLU << (idx&0x3FU); }$/;"	f
+ones_bitvec	bitvec.h	/^static inline void ones_bitvec(BitVec *bitv){ memset(bitv->bits, 0xFFU, bitv->n_cap \/ 8); }$/;"	f
+open_bif	file_reader.h	/^static inline BufferedInputFile* open_bif(char *filename){$/;"	f
+open_bif2	file_reader.h	/^static inline BufferedInputFile* open_bif2(char *filename, char *suffix){$/;"	f
+open_file_for_append	file_reader.h	/^static inline FILE* open_file_for_append(char *name, char *suffix){$/;"	f
+open_file_for_read	file_reader.h	/^static inline FILE* open_file_for_read(char *name, char *suffix){$/;"	f
+open_file_for_write	file_reader.h	/^static inline FILE* open_file_for_write(char *name, char *suffix){$/;"	f
+out1	stdaln.h	/^	char *out1, *out2; \/* print them, and then you will know *\/$/;"	m	struct:__anon16
+out2	stdaln.h	/^	char *out1, *out2; \/* print them, and then you will know *\/$/;"	m	struct:__anon16
+outm	stdaln.h	/^	char *outm;$/;"	m	struct:__anon16
+output_ef_ctgs	asm_R2.c	/^void output_ef_ctgs(EF *ef, FILE *out){$/;"	f
+overlap	mergecontig.h	/^	uint32_t overlap;$/;"	m	struct:__anon47
+path	mergectg.h	/^	String *path;$/;"	m	struct:__anon3
+path	mergectg.h	/^typedef struct {uint32_t key; uint32_t oldid; char *path;} uuchash_t;$/;"	m	struct:__anon6
+path	stdaln.h	/^	path_t *path; \/* for advanced users... :-) *\/$/;"	m	struct:__anon16
+path_len	stdaln.h	/^	int path_len; \/* for advanced users... :-) *\/$/;"	m	struct:__anon16
+path_t	stdaln.h	/^} path_t;$/;"	t	typeref:struct:__anon15
+pathtree_t	mergectg.h	/^struct pathtree_t {$/;"	s
+pathtree_t	mergectg.h	/^typedef struct pathtree_t pathtree_t;$/;"	t	typeref:struct:pathtree_t
+peer_heap	heap.h	/^static inline void* peer_heap(Heap *heap){ return (count_rjheapv(heap->ptrs)? get_rjheapv(heap->ptrs, 0) : NULL );}$/;"	f
+poisson_num_gen	ezmsim.c	/^int poisson_num_gen(double lamda)$/;"	f
+pool_ctg	asm_R2.h	/^	Vector   *pool_ctg;$/;"	m	struct:__anon40
+pool_vec	asm_R2.h	/^	Vector   *pool_vec;$/;"	m	struct:__anon40
+pop_heap	heap.h	/^static inline void* pop_heap(Heap *heap){$/;"	f
+pop_vec	vector.h	/^static inline int pop_vec(Vector *vec, void *e){$/;"	f
+pos	mergecontig.h	/^	uint64_t pos;$/;"	m	struct:__anon42
+prefix_path	mergectg.c	/^void prefix_path(char *s1, char *s2, int n, char *pre) {$/;"	f
+prepare_hashset_macro	hashset.h	117;"	d
+prepare_reads	mergectg.c	/^void prepare_reads(merge_t *merger, FileReader *in, uint32_t lastcid) {$/;"	f
+prepare_seq_seqdb	cluster.c	/^uint8_t prepare_seq_seqdb(SeqDB *sdb, uint32_t rid, uint64_t *seqs){$/;"	f
+print_alignments	asm_R2.c	/^void print_alignments(EF *ef){$/;"	f
+print_asm	mergectg.c	/^void print_asm(merge_t *merger, FILE *out) {$/;"	f
+print_asm2	mergectg.c	/^void print_asm2(merge_t *merger, FILE *out) {$/;"	f
+print_clusters	mergecontig.c	/^void print_clusters(PWDB *db) {$/;"	f
+print_ctgdb	mergecontig.c	/^void print_ctgdb(CtgDB *db) {$/;"	f
+print_pretty_seq	file_reader.h	/^static inline void print_pretty_seq(FILE *out, String *seq, int line_width){$/;"	f
+ps1	rainbow.h	/^	u32list *ps1;$/;"	m	struct:__anon23
+ps2	rainbow.h	/^	u32list *ps2;$/;"	m	struct:__anon23
+ptr	file_reader.h	/^	int ptr;$/;"	m	struct:__anon31
+ptrs	heap.h	/^	rjheapv *ptrs;$/;"	m	struct:__anon11
+push_heap	heap.h	/^static inline void push_heap(Heap *heap, void *p){$/;"	f
+push_simpasm	simp_asm.h	/^static inline void push_simpasm(SimpAssembler *sa, uint32_t seqid, char *seq, uint32_t seqlen, uint8_t rank){$/;"	f
+push_vec	vector.h	/^static inline void push_vec(Vector *vec, void *e){$/;"	f
+put_bloomfilter	bloom_filter.h	/^static inline void put_bloomfilter(BloomFilter *bf, const void *key, uint32_t len){$/;"	f
+put_cache_ctgs	mergectg.c	/^void put_cache_ctgs(merge_t *merger, contig_t *ctg) {$/;"	f
+put_hashset_macro	hashset.h	205;"	d
+put_pool_ctg	asm_R2.c	/^void put_pool_ctg(EF *ef, FContig *ctg){$/;"	f
+put_pool_vec	asm_R2.c	/^void put_pool_vec(EF *ef, Vector *vec){$/;"	f
+pw_aln_contigs	mergecontig.c	/^PWDB* pw_aln_contigs(CtgDB *db, uint32_t min_overlap, float het) {$/;"	f
+pw_aln_contigs_brute	mergecontig.c	/^PWDB* pw_aln_contigs_brute(CtgDB *db) {$/;"	f
+pwctgs	mergecontig.h	/^	pwctglist *pwctgs;$/;"	m	struct:__anon48
+qsort_vec	vector.h	/^static inline void qsort_vec(Vector *vec, cmp_vec_fun fun){$/;"	f
+qual	file_reader.h	/^	String qual;$/;"	m	struct:__anon29
+r2r	simp_asm.h	/^	u64hash  *r2r;$/;"	m	struct:__anon51
+r_ol	asm_R2.h	/^	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;$/;"	m	struct:__anon39
+r_rid	asm_R2.h	/^	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;$/;"	m	struct:__anon39
+ran_normal	ezmsim.c	/^double ran_normal()$/;"	f
+rank	asm_R2.h	/^	uint32_t rd_len:10, rank:10;$/;"	m	struct:__anon37
+rank	mergectg.h	/^	uint32_t rank;$/;"	m	struct:__anon1
+rank	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+rank	rainbow.h	/^	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6;$/;"	m	struct:__anon21
+rank	simp_asm.h	/^	uint32_t ctg_id, ctg_dir:1, ctg_off:20, used:1, rank:10;$/;"	m	struct:__anon49
+rank_bitvec	bitvec.h	/^static inline uint64_t rank_bitvec(BitVec *bitv, uint64_t idx){$/;"	f
+rank_cigars_seqlen	aln_cigar.h	/^static inline int rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){$/;"	f
+rd_kmer_code	mergectg.h	26;"	d
+rd_kmer_eq	mergectg.h	27;"	d
+rd_kmer_t	mergectg.h	/^} rd_kmer_t;$/;"	t	typeref:struct:__anon2
+rd_len	asm_R2.h	/^	uint32_t rd_len:10, rank:10;$/;"	m	struct:__anon37
+rd_len	mergectg.h	/^	uint32_t rd_len;$/;"	m	struct:__anon1
+rd_len	rainbow.h	/^	uint8_t  rd_len, max_rd_len;$/;"	m	struct:__anon18
+rds	asm_R2.h	/^	Vector   *rds;$/;"	m	struct:__anon40
+rds	mergectg.h	/^	readv *rds;$/;"	m	struct:__anon3
+rds	rainbow.h	/^	rilist *rds;$/;"	m	struct:__anon23
+rds	simp_asm.h	/^	seqv     *rds;$/;"	m	struct:__anon51
+read_bif	file_reader.h	/^static inline int64_t read_bif(BufferedInputFile *bif, void *data, int64_t size){$/;"	f
+read_t	mergectg.h	/^} read_t;$/;"	t	typeref:struct:__anon1
+reduce_vec_size	vector.h	/^static inline int reduce_vec_size(Vector *vec, size_t size){$/;"	f
+ref	heap.h	/^	void *ref;$/;"	m	struct:__anon11
+ref_apply_array	sort.h	103;"	d
+ref_iter_hashset_macro	hashset.h	251;"	d
+refine_cigars	aln_cigar.h	/^static inline int refine_cigars(AlnCigar *c, int n){$/;"	f
+remove_hashset_macro	hashset.h	211;"	d
+remove_heap	heap.h	/^static inline void remove_heap(Heap *heap, size_t idx){$/;"	f
+reset_div	divide.c	/^void reset_div(Div *div){$/;"	f
+reset_ef	asm_R2.c	/^void reset_ef(EF *ef, uint32_t ef_id, char *eseq, uint32_t rd_len, uint32_t min_ol, float min_sm){$/;"	f
+reset_filereader	file_reader.c	/^int reset_filereader(FileReader *fr){$/;"	f
+reset_iter_hashset_macro	hashset.h	235;"	d
+reset_merger	mergectg.c	/^void reset_merger(merge_t *merger) {$/;"	f
+reset_simpasm	simp_asm.h	/^static inline void reset_simpasm(SimpAssembler *sa){$/;"	f
+reset_vec	vector.h	/^static inline void reset_vec(Vector *vec){$/;"	f
+return_ctgv_merger	mergectg.c	/^void return_ctgv_merger(merge_t *merger, contig_t *ctg) {$/;"	f
+return_ulist_div	divide.c	/^void return_ulist_div(Div *div, u32list *list){ if(list){ clear_u32list(list); push_u32slist(div->cache, list); } }$/;"	f
+rev_rank_cigars_seqlen	aln_cigar.h	/^static inline int rev_rank_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){$/;"	f
+rev_select_cigars_seqlen	aln_cigar.h	/^static inline int rev_select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){$/;"	f
+reverse_cigars	aln_cigar.h	/^static inline void reverse_cigars(AlnCigar *cs, int n){$/;"	f
+reverse_dna	dna.h	/^static inline void reverse_dna(char *seq, int len){$/;"	f
+reverse_string	string.h	/^static inline void reverse_string(String *str){$/;"	f
+reverse_vec	vector.h	/^static inline void reverse_vec(Vector *vec){$/;"	f
+revsed	rainbow.h	/^	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6;$/;"	m	struct:__anon21
+revseq2bits	dna.h	/^static inline void revseq2bits(uint64_t *bits, uint64_t bitoff, char *seq, uint32_t seqlen){$/;"	f
+rhash_code	asm_R2.h	40;"	d
+rhash_eq	asm_R2.h	41;"	d
+rhash_t	asm_R2.h	/^} rhash_t;$/;"	t	typeref:struct:__anon36
+rid	asm_R2.h	/^typedef struct { uint32_t rid:16, roff:16; } rp_t;$/;"	m	struct:__anon35
+rid	mergetag.c	/^	uint32_t rid, cid, gid, rank, off1, len1, off2, len2;$/;"	m	struct:__anon12	file:
+rids	asm_R2.h	/^	Vector   *rids;$/;"	m	struct:__anon38
+rids	simp_asm.h	/^	u32list  *rids;$/;"	m	struct:__anon51
+right	mergectg.h	/^	pathtree_t *right;$/;"	m	struct:pathtree_t
+roff	asm_R2.h	/^typedef struct { uint32_t rid:16, roff:16; } rp_t;$/;"	m	struct:__anon35
+row	stdaln.h	/^	int row;$/;"	m	struct:__anon14
+rp_t	asm_R2.h	/^typedef struct { uint32_t rid:16, roff:16; } rp_t;$/;"	t	typeref:struct:__anon35
+rps	asm_R2.h	/^	Vector   *rps;$/;"	m	struct:__anon40
+rps_idx	asm_R2.h	/^	uint32_t kmer:10, rps_idx:22;$/;"	m	struct:__anon36
+s	ezmsim.c	/^	mut_t *s; \/* sequence *\/$/;"	m	struct:__anon25	file:
+s	ezmsim.c	/^	unsigned char *s; \/* sequence *\/$/;"	m	struct:__anon24	file:
+sbts	rainbow.h	/^	sbtv    *sbts;$/;"	m	struct:__anon20
+score	mergecontig.h	/^	int score; $/;"	m	struct:__anon47
+score	stdaln.h	/^	int score; \/* score *\/$/;"	m	struct:__anon16
+sdb	rainbow.h	/^	SeqDB    *sdb, *sdb2;$/;"	m	struct:__anon20
+sdb2	rainbow.h	/^	SeqDB    *sdb, *sdb2;$/;"	m	struct:__anon20
+search_array	vector.h	159;"	d
+search_array_dsc	vector.h	168;"	d
+seed_off	bloom_filter.h	/^	uint32_t n_seed, seed_off;$/;"	m	struct:__anon34
+seeds	bloom_filter.h	/^static const uint32_t seeds[20] = $/;"	v
+select_cigars_seqlen	aln_cigar.h	/^static inline int select_cigars_seqlen(AlnCigar *cigars, int n_cigar, int len, int seq_idx){$/;"	f
+seq	asm_R2.h	/^	String   *seq;$/;"	m	struct:__anon38
+seq	asm_R2.h	/^	char     seq[MAX_RD_LEN+1];$/;"	m	struct:__anon37
+seq	file_reader.h	/^	String seq;$/;"	m	struct:__anon29
+seq	mergecontig.h	/^	char *seq;$/;"	m	struct:__anon45
+seq	mergectg.h	/^	char *seq;$/;"	m	struct:__anon4
+seq	mergectg.h	/^	char seq[MAX_RD_LEN+1];$/;"	m	struct:__anon1
+seq	rainbow.h	/^	uint64_t seq[8];$/;"	m	struct:__anon19
+seq	simp_asm.h	/^	String   *seq;$/;"	m	struct:__anon50
+seq1	rainbow.h	/^	uint64_t seq1[10], seq2[10];$/;"	m	struct:__anon20
+seq2	rainbow.h	/^	uint64_t seq1[10], seq2[10];$/;"	m	struct:__anon20
+seq2bits	dna.h	/^static inline void seq2bits(uint64_t *bits, uint64_t bitoff, char *seq, uint32_t seqlen){$/;"	f
+seq2kmer	dna.h	/^static inline uint64_t seq2kmer(char *seq, uint32_t ksize){$/;"	f
+seq2revkmer	dna.h	/^static inline uint64_t seq2revkmer(char *seq, uint32_t ksize){$/;"	f
+seq_id	asm_R2.h	/^	uint32_t seq_id;$/;"	m	struct:__anon37
+seq_id	mergectg.h	/^	uint32_t seq_id;$/;"	m	struct:__anon1
+seq_read_fasta	ezmsim.c	/^int seq_read_fasta(FILE *fp, seq_t *seq, char *locus, char *comment)$/;"	f
+seq_set_block_size	ezmsim.c	/^void seq_set_block_size(int size)$/;"	f
+seq_t	ezmsim.c	/^} seq_t;$/;"	t	typeref:struct:__anon24	file:
+seqid	rainbow.h	/^	uint32_t kmer1, kmer2, seqid;$/;"	m	struct:__anon17
+seqid	rainbow.h	/^	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6;$/;"	m	struct:__anon21
+seqid	simp_asm.h	/^	uint32_t seqid, len;$/;"	m	struct:__anon49
+seqlen1	rainbow.h	/^	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6;$/;"	m	struct:__anon21
+seqlen2	rainbow.h	/^	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6;$/;"	m	struct:__anon21
+seqlens	rainbow.h	/^	u8list  *seqlens;$/;"	m	struct:__anon18
+seqoff	rainbow.h	/^	uint32_t seqid, seqoff, seqlen1:10, seqlen2:10, rank:6, revsed:6;$/;"	m	struct:__anon21
+seqoffs	rainbow.h	/^	u64list *seqoffs;$/;"	m	struct:__anon18
+seqs	rainbow.h	/^	u8list *seqs;$/;"	m	struct:__anon23
+seqs	rainbow.h	/^	uint64_t *seqs;$/;"	m	struct:__anon18
+set_D	stdaln.c	278;"	d	file:
+set_I	stdaln.c	256;"	d	file:
+set_M	stdaln.c	240;"	d	file:
+set_end_D	stdaln.c	288;"	d	file:
+set_end_I	stdaln.c	266;"	d	file:
+set_entity_del	hashset.h	58;"	d
+set_entity_null	hashset.h	57;"	d
+set_inc_tag_ef	asm_R2.c	/^void set_inc_tag_ef(EF *ef, uint32_t inc){$/;"	f
+set_vec	vector.h	/^static inline void set_vec(Vector *vec, size_t idx, void *e){$/;"	f
+set_vec_size	vector.h	/^static inline int set_vec_size(Vector *vec, size_t size){$/;"	f
+sids	simp_asm.h	/^	u32list  *sids;$/;"	m	struct:__anon50
+sim_pairs	mergectg.h	/^	uint32_t sim_pairs;$/;"	m	struct:__anon8
+simple_assemble	simp_asm.h	/^static inline void simple_assemble(SimpAssembler *sa){$/;"	f
+simple_join_contigs	simp_asm.h	/^static inline int simple_join_contigs(SimpAssembler *sa, SR_AlnHit *hit){$/;"	f
+simple_move_rids	simp_asm.h	/^static inline void simple_move_rids(SimpAssembler *sa, uint32_t dst, uint32_t src, int off){$/;"	f
+simple_reverse_contig	simp_asm.h	/^static inline void simple_reverse_contig(SimpAssembler *sa, uint32_t ctg_id){$/;"	f
+size	bloom_filter.h	/^	size_t size;$/;"	m	struct:__anon34
+size	file_reader.h	/^	int size;$/;"	m	struct:__anon31
+size	string.h	/^	int  size;$/;"	m	struct:__anon53
+size	string.h	/^	int size;$/;"	m	struct:__anon52
+size	vector.h	/^	size_t size;$/;"	m	struct:Vector
+sort_array	sort.h	48;"	d
+sorting_core	cluster.c	/^uint32_t sorting_core(Cluster *cluster){$/;"	f
+split_string	string.h	/^static inline int split_string(String *str, char separator, Vector *virtual_strings){$/;"	f
+split_vstring	string.h	/^static inline int split_vstring(VirtualString *str, char separator, Vector *virtual_strings, int cut){$/;"	f
+sra	simp_asm.h	/^	SR_SeqDB *sra;$/;"	m	struct:__anon51
+start1	stdaln.h	/^	int start1, end1; \/* start and end of the first sequence, coordinations are 1-based *\/$/;"	m	struct:__anon16
+start2	stdaln.h	/^	int start2, end2; \/* start and end of the second sequence, coordinations are 1-based *\/$/;"	m	struct:__anon16
+stdin_filereader	file_reader.c	/^FileReader* stdin_filereader(){$/;"	f
+strindex	ezmsim.c	/^int strindex(idx_t *index, unsigned char *s, unsigned char *t)$/;"	f
+string	string.h	/^	char *string;$/;"	m	struct:__anon52
+string	string.h	/^	char *string;$/;"	m	struct:__anon53
+string2cigars	aln_cigar.h	/^static inline int string2cigars(AlnCigar *cigars, char *str, int len){$/;"	f
+string_filereader	file_reader.c	/^FileReader* string_filereader(char *string){$/;"	f
+sub32seqbits	dna.h	/^static inline uint64_t sub32seqbits(uint64_t *src, uint64_t off){$/;"	f
+sub_cigars	aln_cigar.h	/^static inline int sub_cigars(AlnCigar *dst, AlnCigar *cigars, int n_cigar, int off, int len){$/;"	f
+sub_seq_cigars	aln_cigar.h	/^static inline int sub_seq_cigars(AlnCigar *dst, AlnCigar *c, int n, int seq_idx, int off, int len){$/;"	f
+substr	string.h	/^static inline char* substr(char *string, int start, int end, char *dst){$/;"	f
+sums	bitvec.h	/^	uint64_t *sums;$/;"	m	struct:__anon54
+swap_tmp	string.h	35;"	d
+sys_prime_list	hashset.h	/^static const uint64_t sys_prime_list[61] = {$/;"	v
+sz	mergecontig.h	/^	uint32_t sz;   \/\/union tree depth$/;"	m	struct:__anon45
+tabs	file_reader.h	/^	Vector *tabs;$/;"	m	struct:__anon31
+tid	mergectg.h	/^	uint32_t tid; \/\/ leaf records contig ID$/;"	m	struct:pathtree_t
+tracing_core	cluster.c	/^void tracing_core(Cluster *cluster, uint32_t bt){$/;"	f
+tree	mergectg.h	/^	pathtree_t *tree;$/;"	m	struct:__anon8
+trim_string	string.h	/^static inline void trim_string(String *str){$/;"	f
+trunc_string	string.h	/^static inline void trunc_string(String *str, int size){$/;"	f
+type	aln_cigar.h	/^	uint16_t len:13, type:3;$/;"	m	struct:__anon56
+u32hash_code	hashset.h	489;"	d
+u32hashcode	hashset.h	480;"	d
+u64hash_code	hashset.h	490;"	d
+u64hashcode	hashset.h	481;"	d
+uc	ezmsim.c	/^void uc(unsigned char *s)$/;"	f
+uniq	asm_R2.h	/^	u64hash  *uniq;$/;"	m	struct:__anon40
+update_ctg2merge	mergectg.c	/^void update_ctg2merge(merge_t *merger) {$/;"	f
+usage	ezmsim.c	/^int usage()$/;"	f
+usage	main.c	/^int usage(){$/;"	f
+usage	mergetag.c	/^int usage(){$/;"	f
+used	asm_R2.h	/^	uint32_t ctg_id:24, ctg_off:19, used:1;$/;"	m	struct:__anon37
+used	asm_R2.h	/^	uint32_t l_rid:20, r_rid:20, l_ol:8, r_ol:8, n_mm:7, used:1;$/;"	m	struct:__anon39
+used	simp_asm.h	/^	uint32_t ctg_id, ctg_dir:1, ctg_off:20, used:1, rank:10;$/;"	m	struct:__anon49
+uuchash_code	mergectg.h	77;"	d
+uuchash_equals	mergectg.h	78;"	d
+uuchash_t	mergectg.h	/^typedef struct {uint32_t key; uint32_t oldid; char *path;} uuchash_t;$/;"	t	typeref:struct:__anon6
+uuhash_code	hashset.h	504;"	d
+uuhash_equals	hashset.h	505;"	d
+uuhash_t	hashset.h	/^typedef struct { uint32_t key, val; } uuhash_t;$/;"	t	typeref:struct:__anon27
+uxxhash_equals	hashset.h	491;"	d
+val	hashset.h	/^typedef struct { char *key; uint32_t val; } cuhash_t;$/;"	m	struct:__anon28
+val	hashset.h	/^typedef struct { uint32_t key, val; } uuhash_t;$/;"	m	struct:__anon27
+vec_memcpy	vector.h	/^static inline void vec_memcpy(void *dst, void *src, size_t size){$/;"	f
+vec_size	vector.h	65;"	d
+version	main.c	/^const char *version = "2.0.1";$/;"	v
+vline	file_reader.h	/^	String *vline;$/;"	m	struct:__anon31
+xopen	ezmsim.c	103;"	d	file:
+zero2bitvec	bitvec.h	/^static inline void zero2bitvec(BitVec *bitv){ encap_bitvec(bitv, 1); zero_bitvec(bitv, bitv->n_bit); bitv->n_bit ++; }$/;"	f
+zero_bitvec	bitvec.h	/^static inline void zero_bitvec(BitVec *bitv, uint64_t idx){ bitv->bits[idx>>6] &= ~(1LLU << (idx&0x3FU)); }$/;"	f
+zeros_bitvec	bitvec.h	/^static inline void zeros_bitvec(BitVec *bitv){ memset(bitv->bits, 0, bitv->n_cap \/ 8); }$/;"	f
diff --git a/vector.h b/vector.h
new file mode 100644
index 0000000..4ca1491
--- /dev/null
+++ b/vector.h
@@ -0,0 +1,234 @@
+/*
+ * 
+ * Copyright (c) 2011, Jue Ruan <ruanjue at gmail.com>
+ *
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+ 
+#ifndef __VECTOR_H_RJ
+#define __VECTOR_H_RJ
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+/*
+ * Generic growable array: fixed-size elements stored back to back in one
+ * heap buffer.
+ */
+typedef struct Vector {
+	void *buffer;        /* element storage */
+	size_t size;         /* number of elements currently in use */
+	size_t cap;          /* number of elements the buffer has room for */
+	unsigned int e_size; /* size of one element, in bytes */
+} Vector;
+
+/* Element comparison callback; it is handed unchanged to qsort() and bsearch(). */
+typedef int (*cmp_vec_fun)(const void *k1, const void *k2);
+
+/**
+ * Initialise a caller-provided Vector in place.
+ *
+ * @param vec        structure to set up; its previous contents are ignored
+ * @param e_size     size of one element, in bytes
+ * @param init_size  initial capacity, in elements
+ *
+ * The storage is zero-filled.  The process is aborted when the allocation
+ * fails.
+ */
+static inline void init_memvec(Vector *vec, unsigned int e_size, unsigned int init_size){
+	/* Always request at least one byte: malloc(0)/calloc(0, x) may legally
+	 * return NULL, which used to be reported as "out of memory". */
+	size_t n_alloc = init_size? init_size : 1;
+	size_t e_alloc = e_size? e_size : 1;
+	vec->e_size = e_size;
+	vec->size   = 0;
+	vec->cap    = init_size;
+	/* calloc zero-fills and checks the count * size product itself; the former
+	 * "init_size * e_size" was evaluated in 32-bit unsigned int and could wrap. */
+	vec->buffer = calloc(n_alloc, e_alloc);
+	if(vec->buffer == NULL){
+		fprintf(stderr, " -- Out of memory in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+}
+
+/**
+ * Allocate a Vector structure on the heap and initialise it with init_memvec().
+ *
+ * @param e_size     size of one element, in bytes
+ * @param init_size  initial capacity, in elements
+ * @return the new vector; the process is aborted if the structure cannot be allocated
+ */
+static inline Vector* init_vec(unsigned int e_size, unsigned int init_size){
+	Vector *v = malloc(sizeof *v);
+	if(!v){
+		fprintf(stderr, " -- Out of memory in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	init_memvec(v, e_size, init_size);
+	return v;
+}
+
+/*
+ * Copy `size` bytes from src to dst one byte at a time, lowest address first.
+ * NOTE(review): the previous comment claimed this is faster than gcc's memcpy;
+ * that claim is not verified here.
+ */
+static inline void vec_memcpy(void *dst, void *src, size_t size){
+	uint8_t *out = (uint8_t*)dst;
+	uint8_t *in  = (uint8_t*)src;
+	while(size--){ *out++ = *in++; }
+}
+
+/* Number of elements currently stored in vector v (its `size` field). */
+#define vec_size(v) ((v)->size)
+
+/**
+ * Make sure the vector has room for add_size elements beyond its current size.
+ *
+ * Capacity doubles while it is below 0xFFFFF elements (starting at 8 when it
+ * is 0) and grows in steps of 0xFFFFF elements after that.  Every slot from
+ * the current size up to the new capacity is zero-filled.  The process is
+ * aborted when the request cannot be represented or the allocation fails.
+ *
+ * @param vec       vector to grow
+ * @param add_size  number of additional elements needed; taken as size_t so
+ *                  that large requests are not truncated to 32 bits
+ * @return always 1
+ */
+static inline int encap_vec(Vector *vec, size_t add_size){
+	size_t need, cap, n_bytes;
+	void *buf;
+	if(add_size > SIZE_MAX - vec->size){
+		fprintf(stderr, " -- Vector size overflow in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	need = vec->size + add_size;
+	if(need <= vec->cap) return 1;
+	cap = vec->cap;
+	while(cap < need){
+		if(cap < 0xFFFFFU) cap = cap? cap << 1 : 8;
+		else if(cap > SIZE_MAX - 0xFFFFFU) cap = need; /* one more step would wrap */
+		else cap += 0xFFFFFU;
+	}
+	if(vec->e_size && cap > SIZE_MAX / vec->e_size){
+		fprintf(stderr, " -- Vector size overflow in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	n_bytes = cap * vec->e_size;
+	/* realloc(p, 0) may free p and return NULL, so never ask for zero bytes */
+	buf = realloc(vec->buffer, n_bytes? n_bytes : 1);
+	if(buf == NULL){
+		fprintf(stderr, " -- Out of memory, try alloc %zu bytes in %s -- %s:%d --\n", n_bytes, __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	vec->buffer = buf;
+	vec->cap    = cap;
+	/* byte pointer: arithmetic on void* is a compiler extension */
+	memset((uint8_t*)buf + vec->size * vec->e_size, 0, (cap - vec->size) * vec->e_size);
+	return 1;
+}
+
+/**
+ * Grow the vector by add_size elements: capacity is reserved through
+ * encap_vec() and the element count is then raised.
+ *
+ * @return always 1
+ */
+static inline int add_vec_size(Vector *vec, size_t add_size){
+	(void)encap_vec(vec, add_size);
+	vec->size = vec->size + add_size;
+	return 1;
+}
+
+/**
+ * Drop `size` elements from the end of the vector.
+ *
+ * @return 1 on success, 0 (vector unchanged) when it holds fewer than `size` elements
+ */
+static inline int reduce_vec_size(Vector *vec, size_t size){
+	if(size <= vec->size){
+		vec->size -= size;
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Overwrite the element count with `size`.  Capacity is neither checked nor
+ * changed, and the buffer is not touched.
+ *
+ * @return the new size converted to int
+ */
+static inline int set_vec_size(Vector *vec, size_t size){
+	vec->size = size;
+	return (int)vec->size;
+}
+
+/**
+ * Append one element: reserve room with encap_vec(), copy e_size bytes from
+ * `e` into the first free slot, then increment the element count.
+ */
+static inline void push_vec(Vector *vec, void *e){
+	uint8_t *slot;
+	encap_vec(vec, 1);
+	slot = (uint8_t*)vec->buffer + vec->size * vec->e_size;
+	vec_memcpy(slot, e, vec->e_size);
+	vec->size += 1;
+}
+
+/* Typed push: reserve room for one element with encap_vec(), store v at index
+ * `size` of the buffer viewed as an array of data_type, then increment size. */
+#define gpush_vec(vec, v, data_type) (encap_vec(vec, 1), (((data_type *)(vec)->buffer)[vec_size(vec)] = v), (vec)->size ++)
+/* Same store and increment as gpush_vec but without the encap_vec() call, so
+ * no capacity is reserved here. */
+#define Gpush_vec(vec, v, data_type) ((((data_type *)(vec)->buffer)[vec_size(vec)] = v), (vec)->size ++)
+
+/**
+ * Remove the last element and copy its e_size bytes into `e`.
+ *
+ * @return 1 when an element was removed, 0 when the vector was empty (`e` untouched)
+ */
+static inline int pop_vec(Vector *vec, void *e){
+	uint8_t *last;
+	if(!reduce_vec_size(vec, 1)) return 0;
+	/* size has already been decremented, so it now indexes the removed element */
+	last = (uint8_t*)vec->buffer + vec->size * vec->e_size;
+	vec_memcpy(e, last, vec->e_size);
+	return 1;
+}
+
+/* Typed pop: when reduce_vec_size(vec, 1) succeeds, assign the removed last
+ * element (read as data_type) to v and yield 1; otherwise yield 0 without
+ * assigning v. */
+#define gpop_vec(vec, v, data_type) (reduce_vec_size(vec, 1)? (v = ((data_type *)(vec)->buffer)[vec_size(vec)], 1) : 0)
+
+/**
+ * Overwrite element `idx` with the e_size bytes found at `e`.
+ * The index is not checked against the size or the capacity.
+ */
+static inline void set_vec(Vector *vec, size_t idx, void *e){
+	uint8_t *slot = (uint8_t*)vec->buffer + idx * vec->e_size;
+	vec_memcpy(slot, e, vec->e_size);
+}
+
+/* Typed store: assign v to element idx of the buffer viewed as an array of
+ * data_type.  Argument order is (vec, idx, v, data_type); idx is not checked. */
+#define gset_vec(vec, idx, v, data_type) ((data_type*)(vec)->buffer)[idx] = v
+
+/**
+ * Copy the e_size bytes of element `idx` into `e`.
+ * The index is not checked against the size or the capacity.
+ *
+ * @return always 1
+ */
+static inline int get_vec(Vector *vec, size_t idx, void *e){
+	uint8_t *slot = (uint8_t*)vec->buffer + idx * vec->e_size;
+	vec_memcpy(e, slot, vec->e_size);
+	return 1;
+}
+
+/* Typed access: element idx of the buffer viewed as an array of data_type
+ * (an lvalue); idx is not checked. */
+#define gget_vec(vec, idx, data_type) ((data_type *)(vec)->buffer)[idx]
+
+/**
+ * Address of element `idx` inside the vector's buffer (no copy is made).
+ * The index is not checked against the size or the capacity.
+ */
+static inline void* get_vec_ref(Vector *vec, size_t idx){
+	uint8_t *base = (uint8_t*)vec->buffer;
+	return base + idx * vec->e_size;
+}
+
+/* Value of the last element read as data_type, or 0 when the vector is empty. */
+#define gpeer_vec(vec, data_type) (vec_size(vec)? ((data_type *)(vec)->buffer)[vec_size(vec) - 1] : 0)
+
+/**
+ * Address of the last element inside the buffer, or NULL when the vector is empty.
+ */
+static inline void* get_last_vec_ref(Vector *vec){
+	size_t n = vec->size;
+	if(n == 0) return NULL;
+	return (uint8_t*)vec->buffer + (n - 1) * vec->e_size;
+}
+
+/**
+ * Extend the vector by one element with add_vec_size() and return the
+ * address of that new last slot.
+ */
+static inline void* get_next_vec_ref(Vector *vec){
+	size_t last;
+	add_vec_size(vec, 1);
+	last = vec->size - 1;
+	return (uint8_t*)vec->buffer + last * vec->e_size;
+}
+
+/**
+ * Sort the stored elements in place with the C library qsort(), ordering
+ * them with `fun`.
+ */
+static inline void qsort_vec(Vector *vec, cmp_vec_fun fun){
+	const size_t n_elem = vec->size;
+	const size_t width  = vec->e_size;
+	qsort(vec->buffer, n_elem, width, fun);
+}
+
+/**
+ * Look up key `q` among the stored elements with the C library bsearch(),
+ * comparing with `fun`.
+ *
+ * @return whatever bsearch() returns: a pointer to a matching element, or NULL
+ */
+static inline void* bsearch_vec(Vector *vec, void *q, cmp_vec_fun fun){
+	const size_t n_elem = vec->size;
+	const size_t width  = vec->e_size;
+	return bsearch(q, vec->buffer, n_elem, width, fun);
+}
+
+/*
+ * Binary search over `size` values obtained as val_macro(array, index); the
+ * search moves right while a value is < key, so it expects ascending order.
+ * The macro expands to a declaration of three `long` variables (i, j and m,
+ * each suffixed with uniq_flag) followed by statements: it is not an
+ * expression, and the caller supplies the final semicolon.
+ * On exit `ret` holds the lowest index whose value equals key, or
+ * -(insertion index + 1) when no value equals key.
+ */
+#define search_array(uniq_flag, array, size, key, val_macro, ret) long i##uniq_flag, j##uniq_flag, m##uniq_flag;\
+	i##uniq_flag = 0; j##uniq_flag = size; while(i##uniq_flag < j##uniq_flag){\
+	m##uniq_flag = i##uniq_flag + (j##uniq_flag - i##uniq_flag) / 2;\
+	if(val_macro((array), m##uniq_flag) < key) i##uniq_flag = m##uniq_flag + 1;\
+	else j##uniq_flag = m##uniq_flag;\
+	}\
+	if(i##uniq_flag < (long)size && val_macro((array), i##uniq_flag) == (key)) ret = i##uniq_flag; \
+	else ret = - (i##uniq_flag + 1)
+
+/*
+ * Binary search over `size` values obtained as val_macro(array, index); the
+ * search moves right while a value is > key, so it expects descending order.
+ * The macro expands to a declaration of three `long` variables (i, j and m,
+ * each suffixed with uniq_flag) followed by statements: it is not an
+ * expression, and the caller supplies the final semicolon.
+ * On exit `ret` holds the lowest index whose value equals key, or
+ * -(insertion index + 1) when no value equals key.
+ */
+#define search_array_dsc(uniq_flag, array, size, key, val_macro, ret) long i##uniq_flag, j##uniq_flag, m##uniq_flag;\
+	i##uniq_flag = 0; j##uniq_flag = size; while(i##uniq_flag < j##uniq_flag){\
+	m##uniq_flag = i##uniq_flag + (j##uniq_flag - i##uniq_flag) / 2;\
+	if(val_macro((array), m##uniq_flag) > key) i##uniq_flag = m##uniq_flag + 1;\
+	else j##uniq_flag = m##uniq_flag;\
+	}\
+	if(i##uniq_flag < (long)size && val_macro((array), i##uniq_flag) == (key)) ret = i##uniq_flag; \
+	else ret = - (i##uniq_flag + 1)
+
+/*
+ * Generate `static inline void reverse_<tag>(Vector *vec)`, which reverses in
+ * place a vector whose elements are of type e_type.
+ *
+ * The previous expansion passed (value, index) to gset_vec, whose parameters
+ * are (index, value), so it indexed the buffer by element value.  The swap is
+ * now done directly through typed pointers.
+ *
+ * The macro name keeps its historical spelling ("revsere") so that existing
+ * users still compile.
+ */
+#define define_revsere_vec(tag, e_type)                    \
+static inline void reverse_##tag(Vector *vec){             \
+	e_type *lo;                                            \
+	e_type *hi;                                            \
+	e_type t;                                              \
+	if(vec_size(vec) < 2) return;                          \
+	lo = (e_type *)(vec)->buffer;                          \
+	hi = lo + (vec_size(vec) - 1);                         \
+	while(lo < hi){                                        \
+		t = *lo;                                           \
+		*lo = *hi;                                         \
+		*hi = t;                                           \
+		lo ++;                                             \
+		hi --;                                             \
+	}                                                      \
+}
+
+/**
+ * Reverse the order of the stored elements in place, for any element size.
+ *
+ * One element-sized scratch buffer is allocated for the swaps; the process is
+ * aborted if that allocation fails (it used to be written through unchecked).
+ * Vectors with fewer than two elements are left untouched and allocate nothing.
+ */
+static inline void reverse_vec(Vector *vec){
+	uint8_t *lo, *hi, *tmp;
+	size_t e_size = vec->e_size;
+	if(vec_size(vec) < 2 || e_size == 0) return;
+	tmp = malloc(e_size);
+	if(tmp == NULL){
+		fprintf(stderr, " -- Out of memory in %s -- %s:%d --\n", __FUNCTION__, __FILE__, __LINE__);
+		abort();
+	}
+	/* byte pointers: arithmetic on void* is a compiler extension */
+	lo = (uint8_t*)vec->buffer;
+	hi = lo + (vec_size(vec) - 1) * e_size;
+	while(lo < hi){
+		vec_memcpy(tmp, lo, e_size);
+		vec_memcpy(lo, hi, e_size);
+		vec_memcpy(hi, tmp, e_size);
+		lo += e_size;
+		hi -= e_size;
+	}
+	free(tmp);
+}
+
+/**
+ * Append every element of src to the end of dst.
+ *
+ * @return 0 on success, -1 (dst unchanged) when the element sizes differ
+ *
+ * Both element counts are captured before dst grows.  When dst and src are
+ * the same vector, the old code derived the offset and the length from the
+ * already-grown size, copied the buffer onto itself and left the appended
+ * half zero-filled.
+ */
+static inline int cat_vec(Vector *dst, Vector *src){
+	size_t n_old, n_add;
+	if(dst->e_size != src->e_size) return -1;
+	n_old = vec_size(dst);
+	n_add = vec_size(src);
+	add_vec_size(dst, n_add);
+	/* src->buffer is read only after the growth: if dst == src the buffer may have moved */
+	vec_memcpy((uint8_t*)dst->buffer + n_old * dst->e_size, src->buffer, n_add * src->e_size);
+	return 0;
+}
+
+/* Drop all elements by setting the size to 0 through set_vec_size(). */
+#define clear_vec(vec) set_vec_size(vec, 0)
+
+/**
+ * Empty the vector and zero-fill its whole buffer (cap * e_size bytes).
+ * The capacity itself is kept.
+ */
+static inline void reset_vec(Vector *vec){
+	size_t n_bytes = vec->e_size * vec->cap;
+	vec->size = 0;
+	memset(vec->buffer, 0, n_bytes);
+}
+
+/**
+ * Write the stored elements to `out` as raw bytes with a single fwrite().
+ *
+ * @return the value returned by fwrite(), i.e. the number of elements written
+ */
+static inline size_t dump_vec(Vector *vec, FILE *out){
+	size_t n_written = fwrite(vec->buffer, vec->e_size, vec_size(vec), out);
+	return n_written;
+}
+
+/**
+ * Release the element buffer and the Vector structure itself.  A NULL `vec`
+ * is ignored.  Because the structure is passed to free(), it must have been
+ * heap-allocated, as init_vec() does.
+ */
+static inline void free_vec(Vector *vec){
+	if(vec != NULL){
+		free(vec->buffer);
+		free(vec);
+	}
+}
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bio-rainbow.git



More information about the debian-med-commit mailing list