[med-svn] [soapaligner] 07/10: New upstream version 2.20

Thu Dec 28 07:09:50 UTC 2017

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository soapaligner.

commit 7495091c11f49cbc5d7663a38e4f6cdfe3935569
Author: Andreas Tille <tille at debian.org>
Date:   Thu Dec 28 08:06:55 2017 +0100

    New upstream version 2.20
---
 BWT.c                                 | 1459 +++++++++++++++++++++++++++++++++
 BWT.h                                 |  268 ++++++
 BWTAln.c                              |  919 +++++++++++++++++++++
 BWTAln.h                              |   39 +
 ChangLogCode.Apr1                     |   64 ++
 ChangeLog1                            |   16 +
 DNACount.c                            | 1159 ++++++++++++++++++++++++++
 DNACount.h                            |   91 ++
 GPLv3                                 |  674 +++++++++++++++
 HSP.c                                 |  339 ++++++++
 HSP.h                                 |  126 +++
 INSTALL                               |   15 +
 Makefile                              |   65 ++
 Match.c                               |  393 +++++++++
 Match.h                               |  138 ++++
 MemManager.c                          | 1093 ++++++++++++++++++++++++
 MemManager.h                          |  151 ++++
 MiscUtilities.c                       | 1368 +++++++++++++++++++++++++++++++
 MiscUtilities.h                       |   83 ++
 NEWS                                  |    6 +
 PairMatch.c                           |  483 +++++++++++
 SeqIO.c                               |  201 +++++
 SeqIO.h                               |   42 +
 TextConverter.c                       |  917 +++++++++++++++++++++
 TextConverter.h                       |   93 +++
 Timing.c                              |  169 ++++
 Timing.h                              |   37 +
 TypeNLimit.h                          |   78 ++
 debian/README.Debian                  |    6 -
 debian/README.source                  |    5 -
 debian/changelog                      |    5 -
 debian/compat                         |    1 -
 debian/control                        |   36 -
 debian/copyright                      |   56 --
 debian/docs                           |    1 -
 debian/genomics.cn-soap2.install      |    1 -
 debian/genomics.cn-soap2.manpages     |    1 -
 debian/install                        |    1 -
 debian/manpages                       |    1 -
 debian/patches/compiler_options.patch |   22 -
 debian/patches/series                 |    1 -
 debian/rules                          |    8 -
 debian/source/format                  |    1 -
 debian/upstream/metadata              |   14 -
 debian/watch                          |    3 -
 extratools.c                          |  227 +++++
 extratools.h                          |  165 ++++
 kstring.c                             |   35 +
 kstring.h                             |   46 ++
 r250.c                                |  128 +++
 r250.h                                |   31 +
 soap.1                                |  142 ++++
 soap.c                                |  391 +++++++++
 soap.h                                |   24 +
 soap.man                              |  145 ++++
 soapio.c                              |  302 +++++++
 soapio.h                              |   68 ++
 stdaln.c                              |  856 +++++++++++++++++++
 stdaln.h                              |  146 ++++
 59 files changed, 13192 insertions(+), 163 deletions(-)

diff --git a/BWT.c b/BWT.c
new file mode 100644
index 0000000..e3d9b7c
--- /dev/null
+++ b/BWT.c
@@ -0,0 +1,1459 @@
+/*
+
+   BWT.c	BWT-Index
+
+   This module contains an implementation of BWT-index for alphabet size = 4.
+   The functions provided include:
+    Load functions for loading BWT to memory;
+    Core functions for accessing core Inverse Psi values;
+	Search functions for searching patterns from text;
+	Text retrieval functions for retrieving text from BWT.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <emmintrin.h>
+#include <mmintrin.h>
+#include "BWT.h"
+#include "MiscUtilities.h"
+#include "DNACount.h"
+#include "TextConverter.h"
+#include "MemManager.h"
+#include "r250.h"
+#include "HSP.h"
+
+// Forward declarations of file-local (static) helper functions
+static INLINE unsigned int BWTOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit, const unsigned int character);
+static INLINE void BWTAllOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit, unsigned int* __restrict occValueExplicit);
+static INLINE unsigned int BWTSaIndexToChar(const BWT *bwt, const unsigned int saIndex);
+static INLINE unsigned int BWTGetWordPackedText(const unsigned int *packedText, const unsigned int index, const unsigned int shift, const unsigned int numOfBit);
+// Prefetch helpers; BWTOccValueTwoIndex() calls them under its "Pre-fetch memory to be accessed" steps
+static INLINE void BWTPrefetchOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit);
+static INLINE void BWTPrefetchBWT(const BWT *bwt, const unsigned int index);
+
+// Non-static prototypes; NOTE(review): presumably ordering callbacks over an SA index group - their definitions are not in this part of the file, confirm
+int SaIndexGroupDPHitOrder1(const void *saIndexGroup, const int index1, const int index2);
+int SaIndexGroupDPHitOrder2(const void *saIndexGroup, const int index1, const int index2);
+
+
+static INLINE unsigned int BWTSaIndexToChar(const BWT *bwt, const unsigned int saIndex) {
+	// The character code (0..3) is the number of cumulativeFreq[1..3] boundaries lying strictly below saIndex
+	unsigned int charCode = (saIndex > bwt->cumulativeFreq[1]);
+	charCode += (saIndex > bwt->cumulativeFreq[2]) + (saIndex > bwt->cumulativeFreq[3]);
+	return charCode;
+}
+
+// Dispatches an empty BWT from mmPool (no code, occ, SA or inverse SA data); textLength only sizes occValueMajor, and a NULL decodeTable is generated from the pool.
+BWT *BWTCreate(MMPool *mmPool, const unsigned int textLength, unsigned int *decodeTable) {
+
+	BWT *bwt = MMPoolDispatch(mmPool, sizeof(BWT));
+
+	bwt->textLength = 0;
+	bwt->inverseSa0 = 0;	// previously "inverseSa = 0", which left inverseSa0 uninitialised (inverseSa itself is set to NULL below)
+	bwt->bwtCode = NULL;	// no BWT code is attached yet; BWTFree() reads this field
+	bwt->bwtSizeInWord = 0;
+	bwt->saValueOnBoundary = NULL;
+
+	bwt->cumulativeFreq = MMPoolDispatch(mmPool, (ALPHABET_SIZE + 1) * sizeof(unsigned int));
+	initializeVAL(bwt->cumulativeFreq, ALPHABET_SIZE + 1, 0);
+
+	// Generate decode tables
+	if (decodeTable == NULL) {
+		bwt->decodeTable = MMPoolDispatch(mmPool, DNA_OCC_CNT_TABLE_SIZE_IN_WORD * sizeof(unsigned int));
+		GenerateDNAOccCountTable(bwt->decodeTable);
+	} else {
+		bwt->decodeTable = decodeTable;
+	}
+	bwt->decodeTableGenerated = FALSE;	// the table is pool-owned or caller-owned here, never MMUnitAllocate'd, so BWTFree() must not MMUnitFree() it
+
+	bwt->occMajorSizeInWord = BWTOccValueMajorSizeInWord(textLength);
+	bwt->occValueMajor = MMPoolDispatch(mmPool, bwt->occMajorSizeInWord * sizeof(unsigned int));
+	bwt->occSizeInWord = 0;
+	bwt->occValue = NULL;
+
+	bwt->saInterval = ALL_ONE_MASK;
+	bwt->saValueSize = 0;
+	bwt->saValue = NULL;
+	bwt->inverseSaInterval = ALL_ONE_MASK;
+	bwt->inverseSaSize = 0;
+	bwt->inverseSa = NULL;
+	bwt->saIndexRange = NULL;	// the SA index range fields were previously left uninitialised although BWTFree() tests saIndexRange
+	bwt->saIndexRangeNumOfChar = 0;
+	bwt->saIndexRangeSize = 0;
+
+	return bwt;
+}
+
+BWT *BWTLoad(MMPool *mmPool, const char *bwtCodeFileName, const char *occValueFileName, 
+			 const char *saValueFileName, const char *inverseSaFileName, const char *saIndexRangeFileName,
+			 unsigned int *decodeTable) {
+
+	unsigned int i;
+	FILE *bwtCodeFile, *occValueFile, *saValueFile = NULL, *inverseSaFile = NULL, *saIndexRangeFile = NULL;
+	BWT *bwt;
+	unsigned int tmp;
+	unsigned int bwtCodeLengthInFile;
+	unsigned int numOfSaIndexRange;
+
+	bwtCodeFile = (FILE*)fopen64(bwtCodeFileName, "rb");
+	if (bwtCodeFile == NULL) {
+		fprintf(stderr, "BWTLoad() : cannot open bwtCodeFile!\n");
+		exit(1);
+	}
+
+	occValueFile = (FILE*)fopen64(occValueFileName, "rb");
+	if (occValueFile == NULL) {
+		fprintf(stderr, "BWTLoad() : cannot open occValueFile!\n");
+		exit(1);
+	}
+
+	if (saValueFileName != NULL && saValueFileName[0] != '\0' && saValueFileName[0] != '-') {
+		saValueFile = (FILE*)fopen64(saValueFileName, "rb");
+		if (saValueFile == NULL) {
+			fprintf(stderr, "BWTLoad() : cannot open saValueFile!\n");
+			exit(1);
+		}
+	}
+
+	if (inverseSaFileName != NULL && inverseSaFileName[0] != '\0' && inverseSaFileName[0] != '-') {
+		inverseSaFile = (FILE*)fopen64(inverseSaFileName, "rb");
+		if (inverseSaFile == NULL) {
+			fprintf(stderr, "BWTLoad() : cannot open inverseSaFile!\n");
+			exit(1);
+		}
+	}
+
+	if (saIndexRangeFileName != NULL && saIndexRangeFileName[0] != '\0' && saIndexRangeFileName[0] != '-') {
+		saIndexRangeFile = (FILE*)fopen64(saIndexRangeFileName, "rb");
+		if (saIndexRangeFile == NULL) {
+			fprintf(stderr, "BWTLoad() : cannot open saIndexRangeFile!\n");
+			exit(1);
+		}
+	}
+
+	bwt = MMPoolDispatch(mmPool, sizeof(BWT));
+
+	fread(&bwt->inverseSa0, sizeof(unsigned int), 1, bwtCodeFile);
+
+	bwt->cumulativeFreq = MMPoolDispatch(mmPool, (ALPHABET_SIZE + 1) * sizeof(unsigned int));
+	bwt->cumulativeFreq[0] = 0;
+	fread(bwt->cumulativeFreq + 1, sizeof(unsigned int), ALPHABET_SIZE, bwtCodeFile);
+	bwt->textLength = bwt->cumulativeFreq[ALPHABET_SIZE];
+
+	fread(&tmp, sizeof(unsigned int), 1, occValueFile);
+	if (tmp != bwt->inverseSa0) {
+		fprintf(stderr, "BWTLoad(): OccValue inverseSa0 not match!\n");
+		exit(1);
+	}
+	for (i=1; i<=ALPHABET_SIZE; i++) {
+		fread(&tmp, sizeof(unsigned int), 1, occValueFile);
+		if (tmp != bwt->cumulativeFreq[i]) {
+			fprintf(stderr, "BWTLoad(): OccValue cumulativeFreq not match!\n");
+			exit(1);
+		}
+	}
+
+	bwt->bwtSizeInWord = BWTResidentSizeInWord(bwt->textLength) + WORD_BETWEEN_OCC / 2;	// + 8 words so that the 128 bits before and after an explicit occ are in the same aligned 64 byte
+	bwtCodeLengthInFile = BWTFileSizeInWord(bwt->textLength);
+	bwt->bwtCode = MMUnitAllocate(bwt->bwtSizeInWord * sizeof(unsigned int));
+	fread(bwt->bwtCode, sizeof(unsigned int), bwtCodeLengthInFile, bwtCodeFile);
+	fclose(bwtCodeFile);
+	BWTClearTrailingBwtCode(bwt);
+
+	bwt->occSizeInWord = BWTOccValueMinorSizeInWord(bwt->textLength) ;
+	bwt->occMajorSizeInWord = BWTOccValueMajorSizeInWord(bwt->textLength);
+	bwt->occValue = MMUnitAllocate(bwt->occSizeInWord * sizeof(unsigned int));
+	fread(bwt->occValue, sizeof(unsigned int), bwt->occSizeInWord, occValueFile);
+	bwt->occValueMajor = MMUnitAllocate(bwt->occMajorSizeInWord * sizeof(unsigned int));
+	fread(bwt->occValueMajor, sizeof(unsigned int), bwt->occMajorSizeInWord, occValueFile);
+	fclose(occValueFile);
+
+	if (decodeTable == NULL) {
+		bwt->decodeTable = MMUnitAllocate(DNA_OCC_CNT_TABLE_SIZE_IN_WORD * sizeof(unsigned int));
+		GenerateDNAOccCountTable(bwt->decodeTable);
+		bwt->decodeTableGenerated = TRUE;
+	} else {
+		bwt->decodeTable = decodeTable;
+		bwt->decodeTableGenerated = FALSE;
+	}
+
+	bwt->saValueOnBoundary = NULL;
+	if (saValueFile == NULL) {
+		bwt->saInterval = ALL_ONE_MASK;
+		bwt->saValueSize = 0;
+		bwt->saValue = NULL;
+	} else {
+		fread(&tmp, sizeof(unsigned int), 1, saValueFile);
+		if (tmp != bwt->inverseSa0) {
+			fprintf(stderr, "BWTLoad(): SaValue inverseSa0 not match!\n");
+			exit(1);
+		}
+		for (i=1; i<=ALPHABET_SIZE; i++) {
+			fread(&tmp, sizeof(unsigned int), 1, saValueFile);
+			if (tmp != bwt->cumulativeFreq[i]) {
+				fprintf(stderr, "BWTLoad(): SaValue cumulativeFreq not match!\n");
+				exit(1);
+			}
+		}
+		fread(&bwt->saInterval, sizeof(unsigned int), 1, saValueFile);
+		bwt->saValueSize = (bwt->textLength + bwt->saInterval) / bwt->saInterval * sizeof(unsigned int);
+		bwt->saValue = MMUnitAllocate(bwt->saValueSize);
+		fread(bwt->saValue, 1, bwt->saValueSize, saValueFile);
+		bwt->saValue[0] = (unsigned int)-1;	// Special handling for bwt
+		fclose(saValueFile);
+
+		BWTGenerateSaValueOnBoundary(mmPool, bwt);
+	}
+
+	if (inverseSaFile == NULL) {
+		bwt->inverseSaInterval = ALL_ONE_MASK;
+		bwt->inverseSaSize = 0;
+		bwt->inverseSa = NULL;
+	} else {
+		fread(&tmp, sizeof(unsigned int), 1, inverseSaFile);
+		if (tmp != bwt->inverseSa0) {
+			fprintf(stderr, "BWTLoad(): InverseSaValue inverseSa0 not match!\n");
+			exit(1);
+		}
+		for (i=1; i<=ALPHABET_SIZE; i++) {
+			fread(&tmp, sizeof(unsigned int), 1, inverseSaFile);
+			if (tmp != bwt->cumulativeFreq[i]) {
+				fprintf(stderr, "BWTLoad(): InverseSaValue cumulativeFreq not match!\n");
+				exit(1);
+			}
+		}
+		fread(&bwt->inverseSaInterval, sizeof(unsigned int), 1, inverseSaFile);
+		bwt->inverseSaSize = (bwt->textLength + bwt->inverseSaInterval) / bwt->inverseSaInterval * sizeof(unsigned int);
+		bwt->inverseSa = MMUnitAllocate(bwt->inverseSaSize);
+		fread(bwt->inverseSa, 1, bwt->inverseSaSize, inverseSaFile);
+		fclose(inverseSaFile);
+	}
+
+	// Load Sa index range
+	if (saIndexRangeFile == NULL) {
+		bwt->saIndexRange = NULL;
+		bwt->saIndexRangeNumOfChar = 0;
+		bwt->saIndexRangeSize = 0;
+	} else {
+		fread(&tmp, sizeof(unsigned int), 1, saIndexRangeFile);
+		if (tmp != bwt->inverseSa0) {
+			fprintf(stderr, "BWTLoad(): SaIndex inverseSa0 not match!\n");
+			exit(1);
+		}
+		for (i=1; i<=ALPHABET_SIZE; i++) {
+			fread(&tmp, sizeof(unsigned int), 1, saIndexRangeFile);
+			if (tmp != bwt->cumulativeFreq[i]) {
+				fprintf(stderr, "BWTLoad(): SaIndex cumulativeFreq not match!\n");
+				exit(1);
+			}
+		}
+		fread(&bwt->saIndexRangeNumOfChar, sizeof(unsigned int), 1, saIndexRangeFile);
+		numOfSaIndexRange = 1 << (bwt->saIndexRangeNumOfChar * 2);	// 4^saIndexRangeNumOfChar
+		bwt->saIndexRange = MMUnitAllocate(numOfSaIndexRange * sizeof(SaIndexRange));
+		fread(bwt->saIndexRange, sizeof(SaIndexRange), numOfSaIndexRange, saIndexRangeFile);
+		bwt->saIndexRangeSize = numOfSaIndexRange * sizeof(SaIndexRange);
+		fclose(saIndexRangeFile);
+	}
+
+	return bwt;
+
+}
+
+
+// Releases the arrays attached to bwt and then bwt itself: pool-dispatched pieces via MMPoolReturn(), MMUnitAllocate'd ones via MMUnitFree().
+void BWTFree(MMPool *mmPool, BWT *bwt) {
+
+	// cumulativeFreq is dispatched with ALPHABET_SIZE + 1 entries, so it must be returned with that same size
+	MMPoolReturn(mmPool, bwt->cumulativeFreq, (ALPHABET_SIZE + 1) * sizeof(unsigned int));
+	MMUnitFree(bwt->bwtCode, bwt->bwtSizeInWord * sizeof(unsigned int));
+
+	if (bwt->occValue != NULL) {
+		MMUnitFree(bwt->occValue, bwt->occSizeInWord * sizeof(unsigned int));
+	}
+	if (bwt->occValueMajor != NULL) {
+		MMUnitFree(bwt->occValueMajor, bwt->occMajorSizeInWord * sizeof(unsigned int));
+	}
+	if (bwt->saValue != NULL) {
+		MMUnitFree(bwt->saValue, bwt->saValueSize);
+	}
+	if (bwt->inverseSa != NULL) {
+		MMUnitFree(bwt->inverseSa, bwt->inverseSaSize);
+	}
+
+	// The decode table is released only when BWTLoad() generated it; a caller-supplied table stays with the caller
+	if (bwt->decodeTableGenerated == TRUE) {
+		MMUnitFree(bwt->decodeTable, DNA_OCC_CNT_TABLE_SIZE_IN_WORD * sizeof(unsigned int));
+	}
+	if (bwt->saIndexRange != NULL) {
+		MMUnitFree(bwt->saIndexRange, bwt->saIndexRangeSize);
+	}
+	if (bwt->saValueOnBoundary != NULL) {
+		MMPoolReturn(mmPool, bwt->saValueOnBoundary, sizeof(unsigned int) * 2 * ALPHABET_SIZE);
+	}
+
+	MMPoolReturn(mmPool, bwt, sizeof(BWT));
+
+}
+/*
+void BWTPrintMemoryUsage(const BWT *bwt, FILE *output, const unsigned int packedDNASize) {
+
+	unsigned int totalMemorySize;
+
+	fprintf(output, "BWT code size    : %u\n", bwt->bwtSizeInWord * sizeof(unsigned int));
+	fprintf(output, "Occ value size   : %u\n", (bwt->occSizeInWord + bwt->occMajorSizeInWord) * sizeof(unsigned int));
+	if (bwt->saValueSize > 0) {
+		fprintf(output, "SA value size    : %u\n", bwt->saValueSize);
+	}
+	if (bwt->inverseSaSize > 0) {
+		fprintf(output, "Inverse SA size  : %u\n", bwt->inverseSaSize);
+	}
+	if (bwt->saIndexRange > 0) {
+		fprintf(output, "SA index rangee  : %u\n", bwt->saIndexRangeSize);
+	}
+	if (packedDNASize > 0) {
+		fprintf(output, "Packed DNA size  : %u\n", packedDNASize);
+	}
+	
+	totalMemorySize = (bwt->bwtSizeInWord + bwt->occSizeInWord + bwt->occMajorSizeInWord) * sizeof(unsigned int)
+					   + bwt->saValueSize + bwt->inverseSaSize + bwt->saIndexRangeSize + packedDNASize;
+	fprintf(output, "Total memory     : %u\n", totalMemorySize);
+	fprintf(output, "Bit per char     : %.2f\n", 
+			(float)totalMemorySize / ((float)bwt->textLength / BITS_IN_BYTE));
+
+}
+//*/
+void BWTGenerateSaValueOnBoundary(MMPool *mmPool, BWT *bwt) {
+	// For each character c: slot 2c + 1 caches BWTSaValue() at cumulativeFreq[c + 1], slot 2c the one at cumulativeFreq[c] + 1
+	unsigned int c = 0;
+
+	if (bwt->saValueOnBoundary == NULL) {	// dispatch the 2 * ALPHABET_SIZE table on first use only
+		bwt->saValueOnBoundary = MMPoolDispatch(mmPool, sizeof(unsigned int) * 2 * ALPHABET_SIZE);
+	}
+
+	while (c < ALPHABET_SIZE) {
+		bwt->saValueOnBoundary[2 * c + 1] = BWTSaValue(bwt, bwt->cumulativeFreq[c + 1]);
+		if (bwt->cumulativeFreq[c] >= bwt->textLength) {
+			bwt->saValueOnBoundary[2 * c] = bwt->saValueOnBoundary[2 * c + 1];	// cumulativeFreq[c] + 1 would lie past textLength; reuse the upper slot
+		} else {
+			bwt->saValueOnBoundary[2 * c] = BWTSaValue(bwt, bwt->cumulativeFreq[c] + 1);
+		}
+		c++;
+	}
+}
+
+// Returns the number of occurrences of character in bwt->bwtCode between index1 and index2 (maxIndex - minIndex characters), counted with SSE2 over 2 x 128 bits.
+// Ordering of index1 and index2 is not important; this module will handle the ordering
+// index1 and index2 can be on the same aligned 128 bit region or can be on adjacent aligned 128 bit regions
+// If index1 and index2 are in the same aligned 128 bit region, one of them must be on the boundary
+// These requirements are to reduce the no. of branches in the program flow
+unsigned int BWTDecode(const BWT *bwt, const unsigned int index1, const unsigned int index2, const unsigned int character) {
+
+	unsigned int numChar1, numChar2, minIndex, maxIndex, minIndex128, maxIndex128;
+	unsigned int r;
+	// Per-word thresholds compared with numChar1 (...1 tables) and numChar2 (...2 tables) below to build the counting masks
+	const static unsigned int ALIGN_16 partitionOne1[4]  = { 47, 31, 15, 0 };
+	const static unsigned int ALIGN_16 partitionOne2[4]  = { 0, 15, 31, 47 };
+	const static unsigned int ALIGN_16 partitionZero1[4]  = { 63, 47, 31, 15 };
+	const static unsigned int ALIGN_16 partitionZero2[4]  = { 15, 31, 47, 63 };
+
+	// SSE registers
+	__m128i r1e, r2e;
+	__m128i mcl;
+	__m128i m0, m1;
+	__m128i r1a, r1b, r1c;
+	__m128i r2a, r2b, r2c;
+
+	// Sort index1 and index2
+	r = (index1 - index2) & -(index1 < index2);	// r = index1 - index2 if index1 < index2, otherwise 0
+	minIndex = index2 + r;
+	maxIndex = index1 - r;
+
+	// Locate 128 bit boundary
+	minIndex128 = lastAlignedBoundary(minIndex, CHAR_PER_128);
+	maxIndex128 = lastAlignedBoundary(maxIndex - (maxIndex - minIndex > CHAR_PER_128), CHAR_PER_128);
+
+	// Determine no.of characters to count
+	numChar1 = maxIndex128 - minIndex;	// characters counted in the 128 bits loaded at minIndex128
+	numChar2 = maxIndex - maxIndex128;	// characters counted in the 128 bits loaded at maxIndex128
+
+	// Load encoding into register here in the hope of hiding some memory latency
+	r1e = _mm_load_si128((__m128i *)(bwt->bwtCode + minIndex128 / CHAR_PER_WORD));	// Load encoding into register
+	r2e = _mm_load_si128((__m128i *)(bwt->bwtCode + maxIndex128 / CHAR_PER_WORD));	// Load encoding into register
+
+	// Set character extraction masks: 0xFFFFFFFF + 1 wraps to 0, so a mask is all ones when the wanted bit is 0 and all zeros when it is 1 (assumes character < 4)
+	m0 = _mm_set1_epi32(0xFFFFFFFF + (character & 1));	// Character selection mask for even bits
+	m1 = _mm_set1_epi32(0xFFFFFFFF + (character >> 1));	// Character selection mask for odd bits
+	mcl = _mm_set1_epi32(0x55555555);					// Set bit-clearing mask to 0x55555555....(alternate 1-bit)
+
+	// Set counting mask for 2 x 128 bits
+
+	r1a = _mm_set1_epi32(numChar1);		// Load number of characters into register
+	r2a = _mm_set1_epi32(numChar2);		// Load number of characters into register
+
+	r1b = _mm_load_si128((__m128i*)partitionOne1);	// Load partition into register
+	r2b = _mm_load_si128((__m128i*)partitionOne2);	// Load partition into register
+
+	r1c = _mm_load_si128((__m128i*)partitionZero1);	// Load partition into register
+	r2c = _mm_load_si128((__m128i*)partitionZero2);	// Load partition into register
+
+	r1b = _mm_cmpgt_epi32(r1a, r1b);				// Compare to generate 4x32 bit mask; the word with counting boundary is all ones
+	r2b = _mm_cmpgt_epi32(r2a, r2b);				// Compare to generate 4x32 bit mask; the word with counting boundary is all ones
+				
+	r1c = _mm_cmpgt_epi32(r1a, r1c);				// Compare to generate 4x32 bit mask; the word with counting boundary is all zeros
+	r2c = _mm_cmpgt_epi32(r2a, r2c);				// Compare to generate 4x32 bit mask; the word with counting boundary is all zeros
+
+	r1b = _mm_srli_epi32(r1b, (16 - numChar1 % 16) * 2);	// Shift bits so that all words conform to the requirement of counting the word with counting boundary 
+	r2b = _mm_slli_epi32(r2b, (16 - numChar2 % 16) * 2);	// Shift bits so that all words conform to the requirement of counting the word with counting boundary
+
+	r1c = _mm_or_si128(r1b, r1c);	// Combine two masks
+	r2c = _mm_or_si128(r2b, r2c);	// Combine two masks
+
+	r1c = _mm_and_si128(r1c, mcl);	// Combine with bit-clearing mask (now = 0x55555555....)
+	r2c = _mm_and_si128(r2c, mcl);	// Combine with bit-clearing mask (now = 0x55555555....)
+
+	// Start counting; encoding has been loaded into register earlier
+
+	r1b = _mm_srli_epi32(r1e, 1);	// Shift encoding to right by 1 bit
+	r2b = _mm_srli_epi32(r2e, 1);	// Shift encoding to right by 1 bit
+
+	r1a = _mm_xor_si128(r1e, m0);	// Check even-bits with mask
+	r2a = _mm_xor_si128(r2e, m0);	// Check even-bits with mask
+
+	r1b = _mm_xor_si128(r1b, m1);	// Check odd-bits with mask
+	r2b = _mm_xor_si128(r2b, m1);	// Check odd-bits with mask
+
+	r1a = _mm_and_si128(r1a, r1b);	// Combine even and odd bits
+	r2a = _mm_and_si128(r2a, r2b);	// Combine even and odd bits
+
+	r1a = _mm_and_si128(r1a, r1c);	// Combine with counting mask, which has been combined with bit-clearing mask of 0x55555555.... 
+	r2a = _mm_and_si128(r2a, r2c);	// Combine with counting mask, which has been combined with bit-clearing mask of 0x55555555.... 
+
+	// Combine 2 x 128 bits and continue counting
+
+	r1a = _mm_add_epi32(r1a, r2a);		// Combine 2 x 128 bits by adding them together
+
+	mcl = _mm_set1_epi32(0x33333333);	// Set bit-clearing mask to 0x33333333....(alternate 2-bits)
+
+	r1b = _mm_srli_epi32(r1a, 2);		// Shift intermediate result to right by 2 bit
+	r1a = _mm_and_si128(r1a, mcl);		// Clear alternate 2-bits of intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+	r1b = _mm_and_si128(r1b, mcl);		// Clear alternate 2-bits of shifted intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+	r1a = _mm_add_epi32(r1a, r1b);		// Combine shifted and non-shifted intermediate results by adding them together
+
+	mcl = _mm_set1_epi32(0x0F0F0F0F);	// Set bit-clearing mask to 0x0F0F0F0F....(alternate 4-bits)
+	m0 = _mm_setzero_si128();			// Set an all-zero mask
+
+	r1b = _mm_srli_epi32(r1a, 4);		// Shift intermediate result to right by 4 bit
+	r1a = _mm_add_epi32(r1a, r1b);		// Combine shifted and non-shifted intermediate results by adding them together
+	r1a = _mm_and_si128(r1a, mcl);		// Clear alternate 4-bits of intermediate result by combining with bit-clearing mask (now = 0x0F0F0F0F....)
+
+	r1a = _mm_sad_epu8(r1a, m0);		// Treating the 128 bit as 16 x 8 bit; summing up the 1st 8 x 8 bit into 1st 64-bit and 2nd 8 x 8 bit into 2nd 64-bit
+
+	return _mm_extract_epi16(r1a, 0) + _mm_extract_epi16(r1a, 4);	// Extract and return result from register
+
+}
+
+// Counts every character over the same kind of span as BWTDecode(): occValue[1..3] receive the counts of character codes 1..3, occValue[0] the rest of (maxIndex - minIndex).
+// Ordering of index1 and index2 is not important; this module will handle the ordering
+// index1 and index2 can be on the same aligned 128 bit region or can be on adjacent aligned 128 bit regions
+// If index1 and index2 are in the same aligned 128 bit region, one of them must be on the boundary
+// These requirements are to reduce the no. of branches in the program flow
+void BWTDecodeAll(const BWT *bwt, const unsigned int index1, const unsigned int index2, unsigned int* __restrict occValue) {
+
+	unsigned int numChar1, numChar2, minIndex, maxIndex, minIndex128, maxIndex128;
+	unsigned int r;
+	// Per-word thresholds compared with numChar1 (...1 tables) and numChar2 (...2 tables) below to build the counting masks
+	const static unsigned int ALIGN_16 partitionOne1[4]  = { 47, 31, 15, 0 };
+	const static unsigned int ALIGN_16 partitionOne2[4]  = { 0, 15, 31, 47 };
+	const static unsigned int ALIGN_16 partitionZero1[4]  = { 63, 47, 31, 15 };
+	const static unsigned int ALIGN_16 partitionZero2[4]  = { 15, 31, 47, 63 };
+
+	// SSE registers
+	__m128i r1e, r2e;
+	__m128i mcl;
+	__m128i rc, rg, rt;
+	__m128i ra1, ra2;
+	__m128i rc1, rc2;
+	__m128i rg1, rg2;
+	__m128i rt1, rt2;
+
+
+	// Sort index1 and index2
+	r = (index1 - index2) & -(index1 < index2);	// r = index1 - index2 if index1 < index2, otherwise 0
+	minIndex = index2 + r;
+	maxIndex = index1 - r;
+
+	// Locate 128 bit boundary
+	minIndex128 = lastAlignedBoundary(minIndex, CHAR_PER_128);
+	maxIndex128 = lastAlignedBoundary(maxIndex - (maxIndex - minIndex > CHAR_PER_128), CHAR_PER_128);
+
+	// Determine no.of characters to count
+	numChar1 = maxIndex128 - minIndex;	// characters counted in the 128 bits loaded at minIndex128
+	numChar2 = maxIndex - maxIndex128;	// characters counted in the 128 bits loaded at maxIndex128
+
+	// Load encoding into register here in the hope of hiding some memory latency
+	r1e = _mm_load_si128((__m128i *)(bwt->bwtCode + minIndex128 / CHAR_PER_WORD));	// Load encoding into register
+	r2e = _mm_load_si128((__m128i *)(bwt->bwtCode + maxIndex128 / CHAR_PER_WORD));	// Load encoding into register
+
+	// Set character extraction masks 
+	mcl = _mm_set1_epi32(0x55555555);						// Set bit-clearing mask to 0x55555555....(alternate 1-bit)
+
+	// Set counting mask for 2 x 128 bits
+
+	ra1 = _mm_set1_epi32(numChar1);		// Load number of characters into register
+	ra2 = _mm_set1_epi32(numChar2);		// Load number of characters into register
+
+	rc1 = _mm_load_si128((__m128i*)partitionOne1);	// Load partition into register
+	rc2 = _mm_load_si128((__m128i*)partitionOne2);	// Load partition into register
+
+	rg1 = _mm_load_si128((__m128i*)partitionZero1);	// Load partition into register
+	rg2 = _mm_load_si128((__m128i*)partitionZero2);	// Load partition into register
+
+	rc1 = _mm_cmpgt_epi32(ra1, rc1);				// Compare to generate 4x32 bit mask; the word with counting boundary is all ones
+	rc2 = _mm_cmpgt_epi32(ra2, rc2);				// Compare to generate 4x32 bit mask; the word with counting boundary is all ones
+
+	rg1 = _mm_cmpgt_epi32(ra1, rg1);				// Compare to generate 4x32 bit mask; the word with counting boundary is all zeros
+	rg2 = _mm_cmpgt_epi32(ra2, rg2);				// Compare to generate 4x32 bit mask; the word with counting boundary is all zeros
+
+	rc1 = _mm_srli_epi32(rc1, (16 - numChar1 % 16) * 2);	// Shift bits so that all words conform to the requirement of counting the word with counting boundary 
+	rc2 = _mm_slli_epi32(rc2, (16 - numChar2 % 16) * 2);	// Shift bits so that all words conform to the requirement of counting the word with counting boundary
+
+	ra1 = _mm_or_si128(rc1, rg1);	// Combine two masks
+	ra2 = _mm_or_si128(rc2, rg2);	// Combine two masks
+
+	// Start counting; encoding has been loaded into register earlier
+	r1e = _mm_and_si128(r1e, ra1);	// Combine encoding with counting mask
+	r2e = _mm_and_si128(r2e, ra2);	// Combine encoding with counting mask
+
+	// ra1, ra2, rc1, rc2, rg1, rg2, rt1, rt2 all retired
+
+	// Shift and combine with character selection mask
+
+	ra1 = _mm_srli_epi32(r1e, 1);	// Shift encoding to right by 1 bit
+	ra2 = _mm_srli_epi32(r2e, 1);	// Shift encoding to right by 1 bit
+
+	rt1 = _mm_and_si128(r1e, mcl);	// Check even-bits = '1'
+	rt2 = _mm_and_si128(r2e, mcl);	// Check even-bits = '1'
+
+	rg1 = _mm_and_si128(ra1, mcl);	// Check odd-bits = '1'
+	rg2 = _mm_and_si128(ra2, mcl);	// Check odd-bits = '1'
+
+	rc1 = _mm_andnot_si128(r1e, mcl);	// Check even-bits = '0'
+	rc2 = _mm_andnot_si128(r2e, mcl);	// Check even-bits = '0'
+
+	ra1 = _mm_andnot_si128(ra1, mcl);	// Check odd-bits = '0'
+	ra2 = _mm_andnot_si128(ra2, mcl);	// Check odd-bits = '0'
+
+	// r1e, r2e retired
+
+	// Count for 'c' 'g' 't'
+
+	r1e = _mm_and_si128(ra1, rt1);		// Combine even and odd bits (odd = 0, even = 1: accumulated into rc)
+	r2e = _mm_and_si128(ra2, rt2);		// Combine even and odd bits (odd = 0, even = 1: accumulated into rc)
+	ra1 = _mm_and_si128(rg1, rc1);		// Combine even and odd bits (odd = 1, even = 0: accumulated into rg)
+	ra2 = _mm_and_si128(rg2, rc2);		// Combine even and odd bits (odd = 1, even = 0: accumulated into rg)
+	rc1 = _mm_and_si128(rg1, rt1);		// Combine even and odd bits (odd = 1, even = 1: accumulated into rt)
+	rc2 = _mm_and_si128(rg2, rt2);		// Combine even and odd bits (odd = 1, even = 1: accumulated into rt)
+
+	rc = _mm_add_epi32(r1e, r2e);		// Combine 2 x 128 bits by adding them together
+	rg = _mm_add_epi32(ra1, ra2);		// Combine 2 x 128 bits by adding them together
+	rt = _mm_add_epi32(rc1, rc2);		// Combine 2 x 128 bits by adding them together
+
+	// All except rc, rg, rt retired
+
+	// Continue counting rc, rg, rt
+
+	mcl = _mm_set1_epi32(0x33333333);	// Set bit-clearing mask to 0x33333333....(alternate 2-bits)
+
+	rc1 = _mm_srli_epi32(rc, 2);		// Shift intermediate result to right by 2 bit
+	rg1 = _mm_srli_epi32(rg, 2);		// Shift intermediate result to right by 2 bit
+	rt1 = _mm_srli_epi32(rt, 2);		// Shift intermediate result to right by 2 bit
+
+	rc2 = _mm_and_si128(rc, mcl);		// Clear alternate 2-bits of intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+	rg2 = _mm_and_si128(rg, mcl);		// Clear alternate 2-bits of intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+	rt2 = _mm_and_si128(rt, mcl);		// Clear alternate 2-bits of intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+
+	rc1 = _mm_and_si128(rc1, mcl);		// Clear alternate 2-bits of shifted intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+	rg1 = _mm_and_si128(rg1, mcl);		// Clear alternate 2-bits of shifted intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+	rt1 = _mm_and_si128(rt1, mcl);		// Clear alternate 2-bits of shifted intermediate result by combining with bit-clearing mask (now = 0x33333333....)
+
+	rc = _mm_add_epi32(rc1, rc2);		// Combine shifted and non-shifted intermediate results by adding them together
+	rg = _mm_add_epi32(rg1, rg2);		// Combine shifted and non-shifted intermediate results by adding them together
+	rt = _mm_add_epi32(rt1, rt2);		// Combine shifted and non-shifted intermediate results by adding them together
+
+	mcl = _mm_set1_epi32(0x0F0F0F0F);	// Set bit-clearing mask to 0x0F0F0F0F....(alternate 4-bits)
+	r1e = _mm_setzero_si128();			// Set an all-zero mask
+
+	rc1 = _mm_srli_epi32(rc, 4);		// Shift intermediate result to right by 4 bit
+	rg1 = _mm_srli_epi32(rg, 4);		// Shift intermediate result to right by 4 bit
+	rt1 = _mm_srli_epi32(rt, 4);		// Shift intermediate result to right by 4 bit
+
+	rc2 = _mm_add_epi32(rc, rc1);		// Combine shifted and non-shifted intermediate results by adding them together
+	rg2 = _mm_add_epi32(rg, rg1);		// Combine shifted and non-shifted intermediate results by adding them together
+	rt2 = _mm_add_epi32(rt, rt1);		// Combine shifted and non-shifted intermediate results by adding them together
+
+	rc = _mm_and_si128(rc2, mcl);		// Clear alternate 4-bits of intermediate result by combining with bit-clearing mask (now = 0x0F0F0F0F....)
+	rg = _mm_and_si128(rg2, mcl);		// Clear alternate 4-bits of intermediate result by combining with bit-clearing mask (now = 0x0F0F0F0F....)
+	rt = _mm_and_si128(rt2, mcl);		// Clear alternate 4-bits of intermediate result by combining with bit-clearing mask (now = 0x0F0F0F0F....)
+
+	rc = _mm_sad_epu8(rc, r1e);			// Treating the 128 bit as 16 x 8 bit; summing up the 1st 8 x 8 bit into 1st 64-bit and 2nd 8 x 8 bit into 2nd 64-bit
+	rg = _mm_sad_epu8(rg, r1e);			// Treating the 128 bit as 16 x 8 bit; summing up the 1st 8 x 8 bit into 1st 64-bit and 2nd 8 x 8 bit into 2nd 64-bit
+	rt = _mm_sad_epu8(rt, r1e);			// Treating the 128 bit as 16 x 8 bit; summing up the 1st 8 x 8 bit into 1st 64-bit and 2nd 8 x 8 bit into 2nd 64-bit
+
+	occValue[1] = _mm_extract_epi16(rc, 0) + _mm_extract_epi16(rc, 4);	// Extract result from register and store into variable
+	occValue[2] = _mm_extract_epi16(rg, 0) + _mm_extract_epi16(rg, 4);	// Extract result from register and store into variable
+	occValue[3] = _mm_extract_epi16(rt, 0) + _mm_extract_epi16(rt, 4);	// Extract result from register and store into variable
+	occValue[0] = maxIndex - minIndex - occValue[1] - occValue[2] - occValue[3];	// Character code 0 is not counted directly; it is whatever remains of the span
+
+}
+
+
+// Returns the occurrence count of character for the given BWT index: an explicit occ value corrected by decoding the BWT code between that checkpoint and index.
+unsigned int BWTOccValue(const BWT *bwt, unsigned int index, const unsigned int character) {
+
+	unsigned int explicitSlot, checkpoint;
+	unsigned int count, delta;
+
+	// $ is supposed to be positioned at inverseSa0 but it is not encoded
+	// therefore index is subtracted by 1 for adjustment
+	if (index > bwt->inverseSa0) {
+		index--;
+	}
+
+#ifdef DEBUG
+	if (index > bwt->textLength) {
+		fprintf(stderr, "BWTOccValue() : index > textLength!\n");
+		exit(1);
+	}
+#endif
+
+	// Bidirectional encoding: the chosen explicit checkpoint may lie before or after index
+	explicitSlot = (index + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;
+	checkpoint = explicitSlot * OCC_INTERVAL;
+
+	count = BWTOccValueExplicit(bwt, explicitSlot, character);
+#ifdef DEBUG
+	if (count > checkpoint) {
+		fprintf(stderr, "BWTOccValue() : occValueExplicit > occIndex!\n");
+		exit(1);
+	}
+#endif
+
+	if (checkpoint == index) {
+		return count;
+	}
+
+	// Add the decoded count when the checkpoint lies before index, subtract it when it lies after
+	delta = BWTDecode(bwt, checkpoint, index, character);
+	return (checkpoint > index) ? count - delta : count + delta;
+
+}
+
+void BWTOccValueTwoIndex(const BWT *bwt, unsigned int index1, unsigned int index2, const unsigned int character, unsigned int* __restrict occValue) {
+	// Computes the occurrence value of `character` for two BWT positions in one call; the result for index1 is written to occValue[0] and the result for index2 to occValue[1].
+	unsigned int decodeValue, tempExplicit1, tempExplicit2, tempOccValue1, tempOccValue2;
+	unsigned int occExplicitIndex1, occIndex1;
+	unsigned int occExplicitIndex2, occIndex2;
+	unsigned int r;
+
+	// $ is supposed to be positioned at inverseSa0 but it is not encoded
+	// therefore index is subtracted by 1 for adjustment
+	index1 -= (index1 > bwt->inverseSa0);
+	index2 -= (index2 > bwt->inverseSa0);
+
+#ifdef DEBUG
+	if (index1 > bwt->textLength) {
+		fprintf(stderr, "BWTOccValueTwoIndex() : index1 > textLength!\n");
+		exit(1);
+	}
+	if (index2 > bwt->textLength) {
+		fprintf(stderr, "BWTOccValueTwoIndex() : index2 > textLength!\n");
+		exit(1);
+	}
+#endif
+
+	// Pre-fetch memory to be accessed
+	BWTPrefetchBWT(bwt, index1);
+	BWTPrefetchBWT(bwt, index2);
+
+	occExplicitIndex1 = (index1 + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;	// Bidirectional encoding
+	occIndex1 = occExplicitIndex1 * OCC_INTERVAL;
+	occExplicitIndex2 = (index2 + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;	// Bidirectional encoding
+	occIndex2 = occExplicitIndex2 * OCC_INTERVAL;
+
+	// Pre-fetch memory to be accessed
+	BWTPrefetchOccValueExplicit(bwt, occExplicitIndex1);
+	BWTPrefetchOccValueExplicit(bwt, occExplicitIndex2);
+
+	// Signed offset relative to each sample: r is all ones when the sample lies after the index, which turns the decoded count into its negation (modulo 2^32)
+	if (occIndex1 != index1) {
+		decodeValue = BWTDecode(bwt, occIndex1, index1, character);
+		r = -(occIndex1 > index1);
+		tempOccValue1 = (decodeValue & ~r) - (decodeValue & r);
+	} else {
+		tempOccValue1 = 0;
+	}
+
+	if (occIndex2 != index2) {
+		decodeValue = BWTDecode(bwt, occIndex2, index2, character);
+		r = -(occIndex2 > index2);
+		tempOccValue2 = (decodeValue & ~r) - (decodeValue & r);
+	} else {
+		tempOccValue2 = 0;
+	}
+	// The explicit sample values are read only now, after the decoding above (presumably to give the prefetches time to complete - TODO confirm)
+	tempExplicit1 = BWTOccValueExplicit(bwt, occExplicitIndex1, character);
+	tempExplicit2 = BWTOccValueExplicit(bwt, occExplicitIndex2, character);
+#ifdef DEBUG
+	if (tempExplicit1 > occIndex1) {
+		fprintf(stderr, "BWTOccValueTwoIndex() : occValueExplicit1 > occIndex1!\n");
+		exit(1);
+	}
+	if (tempExplicit2 > occIndex2) {
+		fprintf(stderr, "BWTOccValueTwoIndex() : occValueExplicit2 > occIndex2!\n");
+		exit(1);
+	}
+#endif
+
+	occValue[0] = tempOccValue1 + tempExplicit1;
+	occValue[1] = tempOccValue2 + tempExplicit2;
+
+}
+
+
+void BWTAllOccValue(const BWT *bwt, unsigned int index, unsigned int* __restrict occValue) {
+	// Writes the occurrence values of all ALPHABET_SIZE characters for BWT position `index` into occValue.
+	unsigned int occExplicitIndex, occIndex;
+	unsigned int ALIGN_16 tempOccValue[ALPHABET_SIZE];
+	unsigned int r;
+
+	// SSE registers
+	__m128i rtov, rov, rc, t1, t2;
+
+	// $ is supposed to be positioned at inverseSa0 but it is not encoded
+	// therefore index is subtracted by 1 for adjustment
+	index -= (index > bwt->inverseSa0);
+
+#ifdef DEBUG
+	if (index > bwt->textLength) {
+		fprintf(stderr, "BWTAllOccValue() : index > textLength!\n");
+		exit(1);
+	}
+#endif
+
+	occExplicitIndex = (index + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;	// Bidirectional encoding
+	occIndex = occExplicitIndex * OCC_INTERVAL;
+
+	// Start from the explicitly stored values of the chosen sample
+	BWTAllOccValueExplicit(bwt, occExplicitIndex, occValue);
+
+	// Nothing to adjust when index falls exactly on the sample
+	if (occIndex != index) {
+
+		BWTDecodeAll(bwt, occIndex, index, tempOccValue);
+
+		// The following code add tempOccvalue to occValue if index > occIndex and subtract tempOccValue from occValue if occIndex > index
+		r = -(occIndex > index);
+		rc = _mm_set1_epi32(r);				// Set rc = r r r r
+		// occValue is accessed with aligned 128-bit load/store, so the caller's buffer must be 16-byte aligned
+		rtov = _mm_load_si128((__m128i*)tempOccValue);
+		rov = _mm_load_si128((__m128i*)occValue);
+		t1 = _mm_andnot_si128(rc, rtov);	// decoded counts when they are to be added, else zero
+		t2 = _mm_and_si128(rc, rtov);		// decoded counts when they are to be subtracted, else zero
+		rov = _mm_add_epi32(rov, t1);
+		rov = _mm_sub_epi32(rov, t2);
+		_mm_store_si128((__m128i*)occValue, rov);
+
+	}
+}
+
+void BWTAllOccValueTwoIndex(const BWT *bwt, unsigned int index1, unsigned int index2, unsigned int* __restrict occValue1, unsigned int* __restrict occValue2) {
+	// Writes the occurrence values of all ALPHABET_SIZE characters for index1 into occValue1 and for index2 into occValue2.
+	unsigned int occExplicitIndex1, occIndex1;
+	unsigned int occExplicitIndex2, occIndex2;
+	unsigned int ALIGN_16 tempOccValue1[ALPHABET_SIZE];
+	unsigned int ALIGN_16 tempOccValue2[ALPHABET_SIZE];
+	unsigned int r;
+
+	// SSE registers
+	__m128i rtov, rc, t1, t2, o1, o2;
+
+	// $ is supposed to be positioned at inverseSa0 but it is not encoded
+	// therefore index is subtracted by 1 for adjustment
+	index1 -= (index1 > bwt->inverseSa0);
+	index2 -= (index2 > bwt->inverseSa0);
+
+#ifdef DEBUG
+	if (index1 > index2) {
+		fprintf(stderr, "BWTAllOccValueTwoIndex() : index1 > index2!\n");
+		exit(1);
+	}
+	if (index2 > bwt->textLength) {
+		fprintf(stderr, "BWTAllOccValueTwoIndex() : index2 > textLength!\n");
+		exit(1);
+	}
+#endif
+
+	// Pre-fetch memory to be accessed
+	BWTPrefetchBWT(bwt, index1);
+	BWTPrefetchBWT(bwt, index2);
+
+	occExplicitIndex1 = (index1 + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;	// Bidirectional encoding
+	occIndex1 = occExplicitIndex1 * OCC_INTERVAL;
+	occExplicitIndex2 = (index2 + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;	// Bidirectional encoding
+	occIndex2 = occExplicitIndex2 * OCC_INTERVAL;
+
+	// Pre-fetch memory to be accessed
+	BWTPrefetchOccValueExplicit(bwt, occExplicitIndex1);
+	BWTPrefetchOccValueExplicit(bwt, occExplicitIndex2);
+
+	if (occIndex1 != index1) {
+
+		BWTDecodeAll(bwt, occIndex1, index1, tempOccValue1);
+
+		// The following code add tempOccvalue to occValue if index > occIndex and subtract tempOccValue from occValue if occIndex > index
+		r = -(occIndex1 > index1);
+		rtov = _mm_load_si128((__m128i*)tempOccValue1);
+		rc = _mm_set1_epi32(r);				// Set rc = r r r r
+		t1 = _mm_andnot_si128(rc, rtov);
+		t2 = _mm_and_si128(rc, rtov);
+		o1 = _mm_sub_epi32(t1, t2);
+	} else {
+		o1 = _mm_setzero_si128();
+	}
+/* Scalar reference for the SSE block above (not compiled; kept as documentation of the intended arithmetic)
+	if (occIndex1 != index1) {
+		if (occIndex1 < index1) {
+			ForwardDNAAllOccCount(bwt->bwtCode + occIndex1 / CHAR_PER_WORD, index1 - occIndex1, tempOccValue, bwt->decodeTable);
+			occValue1[0] += tempOccValue[0];
+			occValue1[1] += tempOccValue[1];
+			occValue1[2] += tempOccValue[2];
+			occValue1[3] += tempOccValue[3];
+		} else {
+			BackwardDNAAllOccCount(bwt->bwtCode + occIndex1 / CHAR_PER_WORD, occIndex1 - index1, tempOccValue, bwt->decodeTable);
+			occValue1[0] -= tempOccValue[0];
+			occValue1[1] -= tempOccValue[1];
+			occValue1[2] -= tempOccValue[2];
+			occValue1[3] -= tempOccValue[3];
+		}
+	}
+*/
+	if (occIndex2 != index2) {
+
+		BWTDecodeAll(bwt, occIndex2, index2, tempOccValue2);
+
+		// Add the decoded counts when index2 > occIndex2 and subtract them when occIndex2 > index2; the mask must compare against occIndex2, the sample used for index2 (it was built from occIndex1)
+		r = -(occIndex2 > index2);
+		rc = _mm_set1_epi32(r);				// Set rc = r r r r
+		rtov = _mm_load_si128((__m128i*)tempOccValue2);
+		t1 = _mm_andnot_si128(rc, rtov);
+		t2 = _mm_and_si128(rc, rtov);
+		o2 = _mm_sub_epi32(t1, t2);
+
+	} else {
+		o2 = _mm_setzero_si128();
+	}
+
+	BWTAllOccValueExplicit(bwt, occExplicitIndex1, occValue1);
+	BWTAllOccValueExplicit(bwt, occExplicitIndex2, occValue2);
+	// occValue1 and occValue2 are accessed with aligned 128-bit load/store, so both caller buffers must be 16-byte aligned
+	t1 = _mm_load_si128((__m128i*)occValue1);
+	t2 = _mm_load_si128((__m128i*)occValue2);
+
+	t1 = _mm_add_epi32(t1, o1);
+	t2 = _mm_add_epi32(t2, o2);
+
+	_mm_store_si128((__m128i*)occValue1, t1);
+	_mm_store_si128((__m128i*)occValue2, t2);
+
+
+
+/* Scalar reference for the index2 adjustment (not compiled; kept as documentation of the intended arithmetic)
+	if (occIndex2 != index2) {
+		if (occIndex2 < index2) {
+			ForwardDNAAllOccCount(bwt->bwtCode + occIndex2 / CHAR_PER_WORD, index2 - occIndex2, tempOccValue, bwt->decodeTable);
+			occValue2[0] += tempOccValue[0];
+			occValue2[1] += tempOccValue[1];
+			occValue2[2] += tempOccValue[2];
+			occValue2[3] += tempOccValue[3];
+		} else {
+			BackwardDNAAllOccCount(bwt->bwtCode + occIndex2 / CHAR_PER_WORD, occIndex2 - index2, tempOccValue, bwt->decodeTable);
+			occValue2[0] -= tempOccValue[0];
+			occValue2[1] -= tempOccValue[1];
+			occValue2[2] -= tempOccValue[2];
+			occValue2[3] -= tempOccValue[3];
+		}
+	}
+*/
+
+}
+
+unsigned int BWTOccValueOnSpot(const BWT *bwt, unsigned int index, unsigned int* __restrict character) {
+	// Stores the BWT character located just before `index` in *character and returns the occurrence value of that character for `index`.
+	unsigned int occExplicitIndex, occIndex;
+	unsigned int occValue, decodeValue;
+	unsigned int r;
+
+	// The bwt character before index will be returned and the count will be up to that bwt character
+	#ifdef DEBUG
+	if (index == bwt->inverseSa0 + 1) {
+		fprintf(stderr, "BWTOccValueOnSpot(): index = inverseSa0 + 1!\n");
+		exit(1);
+	}
+	if (index > bwt->textLength + 1) {
+		fprintf(stderr, "BWTOccValueOnSpot() : index > textLength!\n");
+		exit(1);
+	}
+	if (index == 0) {
+		fprintf(stderr, "BWTOccValueOnSpot() : index = 0!\n");
+		exit(1);
+	}
+	#endif
+
+	// $ is supposed to be positioned at inverseSa0 but it is not encoded
+	// therefore index is decremented by 1 for adjustment when it lies past inverseSa0
+	index -= (index > bwt->inverseSa0);
+
+	// Bidirectional encoding
+	occExplicitIndex = (index + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL;
+	occIndex = occExplicitIndex * OCC_INTERVAL;
+	// Extract character (index - 1): shift it to the top of its word, then down to the lowest BIT_PER_CHAR bits
+	*character = bwt->bwtCode[(index - 1) / CHAR_PER_WORD] << (((index - 1) % CHAR_PER_WORD) * BIT_PER_CHAR) >> (BITS_IN_WORD - BIT_PER_CHAR);
+	occValue = BWTOccValueExplicit(bwt, occExplicitIndex, *character);
+	// r is all ones when the sample lies after index (decoded count is subtracted) and zero otherwise (decoded count is added)
+	if (occIndex != index) {
+		decodeValue = BWTDecode(bwt, occIndex, index, *character);
+		r = -(occIndex > index);
+		return occValue + (decodeValue & ~r) - (decodeValue & r);
+	} else {
+		return occValue;
+	}
+
+}
+
+unsigned int BWTSearchOccValue(const BWT *bwt, const unsigned int character, const unsigned int searchOccValue) {
+	// Returns the position in bwtCode of the occurrence of `character` that brings its running count up to searchOccValue; terminates the program if no such position is found within one interval.
+	unsigned int occValue;
+	unsigned int i,j;
+	unsigned int c;
+	unsigned int bwtPos;
+	unsigned int occExplicitIndexLeft, occExplicitIndexRight, occExplicitIndexMiddle;
+
+	#ifdef DEBUG
+	if (searchOccValue == 0 || searchOccValue > bwt->textLength) {
+		fprintf(stderr, "BWTSearchOccValue() : searchOccValue out of bound!\n");
+		exit(1);
+	}
+	#endif
+
+	// Search Occurrence value
+
+	occExplicitIndexLeft = 0;
+	occExplicitIndexRight = (bwt->textLength + OCC_INTERVAL - 1) / OCC_INTERVAL;
+	// Binary search over the explicit samples until left and right are adjacent; left only moves to samples whose value is below searchOccValue
+	while (occExplicitIndexLeft + 1 < occExplicitIndexRight) {
+		occExplicitIndexMiddle = average(occExplicitIndexLeft, occExplicitIndexRight);
+		if (searchOccValue > BWTOccValueExplicit(bwt, occExplicitIndexMiddle, character)) {
+			occExplicitIndexLeft = occExplicitIndexMiddle;
+		} else {
+			occExplicitIndexRight = occExplicitIndexMiddle;
+		}
+	}
+
+	// Not tuned for DNA
+	occValue = BWTOccValueExplicit(bwt, occExplicitIndexLeft, character);
+	bwtPos = occExplicitIndexLeft * OCC_INTERVAL / CHAR_PER_WORD;
+	// Scan one interval forward from the left sample, one word at a time; the character under test is always in the top BIT_PER_CHAR bits of c
+	for (i=0; i < OCC_INTERVAL / CHAR_PER_WORD; i++) {
+		c = bwt->bwtCode[bwtPos + i];
+		for (j=0; j < CHAR_PER_WORD && occValue < searchOccValue; j++) {
+			if (c >> (BITS_IN_WORD - BIT_PER_CHAR) == character) {
+				occValue++;
+				if (occValue >= searchOccValue) {
+					return occExplicitIndexLeft * OCC_INTERVAL + i * CHAR_PER_WORD + j;
+				}
+			}
+			c <<= BIT_PER_CHAR;
+		}
+	}
+	// Reached only when the interval was scanned without reaching searchOccValue
+	fprintf(stderr, "BWTSearchOccValue() : unexpected error!\n");
+	exit(1);
+
+}
+
+static INLINE unsigned int BWTOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit, const unsigned int character) {
+	// Returns the explicitly stored occurrence value of `character` at sample occIndexExplicit: the major table entry plus the packed minor table entry.
+	unsigned int occIndexMajor;
+	unsigned int compareMask, shift, mask;
+
+	occIndexMajor = occIndexExplicit * OCC_INTERVAL / OCC_INTERVAL_MAJOR;
+	// Even sample index: compareMask is all ones, so the word is shifted right by 16 (upper half). Odd sample index: no shift and the lower 16 bits are masked out.
+	compareMask = (-(occIndexExplicit % OCC_VALUE_PER_WORD == 0));
+	shift = 16 & compareMask;
+	mask = 0x0000FFFF | compareMask;
+
+	return bwt->occValueMajor[occIndexMajor * ALPHABET_SIZE + character] +
+			((bwt->occValue[occIndexExplicit / OCC_VALUE_PER_WORD * ALPHABET_SIZE + character] >> shift) & mask);
+
+}
+
+static INLINE void BWTAllOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit, unsigned int* __restrict occValueExplicit) {
+	// Writes the explicitly stored occurrence values of four consecutive characters at sample occIndexExplicit into occValueExplicit (major entry + packed minor entry, per lane).
+	unsigned int occIndexMajor;
+	unsigned int compareMask, shift, mask;
+
+	__m128i v1, v2, m;
+
+	occIndexMajor = occIndexExplicit * OCC_INTERVAL / OCC_INTERVAL_MAJOR;
+	// Even sample index: shift right by 16 and keep everything (upper half). Odd sample index: no shift, keep the lower 16 bits.
+	compareMask = (-(occIndexExplicit % OCC_VALUE_PER_WORD == 0));
+	shift = 16 & compareMask;
+	mask = 0x0000FFFF | compareMask;
+	// Aligned 128-bit loads: both table addresses must be 16-byte aligned
+	v2 = _mm_load_si128((__m128i *)(bwt->occValue + occIndexExplicit / OCC_VALUE_PER_WORD * ALPHABET_SIZE));
+	v1 = _mm_load_si128((__m128i *)(bwt->occValueMajor + occIndexMajor * ALPHABET_SIZE));
+
+	m = _mm_set1_epi32(mask);
+
+	v2 = _mm_srli_epi32(v2, shift);
+	v2 = _mm_and_si128(v2, m);
+
+	v1 = _mm_add_epi32(v1, v2);
+	// Aligned 128-bit store: occValueExplicit must be 16-byte aligned
+	_mm_store_si128((__m128i*)occValueExplicit, v1);
+
+}
+
+static INLINE void BWTPrefetchOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit) {
+	// Issues prefetch hints for the major and minor occurrence table entries belonging to sample occIndexExplicit.
+	const unsigned int majorSlot = occIndexExplicit * OCC_INTERVAL / OCC_INTERVAL_MAJOR;
+	const unsigned int minorSlot = occIndexExplicit / OCC_VALUE_PER_WORD;
+	unsigned int *majorEntry = bwt->occValueMajor + majorSlot * ALPHABET_SIZE;
+	unsigned int *minorEntry = bwt->occValue + minorSlot * ALPHABET_SIZE;
+
+	_mm_prefetch((char*)majorEntry, _MM_HINT_T0);
+	_mm_prefetch((char*)minorEntry, _MM_HINT_NTA);
+}
+
+static INLINE void BWTPrefetchBWT(const BWT *bwt, const unsigned int index) {
+	// Issues a prefetch hint for the bwtCode word that holds character `index`.
+	unsigned int *codeWord = bwt->bwtCode + index / CHAR_PER_WORD;
+	_mm_prefetch((char*)codeWord, _MM_HINT_NTA);
+}
+
+
+unsigned int BWTResidentSizeInWord(const unsigned int numChar) {
+	// Number of words needed for numChar characters once the character count
+	// has been padded up to a whole number of OCC_INTERVAL blocks; the final
+	// conversion to words rounds up as well.
+	// The $ in BWT at the position of inverseSa0 is not encoded
+	const unsigned int occBlocks = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL;
+	const unsigned int paddedChars = occBlocks * OCC_INTERVAL;
+
+	return (paddedChars + CHAR_PER_WORD - 1) / CHAR_PER_WORD;
+}
+
+unsigned int BWTFileSizeInWord(const unsigned int numChar) {
+	// Number of words needed to hold numChar packed characters, rounded up.
+	// The $ in BWT at the position of inverseSa0 is not encoded
+	const unsigned int roundedUp = numChar + CHAR_PER_WORD - 1;
+	return roundedUp / CHAR_PER_WORD;
+}
+
+unsigned int BWTOccValueMinorSizeInWord(const unsigned int numChar) {
+	// Size in words of the minor occurrence table for numChar characters.
+	// One sample per OCC_INTERVAL block plus one more: value at both end for bi-directional encoding.
+	const unsigned int sampleCount = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
+	// OCC_VALUE_PER_WORD samples share a word; every such group takes ALPHABET_SIZE words.
+	const unsigned int sampleGroups = (sampleCount + OCC_VALUE_PER_WORD - 1) / OCC_VALUE_PER_WORD;
+	return sampleGroups * ALPHABET_SIZE;
+}
+
+unsigned int BWTOccValueMajorSizeInWord(const unsigned int numChar) {
+	// Size in words of the major occurrence table for numChar characters.
+	// Samples are counted as one per OCC_INTERVAL block plus one more
+	// (value at both end for bi-directional encoding).
+	const unsigned int sampleCount = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
+	const unsigned int samplesPerMajor = OCC_INTERVAL_MAJOR / OCC_INTERVAL;
+	const unsigned int majorCount = (sampleCount + samplesPerMajor - 1) / samplesPerMajor;
+
+	// Every major entry takes ALPHABET_SIZE words.
+	return majorCount * ALPHABET_SIZE;
+}
+
+void BWTClearTrailingBwtCode(BWT *bwt) {
+	// Zeroes the part of bwt->bwtCode that lies past the last encoded character, up to the size given by BWTResidentSizeInWord().
+	const unsigned int residentWords = BWTResidentSizeInWord(bwt->textLength);
+	const unsigned int lastWord = bwt->textLength / CHAR_PER_WORD;
+	const unsigned int usedBits = (bwt->textLength % CHAR_PER_WORD) * BIT_PER_CHAR;
+	unsigned int w;
+
+	if (usedBits == 0) {
+		// Text ends on a word boundary: this word holds no text and is cleared if it is within the resident size.
+		if (lastWord < residentWords) {
+			bwt->bwtCode[lastWord] = 0;
+		}
+	} else {
+		// Partially used word: truncateRight() is given the number of unused bits (presumably clearing them - TODO confirm).
+		bwt->bwtCode[lastWord] = truncateRight(bwt->bwtCode[lastWord], BITS_IN_WORD - usedBits);
+	}
+
+	// Every later word holds no text at all.
+	w = lastWord + 1;
+	while (w < residentWords) {
+		bwt->bwtCode[w++] = 0;
+	}
+}
+
+unsigned int BWTPsiMinusValue(const BWT *bwt, const unsigned int index) {
+	// Maps SA index `index` to cumulativeFreq[c] plus the occurrence value of c, where c is
+	// the BWT character that BWTOccValueOnSpot() reports for index + 1.
+	// The index inverseSa0 is the special case and maps to 0.
+	unsigned int bwtChar;
+	unsigned int rank;
+
+	#ifdef DEBUG
+	if (index > bwt->textLength) {
+		fprintf(stderr, "BWTPsiMinusValue() : index out of range!\n");
+		exit(1);
+	}
+	#endif
+
+	if (index == bwt->inverseSa0) {
+		return 0;
+	}
+
+	// bwtChar is an output of the call; it must be set before it is used to index cumulativeFreq.
+	rank = BWTOccValueOnSpot(bwt, index + 1, &bwtChar);
+	rank += bwt->cumulativeFreq[bwtChar];
+
+	return rank;
+}
+
+unsigned int BWTPsiPlusValue(const BWT *bwt, const unsigned int index) {
+	// Maps index 0 to inverseSa0; any other index is mapped through the position
+	// of the matching character occurrence in the encoded BWT.
+	unsigned int symbol = 0;
+	unsigned int k, bwtPosition;
+
+	#ifdef DEBUG
+	if (index > bwt->textLength) {
+		fprintf(stderr, "BWTPsiPlusValue() : index out of range!\n");
+		exit(1);
+	}
+	#endif
+
+	if (index == 0) {
+		return bwt->inverseSa0;
+	}
+
+	// Find the BWT of PSI+: count how many of cumulativeFreq[1..3] lie below index.
+	for (k = 1; k <= 3; k++) {
+		symbol += (index > bwt->cumulativeFreq[k]);
+	}
+
+	// Locate the (index - cumulativeFreq[symbol])-th occurrence of symbol in the encoded BWT.
+	bwtPosition = BWTSearchOccValue(bwt, symbol, index - bwt->cumulativeFreq[symbol]);
+	// Results at or after inverseSa0 are moved up by one.
+	return bwtPosition + (bwtPosition >= bwt->inverseSa0);
+}
+
+unsigned int BWTSaValue(const BWT *bwt, unsigned int saIndex) {
+	// Recovers the SA value at saIndex from the sparse samples: steps with BWTPsiMinusValue() until
+	// saIndex is a multiple of saInterval, then adds the number of steps to the stored sample.
+	unsigned int steps;
+	unsigned int sampledValue;
+
+	#ifdef DEBUG
+	if (saIndex > bwt->textLength) {
+		fprintf(stderr, "BWTSaValue() : Index out of range!\n");
+		exit(1);
+	}
+	if (bwt->saValue == NULL) {
+		fprintf(stderr, "BWTSaValue() : Explicit SA value is not loaded!\n");
+		exit(1);
+	}
+	#endif
+
+	for (steps = 0; saIndex % bwt->saInterval != 0; steps++) {
+		saIndex = BWTPsiMinusValue(bwt, saIndex);
+	}
+	sampledValue = bwt->saValue[saIndex / bwt->saInterval];
+
+	#ifdef DEBUG
+	if (sampledValue + steps > bwt->textLength) {
+		fprintf(stderr, "BWTSaValue() : saValue out of range!\n");
+		exit(1);
+	}
+	#endif
+
+	// SA[0] stores -1 although it should be textLength; PsiMinusValue returns 0 on inverseSa0
+	return sampledValue + steps;
+}
+
+unsigned int BWTInverseSa(const BWT *bwt, unsigned int saValue) {
+	// Finds the SA index for text position saValue: starts at the stored inverse SA sample at or
+	// above saValue and steps down to saValue with BWTPsiMinusValue().
+	unsigned int saIndex, stepsLeft;
+	unsigned int sampleSlot, sampleText;
+
+	#ifdef DEBUG
+	if (saValue > bwt->textLength) {
+		fprintf(stderr, "BWTInverseSa() : Index out of range!\n");
+		exit(1);
+	}
+	if (bwt->inverseSa == NULL) {
+		fprintf(stderr, "BWTInverseSa() : Explicit inverse SA is not loaded!\n");
+		exit(1);
+	}
+	#endif
+
+	sampleSlot = (saValue + bwt->inverseSaInterval - 1) / bwt->inverseSaInterval;
+	sampleText = sampleSlot * bwt->inverseSaInterval;
+	if (sampleText <= bwt->textLength) {
+		saIndex = bwt->inverseSa[sampleSlot];
+		stepsLeft = sampleText - saValue;
+	} else {
+		// The sample would lie past the end of the text: start from SA index 0 and count down from textLength.
+		saIndex = 0;
+		stepsLeft = bwt->textLength - saValue;
+	}
+
+	while (stepsLeft-- > 0) {
+		saIndex = BWTPsiMinusValue(bwt, saIndex);
+	}
+
+	return saIndex;
+}
+
+static INLINE unsigned int BWTGetWordPackedText(const unsigned int *packedText, const unsigned int index, const unsigned int shift, const unsigned int numOfBit) {
+	// Returns one word of packed text starting `shift` bits into packedText[index]; when numOfBit < BITS_IN_WORD only the top numOfBit bits are kept.
+	unsigned int text;
+	const static unsigned int mask[32] = { 0x00000000, 0x80000000, 0xC0000000, 0xE0000000,
+								  0xF0000000, 0xF8000000, 0xFC000000, 0xFE000000,
+								  0xFF000000, 0xFF800000, 0xFFC00000, 0xFFE00000,
+								  0xFFF00000, 0xFFF80000, 0xFFFC0000, 0xFFFE0000,
+								  0xFFFF0000, 0xFFFF8000, 0xFFFFC000, 0xFFFFE000,
+								  0xFFFFF000, 0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00,
+								  0xFFFFFF00, 0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0,
+								  0xFFFFFFF0, 0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE };
+	// mask[k] has its top k bits set; the shift == 0 case is handled separately so that no shift by BITS_IN_WORD is performed
+	if (shift > 0) {
+		// packedText should be allocated with at least 1 Word buffer initialized to zero
+		text = (packedText[index] << shift) | (packedText[index + 1] >> (BITS_IN_WORD - shift));
+	} else {
+		text = packedText[index];
+	}
+
+	if (numOfBit < BITS_IN_WORD) {
+		// Fill unused bit with zero
+		text &= mask[numOfBit];
+	}
+
+	return text;
+}
+
+int BWTForwardSearch(const unsigned int *packedKey, const unsigned int keyLength, const BWT *bwt, const unsigned int *packedText) {
+	// Searches the packed text for the packed key by binary search over suffix array positions; returns 1 when a match is found and 0 otherwise.
+	unsigned int startSaIndex, endSaIndex, saIndexMiddle;
+	unsigned int saExplicitIndexLeft, saExplicitIndexRight, saExplicitIndexMiddle;
+	unsigned int saValue;
+
+	unsigned int firstChar;
+	unsigned int index, shift;
+	unsigned int packedKeyLength, keyLengthInBit;
+	unsigned int llcp, rlcp, mlcp, maxlcp;
+	unsigned int p = 0;	// to avoid compiler warning only
+	// packedKeyLength = number of words occupied by the key (rounded up); keyLengthInBit = exact key length in bits
+	if (keyLength % CHAR_PER_WORD == 0) {
+		packedKeyLength = keyLength / CHAR_PER_WORD;
+		keyLengthInBit = packedKeyLength * BITS_IN_WORD;
+	} else {
+		packedKeyLength = keyLength / CHAR_PER_WORD + 1;
+		keyLengthInBit = (keyLength / CHAR_PER_WORD) * BITS_IN_WORD + 
+						 (keyLength % CHAR_PER_WORD) * BIT_PER_CHAR;
+	}
+
+	// Get the SA index initial range by retrieving cumulative frequency
+	firstChar = packedKey[0] >> (BITS_IN_WORD - BIT_PER_CHAR);
+
+	startSaIndex = bwt->cumulativeFreq[firstChar] + 1;
+	endSaIndex = bwt->cumulativeFreq[firstChar + 1];
+
+	if (startSaIndex > endSaIndex) {
+		// The first character of search pattern does not exists in text
+		return 0;
+	}
+
+	// Find lcp for left boundary
+	saValue = bwt->saValueOnBoundary[firstChar * 2];		// Pre-calculated
+
+	// restriction for positions near the end of text
+	maxlcp = min(packedKeyLength, (bwt->textLength - saValue + CHAR_PER_WORD - 1) / CHAR_PER_WORD);
+
+	shift = BIT_PER_CHAR * (saValue % CHAR_PER_WORD);
+	index = saValue / CHAR_PER_WORD;
+	// Compare key and text word by word; llcp counts the leading words that match
+	llcp = 0;
+	while (llcp < maxlcp && packedKey[llcp] == 
+					BWTGetWordPackedText(packedText, index + llcp, shift, keyLengthInBit - llcp * BITS_IN_WORD)) {
+		llcp++;
+	}
+	if ((saValue + keyLength > bwt->textLength) && llcp == maxlcp) {	// fewer than keyLength characters remain from saValue, so the last compared word is not counted as matched
+		llcp--;
+	}
+	if (llcp == packedKeyLength) {
+		return 1;
+	}
+
+	// Find lcp for right boundary
+	saValue = bwt->saValueOnBoundary[firstChar * 2 + 1];	// Pre-calculated
+
+	// restriction for positions near the end of text
+	maxlcp = min(packedKeyLength, (bwt->textLength - saValue + CHAR_PER_WORD - 1) / CHAR_PER_WORD);
+
+	shift = BIT_PER_CHAR * (saValue % CHAR_PER_WORD);
+	index = saValue / CHAR_PER_WORD;
+
+	rlcp = 0;
+	while (rlcp < maxlcp && packedKey[rlcp] == 
+					BWTGetWordPackedText(packedText, index + rlcp, shift, keyLengthInBit - rlcp * BITS_IN_WORD)) {
+		rlcp++;
+	}
+	if ((saValue + keyLength > bwt->textLength) && rlcp == maxlcp) {
+		rlcp--;
+	}
+	if (rlcp == packedKeyLength) {
+		return 1;
+	}
+
+	// Locate in SA index explicitly stored
+	saExplicitIndexLeft = startSaIndex / bwt->saInterval;
+	saExplicitIndexRight = (endSaIndex - 1) / bwt->saInterval + 1;
+
+	// loop until two adjacent SA explicit index is found
+	while (saExplicitIndexLeft + 1 < saExplicitIndexRight) {
+
+		saExplicitIndexMiddle = average(saExplicitIndexLeft, saExplicitIndexRight);
+
+		saValue = bwt->saValue[saExplicitIndexMiddle];
+		shift = BIT_PER_CHAR * (saValue % CHAR_PER_WORD);
+		index = saValue / CHAR_PER_WORD;
+
+		// Try to increase mlcp
+		mlcp = min(llcp, rlcp);		// mlcp = the characters (in unit of 16 for DNA) matched so far
+		// restriction for positions near the end of text
+		maxlcp = min(packedKeyLength, (bwt->textLength - saValue + CHAR_PER_WORD - 1) / CHAR_PER_WORD);
+
+		while (mlcp < maxlcp) {
+			p = BWTGetWordPackedText(packedText, index + mlcp, shift, keyLengthInBit - mlcp * BITS_IN_WORD);
+			if (packedKey[mlcp] != p) {
+				break;
+			}
+			mlcp++;
+		}
+		if ((saValue + keyLength <= bwt->textLength) || mlcp != maxlcp) {
+			if (mlcp == packedKeyLength) {
+				return 1;
+			}
+			if (packedKey[mlcp] > p) {
+				llcp = mlcp;
+				saExplicitIndexLeft = saExplicitIndexMiddle;
+			} else {
+				rlcp = mlcp;
+				saExplicitIndexRight = saExplicitIndexMiddle;
+			}
+		} else {
+			if (packedKey[mlcp-1] >= p) {
+				llcp = mlcp - 1;
+				saExplicitIndexLeft = saExplicitIndexMiddle;
+			} else {
+				rlcp = mlcp - 1;
+				saExplicitIndexRight = saExplicitIndexMiddle;
+			}
+			
+		}
+
+	}
+
+	// Two adjacent SA explicit index is found, convert back to SA index
+	if (saExplicitIndexLeft == startSaIndex / bwt->saInterval) {
+		startSaIndex = bwt->cumulativeFreq[firstChar] + 1;
+	} else {
+		startSaIndex = saExplicitIndexLeft * bwt->saInterval;
+	}
+	if (saExplicitIndexRight == (endSaIndex - 1) / bwt->saInterval + 1) {
+		endSaIndex = bwt->cumulativeFreq[firstChar + 1];
+	} else {
+		endSaIndex = saExplicitIndexRight * bwt->saInterval;
+	}
+
+	// binary search by decoding bwt
+	// Same comparison as the loop above, but every SA value is obtained through BWTSaValue() and the range is narrowed one SA index at a time
+	while (startSaIndex < endSaIndex) {
+
+		saIndexMiddle = average(startSaIndex, endSaIndex);
+
+		saValue = BWTSaValue(bwt, saIndexMiddle);
+		shift = BIT_PER_CHAR * (saValue % CHAR_PER_WORD);
+		index = saValue / CHAR_PER_WORD;
+
+		// Try to increase mlcp
+		mlcp = min(llcp, rlcp);		// mlcp = the characters (in unit of 16 for DNA) matched so far
+		// restriction for positions near the end of text
+		maxlcp = min(packedKeyLength, (bwt->textLength - saValue + CHAR_PER_WORD - 1) / CHAR_PER_WORD);
+
+		while (mlcp < maxlcp) {
+			p = BWTGetWordPackedText(packedText, index + mlcp, shift, keyLengthInBit - mlcp * BITS_IN_WORD);
+			if (packedKey[mlcp] != p) {
+				break;
+			}
+			mlcp++;
+		}
+		if ((saValue + keyLength <= bwt->textLength) || mlcp != maxlcp) {
+			if (mlcp == packedKeyLength) {
+				return 1;
+			}
+			if (packedKey[mlcp] > p) {
+				llcp = mlcp;
+				startSaIndex = saIndexMiddle + 1;
+			} else {
+				rlcp = mlcp;
+				endSaIndex = saIndexMiddle;
+			}
+		} else {
+			if (packedKey[mlcp-1] >= p) {
+				llcp = mlcp - 1;
+				startSaIndex = saIndexMiddle + 1;
+			} else {
+				rlcp = mlcp - 1;
+				endSaIndex = saIndexMiddle;
+			}
+			
+		}
+
+	}
+
+	// no match found
+	return 0;
+
+}
diff --git a/BWT.h b/BWT.h
new file mode 100644
index 0000000..12b5f07
--- /dev/null
+++ b/BWT.h
@@ -0,0 +1,268 @@
+/*
+
+   BWT.h	BWT-Index
+
+   This module contains an implementation of BWT-index for alphabet size = 4.
+   The functions provided include:
+    Load functions for loading BWT to memory;
+    Core functions for accessing core Inverse Psi values;
+	Search functions for searching patterns from text;
+	Text retrieval functions for retrieving text from BWT.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __BWT_H__
+#define __BWT_H__
+#include "HSP.h"
+#include "TypeNLimit.h"
+#include "MemManager.h"
+#include "TextConverter.h"
+
+
+#define BITS_PER_OCC_VALUE			16		// presumably the width in bits of one packed occurrence value - TODO confirm
+#define OCC_VALUE_PER_WORD			2		// number of occurrence values packed into one word
+#define OCC_INTERVAL				256		// number of BWT characters between two explicitly stored occurrence values
+#define WORD_BETWEEN_OCC			16		// presumably OCC_INTERVAL expressed in words - TODO confirm
+#define OCC_INTERVAL_MAJOR			65536	// number of BWT characters covered by one major occurrence entry
+
+#define SORT_ALL					0
+#define SORT_16_BIT					1
+#define SORT_NONE					2
+
+#define BUCKET_BIT					16
+#define NUM_BUCKET					65536	// equals 2^BUCKET_BIT
+
+#define MAX_APPROX_MATCH_ERROR	7
+#define MAX_ARPROX_MATCH_LENGTH	32	// (sic) identifier is misspelled ("ARPROX"); renaming it would affect every user of the macro
+
+#define BWTDP_MAX_SUBSTRING_LENGTH	512
+
+typedef struct _BWTOPT_TYPE_{	// NOTE(review): no user of this struct is visible here, field meanings are not documented; the tag uses a reserved identifier form (underscore + upper-case letter)
+	int cutoff;
+	int alnLen, seqLen;
+	int min_len;
+	int h, x, y;
+	int max_mm, gap_len, gap_fb;
+	int nblock;					// presumably the number of entries in blockList - TODO confirm
+	ChrBlock *blockList;
+	unsigned int *pacRef;
+	unsigned int dnaLen;
+	unsigned int extLen;
+	char *fw, *rc;
+}BWTOPT;
+
+typedef struct SaIndexRange {	// A range of suffix array indexes
+	unsigned int startSaIndex;	// first SA index of the range
+	unsigned int endSaIndex;	// last SA index of the range - NOTE(review): whether it is inclusive is not visible here; confirm
+} SaIndexRange;
+
+
+typedef struct BWT {
+	unsigned int textLength;			// length of the text
+	unsigned int saInterval;			// interval between two SA values stored explicitly
+	unsigned int inverseSaInterval;		// interval between two inverse SA stored explicitly
+	unsigned int inverseSa0;			// SA-1[0]; the position where $ would sit in the BWT ($ itself is not encoded)
+	unsigned int *cumulativeFreq;		// cumulative frequency
+	unsigned int *bwtCode;				// BWT code
+	unsigned int *occValue;				// Minor occurrence values stored explicitly (OCC_VALUE_PER_WORD 16-bit values packed per word)
+	unsigned int *occValueMajor;		// Major occurrence values stored explicitly (one entry set per OCC_INTERVAL_MAJOR characters)
+	unsigned int *saValue;				// SA values stored explicitly
+	unsigned int *inverseSa;			// Inverse SA stored explicitly
+	SaIndexRange *saIndexRange;			// SA index range
+	int saIndexRangeNumOfChar;			// Number of characters indexed in SA index range
+	unsigned int *saValueOnBoundary;	// Pre-calculated SA values, two per first character (read at firstChar * 2 and firstChar * 2 + 1)
+	unsigned int *decodeTable;			// For decoding BWT by table lookup
+	unsigned int decodeTableGenerated;	// == TRUE if decode table is generated on load and will be freed
+	unsigned int bwtSizeInWord;			// Temporary variable to hold the memory allocated
+	unsigned int occSizeInWord;			// Temporary variable to hold the memory allocated
+	unsigned int occMajorSizeInWord;	// Temporary variable to hold the memory allocated
+	unsigned int saValueSize;			// Temporary variable to hold the memory allocated
+	unsigned int inverseSaSize;			// Temporary variable to hold the memory allocated
+	unsigned int saIndexRangeSize;		// Temporary variable to hold the memory allocated
+} BWT;
+
+#define MAX_DIAGONAL_LEVEL 4				// Number of sub-patterns to keep for detecting diagonal hits
+
+// Error information is stored as:
+// 1. bitVector
+//	  After hamming distance match
+// 2. count
+//    After edit distance match
+// 3. score
+//    After the hits are processed with scoring functions
+
+typedef struct SaIndexGroupNew {	// SA index range and information of a particular error arrangement of a matched sub-pattern
+	unsigned int startSaIndex;			// starting SA index
+	unsigned int numOfMatch;				// number of matches
+	unsigned int posQuery;				// position in query; used for detecting diagonal hits
+	unsigned int info;					// extra hit information; to be copied to hitList.info
+} SaIndexGroupNew;
+
+typedef struct SaIndexGroupOld {	// SA index range and information of a particular error arrangement of a matched sub-pattern
+	unsigned int startSaIndex;			// starting SA index
+	unsigned int numOfMatch;				// number of matches
+	unsigned int info;					// extra hit information; to be copied to hitList.info
+} SaIndexGroupOld;
+
+typedef struct SaIndexGroup {	// SA index range and information of a particular error arrangement of a matched sub-pattern
+	unsigned int startSaIndex;			// starting SA index
+	unsigned int numOfMatch;			// number of matches
+	unsigned int info;					// extra hit information
+} SaIndexGroup;
+
+typedef struct SaIndexGroupWithErrorBitVector {	// SA index range and information of a particular error arrangement of a matched sub-pattern
+	unsigned int startSaIndex;			// starting SA index
+	unsigned int numOfMatch;			// number of matches
+	unsigned int errorBitVector;			// error bit vector (the form used after hamming distance match, per the note on error information in this header)
+} SaIndexGroupWithErrorBitVector;
+
+typedef struct SaIndexGroupWithLengthError {	// SA index range and information of a particular error arrangement of a matched sub-pattern
+	unsigned int startSaIndex;			// starting SA index
+	unsigned int numOfMatch;			// number of matches
+	unsigned posQuery : 16;		// position in query (16-bit field)
+	unsigned length   : 8;		// length of hit (8-bit field)
+	unsigned error    : 8;		// error in hit (8-bit field); the three bit-fields total 32 bits
+} SaIndexGroupWithLengthError;
+
+typedef struct SaIndexGroupProcessed {	// Alternative usage of SaIndexGroup - once processed, error bit vector is replaced by index to text position
+	unsigned int startSaIndex;			// starting SA index
+	unsigned int numOfMatch;			// number of matches
+	unsigned int textPositionIndex;		// index to the text position (takes the place of the third field of SaIndexGroup)
+} SaIndexGroupProcessed;
+
+typedef struct DupSaIndexGroup {	// Alternative usage of SaIndexGroup - the group duplicates another group
+	unsigned int lastDupSaIndexGroupIndex;	// index to last duplicated group
+	unsigned int saIndexGroupIndex;			// index to the first SA index group among the duplicates
+	unsigned int textPositionIndex;			// index to the text position
+} DupSaIndexGroup;
+
+typedef struct SaIndexGroupHash {	// Hash table entry for checking duplicate SA index group
+	unsigned int startSaIndex;			// presumably the starting SA index of the group being looked up - TODO confirm
+	unsigned int saIndexGroupIndex;		// presumably the index of that group in the group list - TODO confirm
+} SaIndexGroupHash;
+
+typedef struct BWTSaRetrievalStatistics {	// Counters; NOTE(review): the code that updates them is not visible here, meanings are only suggested by the names
+	unsigned int bwtSaRetrieved;
+	unsigned int saDiagonalLinked;
+	unsigned int saDiagonalFiltered;
+	unsigned int saDuplicated;
+} BWTSaRetrievalStatistics;
+
+typedef struct BWTDPStatistics {	// Counters; NOTE(review): the code that updates them is not visible here, meanings are only suggested by the names
+	int maxDepth;
+	int maxDPCell;
+	int maxDPMemoryInWord;
+	int totalMaxDepth;
+	int totalMaxDPCell;
+	int totalMaxDPMemoryInWord;
+	LONG acceptedPathDepth;
+	LONG acceptedPath;
+	LONG rejectedPathDepth;
+	LONG rejectedPath;
+	LONG* __restrict totalNode;		// pointer member; NOTE(review): element count and ownership are not visible here
+	LONG* __restrict rejectedNode;	// pointer member; NOTE(review): element count and ownership are not visible here
+	LONG* __restrict totalDPCell;	// pointer member; NOTE(review): element count and ownership are not visible here
+} BWTDPStatistics;
+
+typedef struct SaIndexList {	// Pairs one SA index with an index to a text position
+	unsigned int saIndex;	// a single SA index
+	unsigned int textPositionIndex;	// index to the text position (same field name as in SaIndexGroupProcessed)
+} SaIndexList;
+
+typedef struct HitCombination {	// Description of error-position combinations for a key; NOTE(review): meanings are inferred from the field names only
+	int numOfCombination;	// presumably number of combinations stored - confirm
+	int maxError;	// presumably maximum number of errors allowed - confirm
+	int keyLength;	// presumably length of the key the combinations apply to - confirm
+	int skipTableWidth;	// presumably row width of the skip table - confirm
+	int *errorPos;	// array pointers; sizes and ownership are not visible here
+	int *skip;
+	int *skipErrorIndex;
+} HitCombination;
+
+typedef struct DPText {	// Per-text-character DP state together with one SA range boundary per alphabet character
+	int charBeingProcessed;
+	int dpCellIndex;
+	int numOfDpCellSegment;
+	unsigned int dummy1;	// Must not be removed; so that saIndexLeft and saIndexRight are aligned to 16 byte boundary (the four leading members total 16 bytes when int is 4 bytes)
+	unsigned int saIndexLeft[ALPHABET_SIZE];	// one left SA index per character; ALPHABET_SIZE is defined outside this part of the header
+	unsigned int saIndexRight[ALPHABET_SIZE];	// one right SA index per character
+} DPText;
+
+typedef struct DPScanDepth {	// 31-bit value plus a 1-bit flag (32 bits of bit-fields in total)
+	unsigned P				:	31;	// 31-bit field; NOTE(review): meaning of P is not stated here (presumably a position or depth - confirm)
+	unsigned withAmbiguity	:	1;	// 1-bit flag
+} DPScanDepth;
+
+
+// Load / unload functions (all take the MMPool memory pool as first argument)
+BWT *BWTCreate(MMPool *mmPool, const unsigned int textLength, unsigned int *decodeTable);
+BWT *BWTLoad(MMPool *mmPool, const char *bwtCodeFileName, const char *occValueFileName, 	// file names: BWT code, occurrence values,
+			 const char *saValueFileName, const char *inverseSaFileName, const char *saIndexRangeFileName,	// SA values, inverse SA, SA index range
+			 unsigned int *decodeTable);
+void BWTFree(MMPool *mmPool, BWT *bwt);
+//void BWTPrintMemoryUsage(const BWT *bwt, FILE *output, const unsigned int packedDNASize);
+
+// Precalculate frequently accessed data
+void BWTGenerateSaValueOnBoundary(MMPool *mmPool, BWT *bwt);
+
+// Core functions
+// The following must be customized for different compression schemes ***
+unsigned int BWTDecode(const BWT *bwt, const unsigned int index1, const unsigned int index2, const unsigned int character);
+void BWTDecodeAll(const BWT *bwt, const unsigned int index1, const unsigned int index2, unsigned int* __restrict occValue);
+unsigned int BWTOccValue(const BWT *bwt, unsigned int index, const unsigned int character);	// BWTAln.c adds the result to cumulativeFreq[character] to step an SA range boundary
+void BWTOccValueTwoIndex(const BWT *bwt, unsigned int index1, unsigned int index2, const unsigned int character, unsigned int* __restrict occValue);
+void BWTAllOccValue(const BWT *bwt, unsigned int index, unsigned int* __restrict occValue);	// BWTAln.c passes a 4-element array and reads one value per character
+void BWTAllOccValueTwoIndex(const BWT *bwt, unsigned int index1, unsigned int index2, unsigned int* __restrict occValue1, unsigned int* __restrict occValue2);
+unsigned int BWTOccValueOnSpot(const BWT *bwt, unsigned int index, unsigned int* __restrict character);
+unsigned int BWTSearchOccValue(const BWT *bwt, const unsigned int character, const unsigned int searchOccValue);
+
+
+// Utility functions for the no-compression scheme only
+unsigned int BWTResidentSizeInWord(const unsigned int numChar);	// size is expressed in words according to the name
+unsigned int BWTFileSizeInWord(const unsigned int numChar);	// size is expressed in words according to the name
+void BWTClearTrailingBwtCode(BWT *bwt);
+
+// These are generic to different compression schemes (and generic to no compression as well)
+unsigned int BWTPsiMinusValue(const BWT *bwt, const unsigned int index);
+unsigned int BWTPsiPlusValue(const BWT *bwt, const unsigned int index);
+unsigned int BWTSaValue(const BWT *bwt, unsigned int index);	// presumably maps an SA index to its SA value (text position) - confirm
+unsigned int BWTInverseSa(const BWT *bwt, unsigned int saValue);	// presumably the inverse mapping of BWTSaValue - confirm
+unsigned int BWTOccIntervalMajor(const unsigned int occInterval);
+unsigned int BWTOccValueMinorSizeInWord(const unsigned int numChar);
+unsigned int BWTOccValueMajorSizeInWord(const unsigned int numChar);
+
+// Search functions
+// packedText should be allocated with at least 1 Word buffer initialized to zero
+
+// Text retrieval functions
+// Position in text will be placed at the first word of hitListSizeInWord
+
+// startSaIndex + resultInfo must be sorted in increasing order; there must be no overlapping groups except that one group can completely enclose another
+
+// QSort comparison functions; each takes the array base plus two element indexes, not the two element pointers of the standard qsort() comparator
+int SaIndexGroupStartSaIndexOrder(const void *saIndexGroup, const int index1, const int index2);
+int SaIndexGroupStartSaIndexLengthErrorOrder(const void *saIndexGroup, const int index1, const int index2);
+int HitListPosTextErrorLengthOrder(const void *hitList, const int index1, const int index2);
+int HitListPosText16BitOrder(const void *hitList, const int index1, const int index2);
+int HitListPosTextOrder(const void *hitList, const int index1, const int index2);
+int GappedHitListScorePosTextOrder(const void *gappedHitList, const int index1, const int index2);
+int GappedHitListDbSeqIndexScorePosTextOrder(const void *gappedHitList, const int index1, const int index2);
+
+
+#endif
diff --git a/BWTAln.c b/BWTAln.c
new file mode 100644
index 0000000..7da7091
--- /dev/null
+++ b/BWTAln.c
@@ -0,0 +1,919 @@
+#include "BWTAln.h"
+
+unsigned int REVBWTForwardSearch(const unsigned char *convertedkey, const unsigned int keylength, const BWT *rev_bwt, unsigned int *resultsaindexleft, unsigned int *resultsaindexright, unsigned int *rev_resultsaindexleft, unsigned int *rev_resultsaindexright) {
+	// Exact forward search of convertedkey[0..keylength-1] driven by rev_bwt only. Two SA ranges are tracked (plain and rev_) and written to the four out-parameters; the size of the plain range is returned.
+	unsigned int sacount=0;
+	unsigned int rev_startsaindex, rev_endsaindex;
+	unsigned int startsaindex, endsaindex;
+	unsigned int pos = 1;	// convertedkey[0] is consumed by the initialisation below
+	int i;
+	unsigned int c = convertedkey[0];	// NOTE(review): convertedkey[0] is read even when keylength is 0
+	unsigned int occcount_start[4];	// BWTAllOccValue result at the left boundary of the rev_ range
+	unsigned int occcount_end[4];	// BWTAllOccValue result just past the right boundary of the rev_ range
+	unsigned int occcount[4];	// suffix sums of (occcount_end - occcount_start), see loop
+
+	rev_startsaindex = rev_bwt->cumulativeFreq[convertedkey[0]]+1;
+	rev_endsaindex = rev_bwt->cumulativeFreq[convertedkey[0]+1];
+	startsaindex = rev_bwt->cumulativeFreq[convertedkey[0]]+1;	// both ranges start identical: they are initialised from the same cumulativeFreq entries
+	endsaindex = rev_bwt->cumulativeFreq[convertedkey[0]+1];
+
+	while (pos < keylength && startsaindex <= endsaindex) {	// one key character per iteration, stop early on an empty range
+		c = convertedkey[pos];
+
+		BWTAllOccValue(rev_bwt,rev_startsaindex,occcount_start);
+		BWTAllOccValue(rev_bwt,rev_endsaindex + 1,occcount_end);
+
+		rev_startsaindex = rev_bwt->cumulativeFreq[c] + occcount_start[c] + 1;	// step of the rev_ range with character c
+		rev_endsaindex = rev_bwt->cumulativeFreq[c] + occcount_end[c];
+
+		occcount[3]=0;
+		for (i=2;i>=0;i--) {	// occcount[k] = sum over characters j > k of (occcount_end[j] - occcount_start[j])
+			occcount[i]=occcount[i+1]+occcount_end[i+1]-occcount_start[i+1];
+		}
+
+		endsaindex = endsaindex - occcount[c];	// move the right end down by the count for characters greater than c
+		startsaindex = endsaindex - (rev_endsaindex-rev_startsaindex);	// plain range gets the same size as the rev_ range
+		pos++;
+	}
+
+	*resultsaindexleft = startsaindex;
+	*resultsaindexright = endsaindex;
+	*rev_resultsaindexleft = rev_startsaindex;
+	*rev_resultsaindexright = rev_endsaindex;
+
+	sacount+=endsaindex-startsaindex+1;	// unsigned arithmetic: gives 0 when startsaindex == endsaindex + 1 (empty range)
+	// number of occurrence = endsaindex - startsaindex + 1
+	return sacount;
+
+}
+
+
+unsigned int REVBWTContForwardSearch(const unsigned char *convertedkey, const unsigned int start, const unsigned int len, const BWT *rev_bwt, unsigned int *sal, unsigned int *sar, unsigned int *rev_sal, unsigned int *rev_sar) {
+	// Continue a forward search: extend the ranges [*sal,*sar] and [*rev_sal,*rev_sar] in place with convertedkey[start .. start+len-1]; returns the size of [*sal,*sar].
+	unsigned int sacount=0;
+	unsigned int pos = start;
+	unsigned char c;
+	unsigned int occcount_start[4];	// BWTAllOccValue result at *rev_sal
+	unsigned int occcount_end[4];	// BWTAllOccValue result at *rev_sar + 1
+	unsigned int occcount[4];	// suffix sums of (occcount_end - occcount_start), see loop
+	int k;
+	while (pos < start+len  && *sal <= *sar) {	// one key character per iteration, stop early on an empty range
+		c = convertedkey[pos];
+
+		BWTAllOccValue(rev_bwt,*rev_sal,occcount_start);
+		BWTAllOccValue(rev_bwt,*rev_sar + 1,occcount_end);
+
+		*rev_sal = rev_bwt->cumulativeFreq[c] + occcount_start[c] + 1;	// step of the rev_ range with character c
+		*rev_sar = rev_bwt->cumulativeFreq[c] + occcount_end[c];
+
+		occcount[3]=0;
+		for (k=2;k>=0;k--) {	// occcount[k] = sum over characters j > k of (occcount_end[j] - occcount_start[j])
+			occcount[k]=occcount[k+1]+occcount_end[k+1]-occcount_start[k+1];
+		}
+
+		*sar = *sar - occcount[c];	// move the right end down by the count for characters greater than c
+		*sal = *sar - (*rev_sar-*rev_sal);	// plain range gets the same size as the rev_ range
+
+		pos++;
+	}
+	sacount+=*sar-*sal+1;	// unsigned arithmetic: gives 0 when *sal == *sar + 1 (empty range); callers test the result for non-zero
+	return sacount;
+
+}
+unsigned int BWTContBackwardSearch(const unsigned char *convertedkey, const unsigned int start, const unsigned int len, const BWT *bwt, unsigned int *sal, unsigned int *sar) {
+	// Continue a backward search: extend the SA range [*sal,*sar] in place with convertedkey[start+len-1] down to convertedkey[start]; returns the size of the resulting range (0 when it is empty).
+	unsigned int sacount=0;
+	unsigned int pos = len;	// number of characters of the sub-pattern still to be consumed
+	unsigned char c;
+
+	if (*sal > *sar) {	// incoming range is already empty
+		return 0;
+	}
+
+	while (pos > 0 && *sal <= *sar) {
+		c = convertedkey[start+pos-1];	// offset by start so that the sub-pattern [start, start+len) is matched, mirroring REVBWTContForwardSearch
+		*sal = bwt->cumulativeFreq[c] + BWTOccValue(bwt, *sal, c) + 1;
+		*sar = bwt->cumulativeFreq[c] + BWTOccValue(bwt, *sar + 1, c);
+		pos--;
+	}
+	sacount+=*sar-*sal+1;	// unsigned arithmetic: gives 0 when *sal == *sar + 1 (empty range)
+	return sacount;
+}
+
+unsigned int BWTBackward1Error(const unsigned char *querypattern, const BWTOPT *bo, BWT *bwt, unsigned int start, unsigned int len, unsigned int pl, unsigned int pr, unsigned int info, HITTABLE *hits) {	// Backward search of querypattern[start .. start+len-1] from SA range [pl,pr] with exactly one substituted character; every surviving range goes to OCCProcess and the summed OCCProcess results are returned
+		unsigned int mk_l=1,mk_r=0;
+		unsigned int occcount_pstart[4];	// BWTAllOccValue result at pl
+		unsigned int occcount_pend[4];	// BWTAllOccValue result at pr + 1
+		unsigned int sacount=0;	// accumulated below but never returned or read
+		int hitcount = 0;
+
+		unsigned char c;
+		unsigned char ec;	// candidate substituted character, 0..3
+		int i;
+		//printf("bwtbackward1error %u %u\n",pl,pr);
+		//                                  v--start       v----start+len
+		// querypattern = xxxxxxxxxxxxxxxxx[xxxxxxxxxxxxxx]xxxxxx
+		//                                      <------- search direction
+		//                               querypattern[start+len-1], querypattern[start+len-2]...querypattern[start]
+		//                           for i=0 to len-1,
+		//                                append querypattern[start+len-1-i]!
+
+		for (i=0;(i<len && pl<=pr);i++) {	// i-th step places the error at position start+len-1-i
+			// occurrence counts are fetched once per position and shared by all four candidate characters
+			BWTAllOccValue(bwt,pl,occcount_pstart);
+			BWTAllOccValue(bwt,pr + 1,occcount_pend);
+
+			//backward manner
+			for (ec=0;ec<4;ec++) {
+				if (querypattern[start+len-1-i]==ec)	// the original character is not an error
+					continue;
+
+				info &= 0xffff000;	// keep bits 12-27, clear the low 12 bits
+				info |= ((ec&3)<<8|((start+len-1-i)&0xff))&0xfff;	// bits 8-9: substituted character, bits 0-7: error position; NOTE(review): no coord offset is added here, unlike REVBWTForward1Error
+				mk_l=pl;	// overwritten two lines below
+				mk_r=pr;
+
+				mk_l = bwt->cumulativeFreq[ec] + occcount_pstart[ec] + 1;	// range after taking the substituted character
+				mk_r = bwt->cumulativeFreq[ec] + occcount_pend[ec];
+
+				if (BWTContBackwardSearch(querypattern,start,len-i-1,bwt,&mk_l,&mk_r)) {	// match the remaining len-i-1 characters without error
+					hitcount += OCCProcess(mk_l,mk_r, bo, info, hits);	// OCCProcess is defined elsewhere
+					sacount+=mk_r-mk_l+1;
+				}
+			}
+			c = querypattern[start+len-1-i];	// advance the error-free range with the real character
+			pl = bwt->cumulativeFreq[c] + occcount_pstart[c] + 1;
+			pr = bwt->cumulativeFreq[c] + occcount_pend[c];
+
+		}
+		return hitcount;	// int converted to the unsigned int return type
+}
+
+
+unsigned int REVBWTForward1Error(const unsigned char *querypattern, const BWTOPT *bo, BWT * bwt,BWT * rev_bwt, unsigned int start,unsigned int len,unsigned int pl,unsigned int pr,unsigned int rev_pl,unsigned int rev_pr, unsigned int info, HITTABLE *hits) {
+		// Forward search of querypattern[start .. start+len-1] from ranges [pl,pr] / [rev_pl,rev_pr] with exactly one substituted character; every surviving range goes to OCCProcess and the summed OCCProcess results are returned. The bwt parameter is not used in the body.
+		unsigned int mk_l=1,mk_r=0,rev_mk_l,rev_mk_r;
+		unsigned int occcount_pstart[4];	// BWTAllOccValue result at rev_pl
+		unsigned int occcount_pend[4];	// BWTAllOccValue result at rev_pr + 1
+		unsigned int occcountp[4];	// suffix sums of (occcount_pend - occcount_pstart), see loop
+		unsigned int sacount=0;	// accumulated below but never returned or read
+		int hitcount = 0;
+
+		unsigned char c;
+		unsigned char ec;	// candidate substituted character, 0..3
+		unsigned int i;
+		int k;
+		const int coord = (info>>24&1)?(bo->seqLen-bo->alnLen):0;	// position offset seqLen-alnLen, applied only when bit 24 of info is set
+
+		for (i=0;(i<len && pl<=pr);i++) {	// i-th step places the error at position start+i
+			// occurrence counts are fetched once per position and shared by all four candidate characters
+			BWTAllOccValue(rev_bwt,rev_pl,occcount_pstart);
+			BWTAllOccValue(rev_bwt,rev_pr + 1,occcount_pend);
+
+			occcountp[3]=0;
+			for (k=2;k>=0;k--) {	// occcountp[k] = sum over characters j > k of (occcount_pend[j] - occcount_pstart[j])
+				occcountp[k]=occcountp[k+1]+occcount_pend[k+1]-occcount_pstart[k+1];
+			}
+
+
+			//forward manner
+			for (ec=0;ec<4;ec++) {
+				if (querypattern[start+i]==ec)	// the original character is not an error
+					continue;
+				info &= 0xffff000;	// keep bits 12-27, clear the low 12 bits
+				info |= ((ec&3)<<8|((start+i+coord)&0xff))&0xfff;	// bits 8-9: substituted character, bits 0-7: error position plus coord
+				mk_l=pl;
+				mk_r=pr;
+				rev_mk_l=rev_pl;	// overwritten below
+				rev_mk_r=rev_pr;
+
+				unsigned int pos = i+1;	// offset of the first character after the error
+
+				rev_mk_l = rev_bwt->cumulativeFreq[ec] + occcount_pstart[ec] + 1;	// rev_ range after taking the substituted character
+				rev_mk_r = rev_bwt->cumulativeFreq[ec] + occcount_pend[ec];
+
+				mk_r = mk_r - occcountp[ec];
+				mk_l = mk_r - (rev_mk_r-rev_mk_l);
+
+				if (REVBWTContForwardSearch(querypattern,start+pos,len-i-1,rev_bwt,&mk_l,&mk_r,&rev_mk_l,&rev_mk_r)) {	// match the remaining len-i-1 characters without error
+//					printf("%d\t", start+i);
+//					printf("%d\n", ec);
+					hitcount+= OCCProcess(mk_l,mk_r, bo, info, hits);	// OCCProcess is defined elsewhere
+					//return mk_l, mk_r
+					sacount+=mk_r-mk_l+1;
+				}
+			}
+			c = querypattern[start+i];	// advance the error-free ranges with the real character
+
+			rev_pl = rev_bwt->cumulativeFreq[c] + occcount_pstart[c] + 1;
+			rev_pr = rev_bwt->cumulativeFreq[c] + occcount_pend[c];
+
+			pr = pr - occcountp[c];
+			pl = pr - (rev_pr-rev_pl);
+		}
+		return hitcount;	// int converted to the unsigned int return type
+}
+
+int BWTExactMatching(const unsigned char *convertedKey, const BWTOPT *bo, int chain, BWT *bwt, LOOKUPTABLE *lookup, HITTABLE *hits){	// Exact match of convertedKey[0 .. bo->alnLen-1]: lookup-table start on the last tableSize characters, then backward search; returns the OCCProcess result or 0
+	if(convertedKey == NULL) return 0;	// no key, no hits
+	const unsigned int keyLength = bo->alnLen;
+	LOOKUPTABLE lookupTable;	// local copy of the two lookup fields used below
+	lookupTable.tableSize = lookup->tableSize;
+	lookupTable.table = lookup->table;
+	unsigned int l, r;	// current SA range
+	unsigned int i;
+	int hitcount = 0;
+//	fprintf(stdout, "BWTExactMatching\n");
+	unsigned int info = (chain&1) << 24 ;	// bit 24 carries the low bit of chain; all other bits are 0
+	unsigned long long packedPattern = 0;
+//	printf("tablesize: %d, keyLength: %d\n", lookupTable.tableSize, keyLength);
+	for (i = 0; i <lookupTable.tableSize ; i++) {	// pack the last tableSize characters, 2 bits each, first character in the highest bits
+		packedPattern<<=2;
+		packedPattern |= (convertedKey[keyLength-lookupTable.tableSize+i] & 3);
+	}
+	l = packedPattern ? lookupTable.table[packedPattern-1]+1 : 1;	// left end = previous entry + 1 (1 for pattern 0); presumably the table stores right ends of SA ranges - confirm
+	r = lookupTable.table[packedPattern];
+
+	for (i = keyLength-lookupTable.tableSize; i > 0 && l <= r; --i) {	// backward search over the characters in front of the looked-up suffix
+		unsigned char c = convertedKey[i-1];
+		l = bwt->cumulativeFreq[c] + BWTOccValue(bwt, l, c) + 1;
+		r = bwt->cumulativeFreq[c] + BWTOccValue(bwt, r + 1, c);
+	}
+	
+	if (l<=r && hits->n < bo->cutoff) {	// report only a non-empty range and only while the hit table is below the cutoff
+//		fprintf(stderr, "occ find\n");
+		hitcount += OCCProcess(l, r, bo, info, hits);	// OCCProcess is defined elsewhere
+		return hitcount;
+	}
+	return 0;
+}
+
+int BWT1ErrorMatching(const unsigned char * convertedKey, const BWTOPT *bo, const int chain, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HITTABLE *hits) {	// One-substitution match of convertedKey[0 .. bo->alnLen-1]: error in the first bo->h characters (backward case) or in the rest (forward case); returns the summed results of the two helpers
+	if(convertedKey == NULL) return 0;	// no key, no hits
+	LOOKUPTABLE lookupTable, rev_lookupTable;	// local copies of the lookup fields used below
+	lookupTable.tableSize     = lookup->tableSize;
+	rev_lookupTable.tableSize = rev_lookup->tableSize;
+	lookupTable.table         = lookup->table;
+	rev_lookupTable.table     = rev_lookup->table;
+//	unsigned int cutoff       = bo->cutoff;
+	unsigned int keyLength    = bo->alnLen;
+	unsigned int forwardDepth = bo->h;	// length of the leading section of the key
+	unsigned int info         = (1<<25)|((chain&1)<<24);	// bit 25 set (value 1 above the chain bit), bit 24 = low bit of chain
+	unsigned int l, r;	// SA range
+	unsigned int rev_l, rev_r;	// matching range used with rev_bwt
+	unsigned int i;
+	int hitcount = 0;
+	unsigned backwardDepth = keyLength - forwardDepth;	// length of the trailing section of the key
+//	fprintf(stdout, "BWT1misMatching\n");
+	//1. Backward Case
+	//==============================================
+	// look-up the last characters (backward)
+	unsigned long long packedPattern = 0;
+	for (i = 0; i <lookupTable.tableSize ; i++) {	// pack the last tableSize characters, 2 bits each
+		packedPattern<<=2;
+		packedPattern |= (convertedKey[keyLength-lookupTable.tableSize+i] & 3);
+	}
+	l = packedPattern ? lookupTable.table[packedPattern-1]+1 : 1;
+	r = lookupTable.table[packedPattern];
+
+	// backward search with BWT until the forward depth section
+	for (i = lookupTable.tableSize; i < backwardDepth && l <= r; ++i) {	// i counts the characters matched from the end of the key
+		unsigned char c = convertedKey[keyLength-i-1];
+		l = bwt->cumulativeFreq[c] + BWTOccValue(bwt, l, c) + 1;
+		r = bwt->cumulativeFreq[c] + BWTOccValue(bwt, r + 1, c);
+	}
+	// error in the forward depth section of the query pattern (positions 0 .. forwardDepth-1)
+	hitcount += BWTBackward1Error(convertedKey,
+			bo,
+			bwt,
+			0, forwardDepth,
+			l,r,
+			info,
+			hits);
+	//    fprintf(stdout, "saCount1: %u\n", saCount);
+//	if(hits->n >= bo->cutoff) return hitcount;
+
+
+	//2.Forward Case
+	//==============================================
+	unsigned int occCount_start[4];
+	unsigned int occCount_end[4];
+	unsigned int occCount[4];
+	unsigned long long l_packedPattern = 0;
+	unsigned long long r_packedPattern = 0;
+	unsigned long long rev_packedPattern = 0;
+	// look-up the first characters
+	for (i = 0; i <rev_lookupTable.tableSize ; i++) {	// pack the first rev tableSize characters in key order
+		l_packedPattern<<=2;
+		l_packedPattern |= (convertedKey[i]  & 3 );
+	}
+	r_packedPattern = l_packedPattern;
+	//If the look-up tables are of different size
+	l_packedPattern <<= (lookupTable.tableSize-rev_lookupTable.tableSize)*2;	// pad the low bits with zeros: smallest pattern with this prefix
+	r_packedPattern <<= (lookupTable.tableSize-rev_lookupTable.tableSize)*2;
+	r_packedPattern |= (1<<(lookupTable.tableSize-rev_lookupTable.tableSize)*2) - 1;	// pad the low bits with ones: largest pattern with this prefix
+
+	for (i = 0; i <rev_lookupTable.tableSize ; i++) {	// the same characters packed in reverse order, for the reverse lookup table
+		rev_packedPattern<<=2;
+		rev_packedPattern |= (convertedKey[rev_lookupTable.tableSize-i-1] & 3);
+	}
+
+	l = l_packedPattern ? lookupTable.table[l_packedPattern-1]+1 : 1;
+	r = lookupTable.table[r_packedPattern];
+	rev_l = rev_packedPattern ? rev_lookupTable.table[rev_packedPattern-1]+1 : 1;
+	rev_r = rev_lookupTable.table[rev_packedPattern];
+
+	// forward search with BWT until the end of forward depth section
+	for (i = rev_lookupTable.tableSize; i < forwardDepth && l <= r; ++i) {
+		unsigned char c = convertedKey[i];
+		BWTAllOccValue(rev_bwt,rev_l,occCount_start);
+		BWTAllOccValue(rev_bwt,rev_r + 1,occCount_end);
+
+		rev_l = rev_bwt->cumulativeFreq[c] + occCount_start[c] + 1;
+		rev_r = rev_bwt->cumulativeFreq[c] + occCount_end[c];
+
+		occCount[3]=0;
+		int k;
+		for (k=2;k>=0;k--) {	// occCount[k] = sum over characters j > k of (occCount_end[j] - occCount_start[j])
+			occCount[k]=occCount[k+1]+occCount_end[k+1]-occCount_start[k+1];
+		}
+
+		r = r - occCount[c];
+		l = r - (rev_r-rev_l);
+	}
+
+	// error in the section after the forward depth (positions forwardDepth .. forwardDepth+backwardDepth-1)
+	hitcount+=REVBWTForward1Error(convertedKey,
+			bo,
+			bwt, rev_bwt,
+			forwardDepth, backwardDepth,
+			l, r,
+			rev_l, rev_r,
+			info,
+			hits);
+	//    fprintf(stdout, "saCount: %u\n", saCount);
+	return hitcount;
+}
+
+int BWT2ErrorMatching(const unsigned char *convertedKey, const BWTOPT *bo, const int chain, BWT * bwt, BWT * rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HITTABLE *hits) {	// Two-substitution match of convertedKey[0 .. bo->alnLen-1]; the key is split into cells X, Y, Z (sizes bo->x, bo->y, rest) and cases A, B, C are searched in turn (case D is commented out); returns the accumulated hit count
+	if(convertedKey == NULL) return 0;	// no key, no hits
+	LOOKUPTABLE lookupTable, rev_lookupTable;	// local copies of the lookup fields used below
+	lookupTable.tableSize     = lookup->tableSize;
+	rev_lookupTable.tableSize = rev_lookup->tableSize;
+	lookupTable.table         = lookup->table;
+	rev_lookupTable.table     = rev_lookup->table;
+	unsigned int keyLength    = bo->alnLen;
+//	fprintf(stderr, "keyLength %u\n", keyLength);
+	unsigned int sizeX        = bo->x;
+	unsigned int sizeY        = bo->y;
+	unsigned int cutoff       = bo->cutoff;	// the search returns early once hits->n reaches this value
+	unsigned int info         = (2<<25)|(chain<<24);	// bit 26 set (value 2 above the chain bit); NOTE(review): chain is not masked with &1 here, unlike BWTExactMatching and BWT1ErrorMatching
+	unsigned int l, r;	// SA range
+	unsigned int rev_l, rev_r;	// matching range used with rev_bwt
+	unsigned int i;
+
+	unsigned char ec;	// candidate substituted character, 0..3
+	unsigned int sizeZ = keyLength - sizeX - sizeY;	// cell Z is whatever remains after X and Y
+	unsigned int occCount_pstart[4];
+	unsigned int occCount_pend[4];
+	unsigned int occCountp[4];
+	unsigned int occCount_start[4];
+	unsigned int occCount_end[4];
+	unsigned int occCount[4];
+
+	unsigned long long packedPattern = 0;
+	unsigned long long l_packedPattern = 0;
+	unsigned long long r_packedPattern = 0;
+	unsigned long long rev_packedPattern = 0;
+	unsigned long long rev_l_packedPattern = 0;	// only ever assigned 0 in this function, never read
+	unsigned long long rev_r_packedPattern = 0;	// only ever assigned 0 in this function, never read
+
+	unsigned long long mask;
+	unsigned long long ALLONE = (1<<(lookupTable.tableSize*2))-1;	// low 2*tableSize bits set; NOTE(review): the shift is evaluated on an int constant, so the shift count must stay below the width of int
+	unsigned char c;
+	int hitcount = 0;
+	const int coord = (info>>24&1)?(bo->seqLen-bo->alnLen):0;	// position offset seqLen-alnLen, applied only when bit 24 of info is set
+//	fprintf(stdout, "BWT2misMatching\n");
+	//Separate into 4 cases according to the documentation.
+	//==============================================
+
+
+	//Case A    Backward Search
+	//1.    cellZ
+	//2.    2-mismatch cellX+Y
+	//==============================================
+	// look-up the last characters (backward)
+
+	for (i = 0; i <lookupTable.tableSize ; i++) {	// pack the last tableSize characters, 2 bits each
+		packedPattern<<=2;
+		packedPattern |= (convertedKey[keyLength-lookupTable.tableSize+i] & 3);
+	}
+
+	l = packedPattern ? lookupTable.table[packedPattern-1]+1 : 1;
+	r = lookupTable.table[packedPattern];
+
+	// backward search with BWT in cellZ
+	for (i = lookupTable.tableSize; i < sizeZ && l <= r; ++i) {	// i counts the characters matched from the end of the key
+		c = convertedKey[keyLength-i-1];
+		l = bwt->cumulativeFreq[c] + BWTOccValue(bwt, l, c) + 1;
+		r = bwt->cumulativeFreq[c] + BWTOccValue(bwt, r + 1, c);
+	}
+	// 2 errors in cellX+Y
+	for (i=sizeX+sizeY-1;(i>0 && l<=r);i--) {	// i is the position of the first (rightmost) error; position 0 is not tried as first error
+		BWTAllOccValue(bwt,l,occCount_pstart);
+		BWTAllOccValue(bwt,r + 1,occCount_pend);
+
+		//Backward Manner
+		for (ec=0;ec<4;ec++) {
+			if (convertedKey[i]==ec) continue;
+			unsigned int mk_l=l;
+			unsigned int mk_r=r;
+			info &= 0x7000000;	// keep only bits 24-26
+			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;	// first error goes to bits 12-21: substituted character above the low byte of position i+coord
+			mk_l = bwt->cumulativeFreq[ec] + occCount_pstart[ec] + 1;
+			mk_r = bwt->cumulativeFreq[ec] + occCount_pend[ec];
+
+			//return mk_l, mk_r
+			if (mk_l <= mk_r) {
+				//r_count+=mk_r-mk_l+1;
+				hitcount+=BWTBackward1Error(convertedKey,	// second error somewhere in positions 0 .. i-1
+						bo, bwt,
+						0, i,
+						mk_l,mk_r,
+						info,
+						hits);
+				if(hits->n >= cutoff) return hitcount;
+			}
+		}
+		c = convertedKey[i];	// advance the error-free range with the real character
+		l = bwt->cumulativeFreq[c] + occCount_pstart[c] + 1;
+		r = bwt->cumulativeFreq[c] + occCount_pend[c];
+	}
+//	if(hits->n >= cutoff) return hitcount;
+
+//	printf("case A %d\n", saCount);
+
+	//Case B    Forward Search
+	//1.    cellX+Y
+	//2.    2-mismatch cellZ
+	//==============================================
+
+	packedPattern = 0;
+	l_packedPattern = 0;
+	r_packedPattern = 0;
+	rev_packedPattern = 0;
+	rev_l_packedPattern = 0;
+	rev_r_packedPattern = 0;
+	// look-up the first characters
+	for (i = 0; i <rev_lookupTable.tableSize ; i++) {	// pack the first rev tableSize characters in key order
+		l_packedPattern<<=2;
+		l_packedPattern |= (convertedKey[i] & 3);
+	}
+//	fprintf(stdout, "l_packedPattern: %u\n", l_packedPattern);
+	r_packedPattern = l_packedPattern;
+	// if the look-up tables are of different size
+	l_packedPattern<<= (lookupTable.tableSize-rev_lookupTable.tableSize)*2;	// pad the low bits with zeros: smallest pattern with this prefix
+	r_packedPattern<<= (lookupTable.tableSize-rev_lookupTable.tableSize)*2;
+	r_packedPattern |= (1<<(lookupTable.tableSize-rev_lookupTable.tableSize)*2) - 1;	// pad the low bits with ones: largest pattern with this prefix
+
+	for (i = 0; i <rev_lookupTable.tableSize ; i++) {	// the same characters packed in reverse order, for the reverse lookup table
+		rev_packedPattern<<= 2;
+		rev_packedPattern |= (convertedKey[rev_lookupTable.tableSize-i-1] & 3);
+	}
+//	fprintf(stdout, "rev_packedPattern: %u\n", rev_packedPattern);
+	l     = l_packedPattern ? lookupTable.table[l_packedPattern-1]+1 : 1;
+	r     = lookupTable.table[r_packedPattern];
+	rev_l = rev_packedPattern ? rev_lookupTable.table[rev_packedPattern-1]+1 : 1;
+	rev_r = rev_lookupTable.table[rev_packedPattern];
+
+	// forward search with BWT until the end of cellX+Y
+	for (i = rev_lookupTable.tableSize; i < sizeX+sizeY && l <= r; ++i) {
+		c = convertedKey[i] & 0x3 ;
+		BWTAllOccValue(rev_bwt,rev_l,occCount_start);
+		BWTAllOccValue(rev_bwt,rev_r + 1,occCount_end);
+
+		rev_l = rev_bwt->cumulativeFreq[c] + occCount_start[c] + 1;
+		rev_r = rev_bwt->cumulativeFreq[c] + occCount_end[c];
+
+		occCount[3]=0;
+		int k;
+		for (k=2;k>=0;k--) {	// occCount[k] = sum over characters j > k of (occCount_end[j] - occCount_start[j])
+			occCount[k]=occCount[k+1]+occCount_end[k+1]-occCount_start[k+1];
+		}
+
+		r = r - occCount[c];
+		l = r - (rev_r-rev_l);
+	}
+
+	//2 error in cellZ
+//	fprintf(stdout, "find errr\n");
+//	fprintf(stdout, "SizeX+Y: %d\n"
+//			"keylen: %d\n"
+//			"l: %d, r: %d\n", sizeX+sizeY, keyLength, l, r);
+	for (i=sizeX+sizeY;(i<keyLength && l<=r);i++) {	// i is the position of the first (leftmost) error inside cell Z
+		// occurrence counts are fetched once per position and shared by all four candidate characters
+		BWTAllOccValue(rev_bwt,rev_l,occCount_pstart);
+		BWTAllOccValue(rev_bwt,rev_r + 1,occCount_pend);
+
+		int k;
+		occCountp[3]=0;
+		for (k=2;k>=0;k--) {
+			occCountp[k]=occCountp[k+1]+occCount_pend[k+1]-occCount_pstart[k+1];
+		}
+
+		//Forward Manner
+		for (ec=0;ec<4;ec++) {
+			if (convertedKey[i]==ec) continue;
+//			fprintf(stdout, "%d\n", i);
+			info &= 0x7000000;	// keep only bits 24-26
+			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;	// first error goes to bits 12-21
+			unsigned int mk_l=l;
+			unsigned int mk_r=r;
+			unsigned int rev_mk_l=rev_l;
+			unsigned int rev_mk_r=rev_r;
+
+			rev_mk_l = rev_bwt->cumulativeFreq[ec] + occCount_pstart[ec] + 1;
+			rev_mk_r = rev_bwt->cumulativeFreq[ec] + occCount_pend[ec];
+
+			mk_r = mk_r - occCountp[ec];
+			mk_l = mk_r - (rev_mk_r-rev_mk_l);
+
+			//return mk_l, mk_r
+			if (mk_l <= mk_r) {
+				//r_count+=mk_r-mk_l+1;
+				//2-nd Error Matching in sub cellZ Range
+//				fprintf(stdout, "%d--%d--%d\n,",convertedKey[i], i, ec);
+				hitcount+=REVBWTForward1Error(convertedKey,	// second error somewhere in positions i+1 .. keyLength-1
+						bo, 
+						bwt, rev_bwt,
+						i+1,
+						keyLength-i-1,
+						mk_l,mk_r,rev_mk_l,rev_mk_r,
+						info,
+						hits);
+				if(hits->n >= cutoff) return hitcount;
+				//saCount+=forward1Error(p,bwt,rev_bwt, i+1,cellX+cellY+cellZ-i-1,mk_l,mk_r,rev_mk_l,rev_mk_r);
+//				fprintf(stdout, "\nsaCoutn:%d\n",saCount);
+			}
+		}
+		c = convertedKey[i];	// advance the error-free ranges with the real character
+
+		rev_l = rev_bwt->cumulativeFreq[c] + occCount_pstart[c] + 1;
+		rev_r = rev_bwt->cumulativeFreq[c] + occCount_pend[c];
+
+		r = r - occCountp[c];
+		l = r - (rev_r-rev_l);
+	}
+//	if(hits->n >= cutoff)return hitcount;
+	//*/
+
+//	printf("case B %d\n", saCount);
+	//
+
+	//Case C
+	//1.    cellX (forward)
+	//2.    1-mismatch cellY (forward)
+	//3.    1-mismatch cellZ (forward)
+	//==============================================
+
+	packedPattern = 0;
+	l_packedPattern = 0;
+	r_packedPattern = 0;
+	rev_packedPattern = 0;
+	rev_l_packedPattern = 0;
+	rev_r_packedPattern = 0;
+
+	for (i = 0; i <lookupTable.tableSize ; i++) {	// first tableSize characters in key order
+		packedPattern<<=2;
+		packedPattern |= (convertedKey[i] & 3);
+	}
+	for (i = 0; i <rev_lookupTable.tableSize ; i++) {	// first rev tableSize characters in reverse order
+		rev_packedPattern<<=2;
+		rev_packedPattern |= (convertedKey[rev_lookupTable.tableSize-i-1] & 3 );
+	}
+	//*
+	//For error happen in lookup range....
+	//for (i = sizeX; i< lookupTable.tableSize ;i++) {
+	for (i = 0; i< lookupTable.tableSize ;i++) {	// first error at position i inside the looked-up prefix; NOTE(review): the reverse pattern is edited with the same i, which presumably relies on both tables having the same size - confirm
+		unsigned char ec;
+		for (ec=0;ec<4;ec++) {
+			if (ec == (convertedKey[i] & 0x3)) continue;
+			info &= 0x7000000;	// keep only bits 24-26
+			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;	// first error goes to bits 12-21
+			unsigned long long err_packedPattern = packedPattern;
+			unsigned long long err_rev_packedPattern = rev_packedPattern;
+
+			unsigned int bitPos = (lookupTable.tableSize-i-1)*2;	// bit offset of character i in the forward pattern
+			mask = ALLONE - (3 << bitPos);	// all ones except the two bits of character i ...
+			mask |= ec << bitPos;	// ... which are set to ec
+			err_packedPattern |= (3 << bitPos);
+			err_packedPattern &= mask;	// forward pattern with character i replaced by ec
+
+			bitPos = i*2;	// bit offset of character i in the reversed pattern
+			mask = ALLONE - (3 << bitPos);
+			mask |= ec << bitPos;
+			err_rev_packedPattern |= (3 << bitPos);
+			err_rev_packedPattern &= mask;	// reversed pattern with character i replaced by ec
+
+			l = err_packedPattern ? lookupTable.table[err_packedPattern-1]+1 : 1;
+			r = lookupTable.table[err_packedPattern];
+			rev_l = err_rev_packedPattern ? rev_lookupTable.table[err_rev_packedPattern-1]+1 : 1;
+			rev_r = rev_lookupTable.table[err_rev_packedPattern];
+			//LookupSafe(lookupTable,bwt,err_packedPattern,err_packedPattern,&l,&r);
+			//LookupSafe(rev_lookupTable,rev_bwt,err_rev_packedPattern,err_rev_packedPattern,&rev_l,&rev_r);
+
+			unsigned int pos = lookupTable.tableSize;
+			while (pos < sizeX+sizeY  && l <= r) {	// error-free forward extension up to the end of cellX+Y
+				c = convertedKey[pos] & 0x3;
+
+				BWTAllOccValue(rev_bwt,rev_l,occCount_start);
+				BWTAllOccValue(rev_bwt,rev_r + 1,occCount_end);
+
+				rev_l = rev_bwt->cumulativeFreq[c] + occCount_start[c] + 1;
+				rev_r = rev_bwt->cumulativeFreq[c] + occCount_end[c];
+
+				int k;
+				occCount[3]=0;
+				for (k=2;k>=0;k--) {
+					occCount[k]=occCount[k+1]+occCount_end[k+1]-occCount_start[k+1];
+				}
+
+				r = r - occCount[c];
+				l = r - (rev_r-rev_l);
+
+				pos++;
+			}
+
+
+			if (l <= r) {
+				//2-nd Error Matching in cellZ Range
+				hitcount+=REVBWTForward1Error(convertedKey, 
+								bo,
+								bwt, rev_bwt,
+								sizeX+sizeY, sizeZ,
+								l,r,
+								rev_l,rev_r,
+								info,
+								hits);
+				if(hits->n >= cutoff) return hitcount;
+			}
+		}
+	}
+	//*/
+	if(hits->n >= cutoff)return hitcount;
+
+
+	l = packedPattern ? lookupTable.table[packedPattern-1]+1 : 1;	// restart from the error-free looked-up prefix
+	r = lookupTable.table[packedPattern];
+	rev_l = rev_packedPattern ? rev_lookupTable.table[rev_packedPattern-1]+1 : 1;
+	rev_r = rev_lookupTable.table[rev_packedPattern];
+
+
+	//For error happen outside lookup range..
+	for (i=lookupTable.tableSize;(i<sizeX+sizeY && l<=r);i++) {	// first error at position i, after the looked-up prefix and before cell Z
+		// occurrence counts are fetched once per position and shared by all four candidate characters
+		BWTAllOccValue(rev_bwt,rev_l,occCount_pstart);
+		BWTAllOccValue(rev_bwt,rev_r + 1,occCount_pend);
+
+		int k;
+		occCountp[3]=0;
+		for (k=2;k>=0;k--) {
+			occCountp[k]=occCountp[k+1]+occCount_pend[k+1]-occCount_pstart[k+1];
+		}
+		//Forward Manner
+		for (ec=0;ec<4;ec++) {
+			if ((convertedKey[i]&0x3) ==ec) continue;
+			info &= 0x7000000;	// keep only bits 24-26
+			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;	// first error goes to bits 12-21
+			unsigned int mk_l=l;
+			unsigned int mk_r=r;
+			unsigned int rev_mk_l=rev_l;
+			unsigned int rev_mk_r=rev_r;
+
+			unsigned int pos = i+1;
+
+			rev_mk_l = rev_bwt->cumulativeFreq[ec] + occCount_pstart[ec] + 1;
+			rev_mk_r = rev_bwt->cumulativeFreq[ec] + occCount_pend[ec];
+
+			mk_r = mk_r - occCountp[ec];
+			mk_l = mk_r - (rev_mk_r-rev_mk_l);
+
+			while (pos < sizeX+sizeY  && rev_mk_l <= rev_mk_r) {	// error-free forward extension up to the end of cellX+Y; this loop tests the rev_ range, not mk_l/mk_r
+				c = convertedKey[pos] & 0x3;
+
+				BWTAllOccValue(rev_bwt,rev_mk_l,occCount_start);
+				BWTAllOccValue(rev_bwt,rev_mk_r + 1,occCount_end);
+
+				rev_mk_l = rev_bwt->cumulativeFreq[c] + occCount_start[c] + 1;
+				rev_mk_r = rev_bwt->cumulativeFreq[c] + occCount_end[c];
+
+				int k;
+				occCount[3]=0;
+				for (k=2;k>=0;k--) {
+					occCount[k]=occCount[k+1]+occCount_end[k+1]-occCount_start[k+1];
+				}
+
+				mk_r = mk_r - occCount[c];
+				mk_l = mk_r - (rev_mk_r-rev_mk_l);
+
+				pos++;
+			}
+			//return mk_l, mk_r
+			if (mk_l <= mk_r) {
+				//2-nd Error Matching in cellZ Range
+				hitcount+=REVBWTForward1Error(convertedKey, 
+						bo,
+						bwt, rev_bwt,
+						sizeX+sizeY,
+						sizeZ,
+						mk_l,mk_r,
+						rev_mk_l,rev_mk_r,
+						info,
+						hits);
+//				if(hits->n >= cutoff) return hitcount;
+			}
+		}
+		c = convertedKey[i];	// advance the error-free ranges with the real character
+
+		rev_l = rev_bwt->cumulativeFreq[c] + occCount_pstart[c] + 1;
+		rev_r = rev_bwt->cumulativeFreq[c] + occCount_pend[c];
+
+		r = r - occCountp[c];
+		l = r - (rev_r-rev_l);
+	}
+	if(hits->n >= cutoff)return hitcount;
+	//*/
+
+//	printf("case C %d\n", saCount);
+
+/*
+	//Case D
+	//1.    cellY (forward)
+	//2.    1-mismatch cellZ (forward)
+	//3.    1-mismatch cellX (backward)
+	//==============================================
+	packedPattern = 0;
+	l_packedPattern = 0;
+	r_packedPattern = 0;
+	rev_packedPattern = 0;
+	rev_l_packedPattern = 0;
+	rev_r_packedPattern = 0;
+
+	for (i = 0; i <lookupTable.tableSize ; i++) {
+		packedPattern<<=2;
+		packedPattern |= (convertedKey[sizeX+i] & 3);
+	}
+	for (i = 0; i <rev_lookupTable.tableSize ; i++) {
+		rev_packedPattern<<=2;
+		rev_packedPattern |= (convertedKey[sizeX+rev_lookupTable.tableSize-i-1] & 3);
+	}
+	//*
+	//For error happen in lookup range....
+	for (i = sizeY; i< lookupTable.tableSize ;i++) {
+		unsigned char ec;
+		for (ec=0;ec<4;ec++) {
+			if (ec == convertedKey[sizeX+i]) continue;
+			info &= 0x7000000;
+			info |= ((((ec&0x3)<<8)|((sizeX+i)&0xff))&0x3ff)<<12;
+			unsigned long long err_packedPattern = packedPattern;
+			unsigned long long err_rev_packedPattern = rev_packedPattern;
+
+			unsigned int bitPos = (lookupTable.tableSize-i-1)*2;
+			mask = ALLONE - (3 << bitPos);
+			mask |= ec << bitPos;
+			err_packedPattern |= (3 << bitPos);
+			err_packedPattern &= mask;
+
+			bitPos = i*2;
+			mask = ALLONE - (3 << bitPos);
+			mask |= ec << bitPos;
+			err_rev_packedPattern |= (3 << bitPos);
+			err_rev_packedPattern &= mask;
+
+			l = err_packedPattern ? lookupTable.table[err_packedPattern-1]+1 : 1;
+			r = lookupTable.table[err_packedPattern];
+			rev_l = err_rev_packedPattern ? rev_lookupTable.table[err_rev_packedPattern-1]+1 : 1;
+			rev_r = rev_lookupTable.table[err_rev_packedPattern];
+
+
+			unsigned int pos = sizeX+lookupTable.tableSize;
+			while (pos < keyLength  && l <= r) {
+				c = convertedKey[pos] & 0x3 ;
+
+				BWTAllOccValue(rev_bwt,rev_l,occCount_start);
+				BWTAllOccValue(rev_bwt,rev_r + 1,occCount_end);
+
+				rev_l = rev_bwt->cumulativeFreq[c] + occCount_start[c] + 1;
+				rev_r = rev_bwt->cumulativeFreq[c] + occCount_end[c];
+
+				int k;
+				occCount[3]=0;
+				for (k=2;k>=0;k--) {
+					occCount[k]=occCount[k+1]+occCount_end[k+1]-occCount_start[k+1];
+				}
+
+				r = r - occCount[c];
+				l = r - (rev_r-rev_l);
+
+				pos++;
+			}
+
+
+			if (l <= r) {
+				//2-nd Error Matching in cellX Range
+				hitcount+=BWTBackward1Error(convertedKey,bo, 
+										bwt,
+										0, sizeX,
+										l,r,
+										info,
+										hits);
+			}
+			if (hits->n >= cutoff) return hitcount;
+		}
+	}
+	///
+
+	l = packedPattern ? lookupTable.table[packedPattern-1]+1 : 1;
+	r = lookupTable.table[packedPattern];
+	rev_l = rev_packedPattern ? rev_lookupTable.table[rev_packedPattern-1]+1 : 1;
+	rev_r = rev_lookupTable.table[rev_packedPattern];
+
+
+    //For error happen outside lookup range..
+	for (i=sizeX+lookupTable.tableSize;(i<keyLength && l<=r);i++) {
+		BWTAllOccValue(rev_bwt,rev_l,occCount_pstart);
+		BWTAllOccValue(rev_bwt,rev_r + 1,occCount_pend);
+
+		int k;
+		occCountp[3]=0;
+		for (k=2;k>=0;k--) {
+			occCountp[k]=occCountp[k+1]+occCount_pend[k+1]-occCount_pstart[k+1];
+		}
+		//Forward Manner
+		for (ec=0;ec<4;ec++) {
+			if (convertedKey[i]==ec) continue;
+			info &= 0x7000000;
+			info |= ((((ec&0x3)<<8)|(i&0xff))&0x3ff)<<12;
+			unsigned int mk_l=l;
+			unsigned int mk_r=r;
+			unsigned int rev_mk_l=rev_l;
+			unsigned int rev_mk_r=rev_r;
+
+			unsigned int pos = i+1;
+
+			rev_mk_l = rev_bwt->cumulativeFreq[ec] + occCount_pstart[ec] + 1;
+			rev_mk_r = rev_bwt->cumulativeFreq[ec] + occCount_pend[ec];
+
+			mk_r = mk_r - occCountp[ec];
+			mk_l = mk_r - (rev_mk_r-rev_mk_l);
+
+			while (pos < keyLength  && rev_mk_l <= rev_mk_r) {
+				c = convertedKey[pos];
+
+				BWTAllOccValue(rev_bwt,rev_mk_l,occCount_start);
+				BWTAllOccValue(rev_bwt,rev_mk_r + 1,occCount_end);
+
+				rev_mk_l = rev_bwt->cumulativeFreq[c] + occCount_start[c] + 1;
+				rev_mk_r = rev_bwt->cumulativeFreq[c] + occCount_end[c];
+
+				int k;
+				occCount[3]=0;
+				for (k=2;k>=0;k--) {
+					occCount[k]=occCount[k+1]+occCount_end[k+1]-occCount_start[k+1];
+				}
+
+				mk_r = mk_r - occCount[c];
+				mk_l = mk_r - (rev_mk_r-rev_mk_l);
+
+				pos++;
+			}
+			//return mk_l, mk_r
+			if (mk_l <= mk_r) {
+				//2-nd Error Matching in cellX Range
+				hitcount+=BWTBackward1Error(convertedKey, bo,
+						bwt, 0,
+						sizeX,
+						mk_l,mk_r,
+						info,
+						hits);
+			}
+			if (hits->n >= cutoff)return hitcount;
+		}
+		c = convertedKey[i];
+
+		rev_l = rev_bwt->cumulativeFreq[c] + occCount_pstart[c] + 1;
+		rev_r = rev_bwt->cumulativeFreq[c] + occCount_pend[c];
+
+		r = r - occCountp[c];
+		l = r - (rev_r-rev_l);
+	}
+	///
+//	printf("case D %d\n", saCount);
+	//*/
+	return hitcount;	// case D above is disabled, so only cases A, B and C contribute
+
+}
+
+static inline int POSCMP(const void *a, const void *b){	// qsort-style comparator: a and b point to unsigned int values; returns -1, 0 or 1 for less, equal, greater
+	return (*(const unsigned int *)a > *(const unsigned int *)b) - (*(const unsigned int *)a < *(const unsigned int *)b);	// compare instead of subtracting: an unsigned difference converted to int has the wrong sign once the values differ by more than INT_MAX
+}
diff --git a/BWTAln.h b/BWTAln.h
new file mode 100644
index 0000000..bdec7cd
--- /dev/null
+++ b/BWTAln.h
@@ -0,0 +1,39 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  BWTAln.h
+ *
+ *    Description:  Prototypes for the BWT search and alignment routines
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+
+#ifndef  _BWTALN_H__INC
+#define  _BWTALN_H__INC
+/* Include guard: everything up to the matching #endif is compiled once per translation unit. */
+#include "BWT.h"
+#include "extratools.h"
+#include "HSP.h"
+/* Search and matching entry points. NOTE(review): BWT, BWTOPT, LOOKUPTABLE and HITTABLE are presumably declared by the headers included above -- confirm. */
+unsigned int REVBWTForwardSearch(const unsigned char *convertedKey, const unsigned int keyLength, const BWT *rev_bwt,unsigned int *resultSaIndexLeft, unsigned int *resultSaIndexRight,unsigned int *rev_resultSaIndexLeft, unsigned int *rev_resultSaIndexRight);
+unsigned int REVBWTContForwardSearch(const unsigned char *convertedKey, const unsigned int start, const unsigned int len,const BWT *rev_bwt,unsigned int *saL, unsigned int *saR,unsigned int *rev_saL, unsigned int *rev_saR);
+unsigned int BWTContBackwardSearch(const unsigned char *convertedKey, const unsigned int start, const unsigned int len, const BWT *bwt, unsigned int *saL, unsigned int *saR);
+unsigned int BWTBackward1Error(const unsigned char *querypattern, const BWTOPT *bo, BWT *bwt, unsigned int start, unsigned int len, unsigned int pl, unsigned int pr, unsigned int info, HITTABLE *hits);
+unsigned int REVBWTForward1Error(const unsigned char *queryPattern,const BWTOPT *bo,  BWT *bwt, BWT * rev_bwt, unsigned int start,unsigned int len, unsigned int pl,unsigned int pr, unsigned int rev_pl,unsigned int rev_pr, unsigned int info, HITTABLE *hits);
+int BWTExactMatching(const unsigned char *convertedKey, const BWTOPT *bo, const int chain, BWT *bwt, LOOKUPTABLE *lookup, HITTABLE *hits);
+int BWT1ErrorMatching(const unsigned char *convertedKey, const BWTOPT *bo, const int chain, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HITTABLE *hits);
+int BWT2ErrorMatching(const unsigned char *convertedKey, const BWTOPT *bo, const int chain, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HITTABLE *hits);
+ int BWTGapMatching(const unsigned char *convertedKey, const BWTOPT *bo, const int chain, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HITTABLE *hits);
+
+#endif   /* ----- #ifndef _BWTALN_H__INC  ----- */
+
+
+
diff --git a/ChangLogCode.Apr1 b/ChangLogCode.Apr1
new file mode 100644
index 0000000..863b955
--- /dev/null
+++ b/ChangLogCode.Apr1
@@ -0,0 +1,64 @@
+--- ../soap2.15/BWTAln.c	2009-03-27 10:42:36.000000000 +0800
++++ BWTAln.c	2009-04-01 10:25:41.000000000 +0800
+@@ -163,6 +163,8 @@
+ 		unsigned char ec;
+ 		unsigned int i;
+ 		int k;
++		const int coord = (info>>24&1)?(bo->seqLen-bo->alnLen):0;
++
+ 		for (i=0;(i<len && pl<=pr);i++) {
+ 			//call once only proceduressssss - great
+ 			BWTAllOccValue(rev_bwt,rev_pl,occcount_pstart);
+@@ -179,7 +181,7 @@
+ 				if (querypattern[start+i]==ec)
+ 					continue;
+ 				info &= 0xffff000;
+-				info |= ((ec&3)<<8|((start+i)&0xff))&0xfff;
++				info |= ((ec&3)<<8|((start+i+coord)&0xff))&0xfff;
+ 				mk_l=pl;
+ 				mk_r=pr;
+ 				rev_mk_l=rev_pl;
+@@ -390,6 +392,7 @@
+ 	unsigned long long ALLONE = (1<<(lookupTable.tableSize*2))-1;
+ 	unsigned char c;
+ 	int hitcount = 0;
++	const int coord = (info>>24&1)?(bo->seqLen-bo->alnLen):0;
+ //	fprintf(stdout, "BWT2misMatching\n");
+ 	//Separate into 4 cases according to the documentation.
+ 	//==============================================
+@@ -426,7 +429,7 @@
+ 			unsigned int mk_l=l;
+ 			unsigned int mk_r=r;
+ 			info &= 0x7000000;
+-			info |= ((((ec&0x3)<<8)|(i&0xff))&0x3ff)<<12;
++			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;
+ 			mk_l = bwt->cumulativeFreq[ec] + occCount_pstart[ec] + 1;
+ 			mk_r = bwt->cumulativeFreq[ec] + occCount_pend[ec];
+ 
+@@ -523,7 +526,7 @@
+ 			if (convertedKey[i]==ec) continue;
+ //			fprintf(stdout, "%d\n", i);
+ 			info &= 0x7000000;
+-			info |= ((((ec&0x3)<<8)|(i&0xff))&0x3ff)<<12;
++			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;
+ 			unsigned int mk_l=l;
+ 			unsigned int mk_r=r;
+ 			unsigned int rev_mk_l=rev_l;
+@@ -596,7 +599,7 @@
+ 		for (ec=0;ec<4;ec++) {
+ 			if (ec == (convertedKey[i] & 0x3)) continue;
+ 			info &= 0x7000000;
+-			info |= ((((ec&0x3)<<8)|(i&0xff))&0x3ff)<<12;
++			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;
+ 			unsigned long long err_packedPattern = packedPattern;
+ 			unsigned long long err_rev_packedPattern = rev_packedPattern;
+ 
+@@ -681,7 +684,7 @@
+ 		for (ec=0;ec<4;ec++) {
+ 			if ((convertedKey[i]&0x3) ==ec) continue;
+ 			info &= 0x7000000;
+-			info |= ((((ec&0x3)<<8)|(i&0xff))&0x3ff)<<12;
++			info |= ((((ec&0x3)<<8)|((i+coord)&0xff))&0x3ff)<<12;
+ 			unsigned int mk_l=l;
+ 			unsigned int mk_r=r;
+ 			unsigned int rev_mk_l=rev_l;
diff --git a/ChangeLog1 b/ChangeLog1
new file mode 100644
index 0000000..6ea071d
--- /dev/null
+++ b/ChangeLog1
@@ -0,0 +1,16 @@
+May 21 2009
+1. SWRescue serious bug fixed
+
+------------------------------------------------------
+Apr 10 2009
+1. -l option compatible with diff read_length
+2. -s min_length after soft clip
+3. seq and quality real length is coordinated by soft clip
+4. MD contain no 0 except first
+
+------------------------------------------------------
+Apr 8 2009
+1. -t option
+2. -r segfault
+3. change x, y
+
diff --git a/DNACount.c b/DNACount.c
new file mode 100644
index 0000000..d2178e2
--- /dev/null
+++ b/DNACount.c
@@ -0,0 +1,1159 @@
+/*
+
+   DNACount.c		DNA Count
+
+   This module contains DNA occurrence counting functions. The DNA must be
+   in word-packed format.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "DNACount.h"
+#include "MiscUtilities.h"
+
+
+/*
+ * Fill dnaDecodeTable with one packed counter word per table index.
+ *
+ * Each index is read as eight 2-bit character codes.  Byte k of the stored
+ * word (bits 8k..8k+7) holds how many of those eight codes equal k, so the
+ * four counters of an entry always add up to 8.
+ *
+ * dnaDecodeTable must have room for DNA_OCC_CNT_TABLE_SIZE_IN_WORD entries.
+ */
+void GenerateDNAOccCountTable(unsigned int *dnaDecodeTable) {
+
+	unsigned int entry;
+
+	for (entry=0; entry<DNA_OCC_CNT_TABLE_SIZE_IN_WORD; entry++) {
+		unsigned int packedCounts = 0;
+		unsigned int remaining = entry;
+		unsigned int codesLeft = 8;
+
+		while (codesLeft-- > 0) {
+			// bump the 8-bit counter selected by the lowest 2-bit code
+			packedCounts += 1 << ((remaining & 0x00000003) * 8);
+			remaining >>= 2;
+		}
+		dnaDecodeTable[entry] = packedCounts;
+	}
+
+}
+
+/*
+ * Count the occurrences of one character among the first `index` characters
+ * of a word-packed DNA sequence.
+ *
+ *   dna             packed text, 16 two-bit characters per word, the first
+ *                   character of a word in its most significant bits
+ *   index           number of leading characters to examine; a DEBUG build
+ *                   aborts when it is 256 or more
+ *   character       code of the character to count; selects one of the four
+ *                   8-bit counters
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ *
+ * Returns the 8-bit counter that belongs to `character`.
+ */
+unsigned int ForwardDNAOccCount(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[16] = {
+		0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
+		0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
+		0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
+		0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC
+	};
+
+	const unsigned int fullWords = index / 16;
+	const unsigned int tailChars = index % 16;
+	const unsigned int *word = dna;
+	const unsigned int *const tailWord = dna + fullWords;
+	unsigned int packedCounts = 0;
+
+#ifdef DEBUG
+	if (index >= 256) {
+		fprintf(stderr, "ForwardDNAOccCount() : index >= 256!\n");
+		exit(1);
+	}
+#endif
+
+	while (word != tailWord) {
+		packedCounts += dnaDecodeTable[*word >> 16] + dnaDecodeTable[*word & 0x0000FFFF];
+		word++;
+	}
+
+	if (tailChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 16 - tailChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *tailWord & truncateRightMask[tailChars];
+		packedCounts += dnaDecodeTable[kept >> 16] + dnaDecodeTable[kept & 0xFFFF];
+		packedCounts -= 16 - tailChars;
+	}
+
+	return (packedCounts >> (character * 8)) & 0x000000FF;
+
+}
+
+/*
+ * Count the occurrences of one character among the `index` characters that
+ * immediately precede `dna` in a word-packed DNA sequence.
+ *
+ *   dna             points one word past the last word to be counted; words
+ *                   hold 16 two-bit characters each
+ *   index           number of characters to examine, ending just before
+ *                   `dna`; a DEBUG build aborts when it is 256 or more
+ *   character       code of the character to count; selects one of the four
+ *                   8-bit counters
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ *
+ * Of the earliest, partially covered word only the low-order characters are
+ * counted.  Returns the 8-bit counter that belongs to `character`.
+ */
+unsigned int BackwardDNAOccCount(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[16] =  {
+		0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
+		0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
+		0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
+		0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF
+	};
+
+	unsigned int wordToCount, charToCount;
+	unsigned int i, c;
+	unsigned int sum = 0;
+
+#ifdef DEBUG
+	if (index >= 256) {
+		fprintf(stderr, "BackwardDNAOccCount() : index >= 256!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 16;
+	charToCount = index - wordToCount * 16;
+
+	// Step back over the whole words only.  The partial word, when there is
+	// one, is dna[-1] from here, so no pointer is formed in front of the
+	// counted region when index is a multiple of 16.
+	dna -= wordToCount;
+
+	if (charToCount > 0) {
+		c = dna[-1] & truncateLeftMask[charToCount];	// increase count of 'a' by 16 - charToCount
+		sum += dnaDecodeTable[c >> 16];
+		sum += dnaDecodeTable[c & 0xFFFF];
+		sum += charToCount - 16;	// decrease count of 'a' by 16 - charToCount
+	}
+
+	for (i=0; i<wordToCount; i++) {
+		sum += dnaDecodeTable[dna[i] >> 16];
+		sum += dnaDecodeTable[dna[i] & 0x0000FFFF];
+	}
+
+	return (sum >> (character * 8)) & 0x000000FF;
+
+}
+
+/*
+ * Count, for every character at once, the occurrences among the first
+ * `index` characters of a word-packed DNA sequence.
+ *
+ *   dna             packed text, 16 two-bit characters per word, the first
+ *                   character of a word in its most significant bits
+ *   index           number of leading characters to examine; a DEBUG build
+ *                   aborts when it is 256 or more
+ *   occCount        output, four entries; occCount[k] receives the counter
+ *                   of character code k
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ */
+void ForwardDNAAllOccCount(const unsigned int*  dna, const unsigned int index, unsigned int*  __restrict occCount, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[16] = {
+		0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
+		0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
+		0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
+		0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC
+	};
+
+	unsigned int wordToCount, charToCount;
+	unsigned int i, c;
+	unsigned int sum = 0;
+
+#ifdef DEBUG
+	if (index >= 256) {
+		fprintf(stderr, "ForwardDNAAllOccCount() : index >= 256!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 16;
+	charToCount = index - wordToCount * 16;
+
+	for (i=0; i<wordToCount; i++) {
+		sum += dnaDecodeTable[dna[i] >> 16];
+		sum += dnaDecodeTable[dna[i] & 0x0000FFFF];
+	}
+
+	if (charToCount > 0) {
+		c = dna[i] & truncateRightMask[charToCount];	// increase count of 'a' by 16 - charToCount
+		sum += dnaDecodeTable[c >> 16];
+		sum += dnaDecodeTable[c & 0xFFFF];
+		sum += charToCount - 16;	// decrease count of 'a' by 16 - charToCount
+	}
+
+	// unpack the four 8-bit counters, lowest byte first
+	occCount[0] = sum & 0x000000FF;	sum >>= 8;
+	occCount[1] = sum & 0x000000FF;	sum >>= 8;
+	occCount[2] = sum & 0x000000FF;	sum >>= 8;
+	occCount[3] = sum;
+
+}
+
+/*
+ * Count, for every character at once, the occurrences among the `index`
+ * characters that immediately precede `dna` in a word-packed DNA sequence.
+ *
+ *   dna             points one word past the last word to be counted; words
+ *                   hold 16 two-bit characters each
+ *   index           number of characters to examine, ending just before
+ *                   `dna`; a DEBUG build aborts when it is 256 or more
+ *   occCount        output, four entries; occCount[k] receives the counter
+ *                   of character code k
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ */
+void BackwardDNAAllOccCount(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[16] =  {
+		0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
+		0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
+		0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
+		0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF
+	};
+
+	unsigned int wordToCount, charToCount;
+	unsigned int i, c;
+	unsigned int sum = 0;
+
+#ifdef DEBUG
+	if (index >= 256) {
+		fprintf(stderr, "BackwardDNAAllOccCount() : index >= 256!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 16;
+	charToCount = index - wordToCount * 16;
+
+	// Step back over the whole words only.  The partial word, when there is
+	// one, is dna[-1] from here, so no pointer is formed in front of the
+	// counted region when index is a multiple of 16.
+	dna -= wordToCount;
+
+	if (charToCount > 0) {
+		c = dna[-1] & truncateLeftMask[charToCount];	// increase count of 'a' by 16 - charToCount
+		sum += dnaDecodeTable[c >> 16];
+		sum += dnaDecodeTable[c & 0xFFFF];
+		sum += charToCount - 16;	// decrease count of 'a' by 16 - charToCount
+	}
+
+	for (i=0; i<wordToCount; i++) {
+		sum += dnaDecodeTable[dna[i] >> 16];
+		sum += dnaDecodeTable[dna[i] & 0x0000FFFF];
+	}
+
+	// unpack the four 8-bit counters, lowest byte first
+	occCount[0] = sum & 0x000000FF;	sum >>= 8;
+	occCount[1] = sum & 0x000000FF;	sum >>= 8;
+	occCount[2] = sum & 0x000000FF;	sum >>= 8;
+	occCount[3] = sum;
+
+}
+
+/*
+ * Count the set bits among the first `index` bits of a bit vector.
+ *
+ *   bitVector       packed bits, 32 per word, the first bit of a word in its
+ *                   most significant position
+ *   index           number of leading bits to examine; a DEBUG build aborts
+ *                   when it is 256 or more
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit counters, one per 2-bit group value
+ *                   (assumes the table built for 2-bit DNA codes)
+ *
+ * The table counts 2-bit groups, so the number of set bits is
+ * (groups equal to 01) + (groups equal to 10) + 2 * (groups equal to 11).
+ * All three terms are accumulated; keeping only the last one would miss
+ * every set bit that sits in a 01 or 10 group.
+ */
+unsigned int Forward1OccCount(const unsigned int*  bitVector, const unsigned int index, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[32] = {
+		0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 0xF8000000, 0xFC000000, 0xFE000000,
+		0xFF000000, 0xFF800000, 0xFFC00000, 0xFFE00000, 0xFFF00000, 0xFFF80000, 0xFFFC0000, 0xFFFE0000,
+		0xFFFF0000, 0xFFFF8000, 0xFFFFC000, 0xFFFFE000, 0xFFFFF000, 0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00,
+		0xFFFFFF00, 0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0, 0xFFFFFFF0, 0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE
+	};
+
+	unsigned int wordToCount, bitToCount;
+	unsigned int i, c;
+	unsigned int sum = 0;
+	unsigned int numberOf1;
+
+#ifdef DEBUG
+	if (index >= 256) {
+		fprintf(stderr, "Forward1OccCount() : index >= 256!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 32;
+	bitToCount = index - wordToCount * 32;
+
+	for (i=0; i<wordToCount; i++) {
+		sum += dnaDecodeTable[bitVector[i] >> 16];
+		sum += dnaDecodeTable[bitVector[i] & 0x0000FFFF];
+	}
+
+	if (bitToCount > 0) {
+		// masked-out bits become 00 groups, which contribute no set bits
+		c = bitVector[i] & truncateRightMask[bitToCount];
+		sum += dnaDecodeTable[c >> 16];
+		sum += dnaDecodeTable[c & 0x0000FFFF];
+	}
+
+	sum >>= 8;					// drop the counter of 00 groups
+	numberOf1 = sum & 0x000000FF;	// 01 groups: one set bit each
+	sum >>= 8;
+	numberOf1 += sum & 0x000000FF;	// 10 groups: one set bit each
+	sum >>= 8;
+	numberOf1 += sum * 2;			// 11 groups: two set bits each
+
+	return numberOf1;
+
+}
+
+/*
+ * Count the set bits among the `index` bits that immediately precede
+ * `bitVector`.
+ *
+ *   bitVector       points one word past the last word to be counted; words
+ *                   hold 32 bits each
+ *   index           number of bits to examine, ending just before
+ *                   `bitVector`; a DEBUG build aborts when it is 256 or more
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit counters, one per 2-bit group value
+ *                   (assumes the table built for 2-bit DNA codes)
+ *
+ * The table counts 2-bit groups, so the number of set bits is
+ * (groups equal to 01) + (groups equal to 10) + 2 * (groups equal to 11).
+ * All three terms are accumulated; keeping only the last one would miss
+ * every set bit that sits in a 01 or 10 group.
+ */
+unsigned int Backward1OccCount(const unsigned int*  bitVector, const unsigned int index, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[32] =  {
+		0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
+		0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
+		0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
+		0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF
+	};
+
+	unsigned int wordToCount, bitToCount;
+	unsigned int i, c;
+	unsigned int sum = 0;
+	unsigned int numberOf1;
+
+#ifdef DEBUG
+	if (index >= 256) {
+		fprintf(stderr, "Backward1OccCount() : index >= 256!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 32;
+	bitToCount = index - wordToCount * 32;
+
+	// Step back over the whole words only.  The partial word, when there is
+	// one, is bitVector[-1] from here, so no pointer is formed in front of
+	// the counted region when index is a multiple of 32.
+	bitVector -= wordToCount;
+
+	if (bitToCount > 0) {
+		// masked-out bits become 00 groups, which contribute no set bits
+		c = bitVector[-1] & truncateLeftMask[bitToCount];
+		sum += dnaDecodeTable[c >> 16];
+		sum += dnaDecodeTable[c & 0xFFFF];
+	}
+
+	for (i=0; i<wordToCount; i++) {
+		sum += dnaDecodeTable[bitVector[i] >> 16];
+		sum += dnaDecodeTable[bitVector[i] & 0x0000FFFF];
+	}
+
+	sum >>= 8;					// drop the counter of 00 groups
+	numberOf1 = sum & 0x000000FF;	// 01 groups: one set bit each
+	sum >>= 8;
+	numberOf1 += sum & 0x000000FF;	// 10 groups: one set bit each
+	sum >>= 8;
+	numberOf1 += sum * 2;			// 11 groups: two set bits each
+
+	return numberOf1;
+
+}
+
+/*
+ * Count the occurrences of one character among the first `index` characters
+ * of a word-packed DNA sequence, without an upper bound on `index`.
+ *
+ * The text is consumed in blocks of 256 characters (16 words); each block is
+ * summed on its own and its counter is added to the running total before the
+ * next block starts.  The remaining whole words and the trailing partial
+ * word are summed last.
+ *
+ *   dna             packed text, 16 two-bit characters per word, the first
+ *                   character of a word in its most significant bits
+ *   index           number of leading characters to examine
+ *   character       code of the character to count
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ *
+ * Returns the number of occurrences.  Terminates the program when a block
+ * yields a packed sum that DNA_OCC_SUM_EXCEPTION() flags but that is none of
+ * the four recognised values.
+ */
+unsigned int ForwardDNAOccCountNoLimit(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[16] = {
+		0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
+		0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
+		0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
+		0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC
+	};
+
+	const unsigned int fullBlocks = index / 256;
+	const unsigned int leftover = index - fullBlocks * 256;
+	const unsigned int leftoverWords = leftover / 16;
+	const unsigned int tailChars = leftover - leftoverWords * 16;
+	unsigned int block, w;
+	unsigned int packed;
+	unsigned int total = 0;
+
+	for (block=0; block<fullBlocks; block++) {
+
+		packed = 0;
+		for (w=0; w<16; w++, dna++) {
+			packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+		}
+
+		if (!DNA_OCC_SUM_EXCEPTION(packed)) {
+			total += (packed >> (character * 8)) & 0x000000FF;
+			continue;
+		}
+
+		// A counter of 256 does not fit its 8 bits.  Each value below is the
+		// packed sum left behind when one particular counter reached 256;
+		// only that character is credited with the whole block.
+		switch (packed) {
+		case 0x00000100:
+			if (character == 0) total += 256;
+			break;
+		case 0x00010000:
+			if (character == 1) total += 256;
+			break;
+		case 0x01000000:
+			if (character == 2) total += 256;
+			break;
+		case 0x00000000:
+			if (character == 3) total += 256;
+			break;
+		default:
+			fprintf(stderr, "ForwardDNAOccCountNoLimit(): DNA occ sum exception!\n");
+			exit(1);
+		}
+
+	}
+
+	packed = 0;
+	for (w=0; w<leftoverWords; w++, dna++) {
+		packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+	}
+
+	if (tailChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 16 - tailChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *dna & truncateRightMask[tailChars];
+		packed += dnaDecodeTable[kept >> 16] + dnaDecodeTable[kept & 0xFFFF];
+		packed -= 16 - tailChars;
+	}
+
+	return total + ((packed >> (character * 8)) & 0x000000FF);
+
+}
+
+/*
+ * Count the occurrences of one character among the `index` characters that
+ * immediately precede `dna` in a word-packed DNA sequence, without an upper
+ * bound on `index`.
+ *
+ * Counting starts at the earliest word: first the partially covered word
+ * (low-order characters only) and the whole words that do not fill a block,
+ * then the remaining text in blocks of 256 characters (16 words).
+ *
+ *   dna             points one word past the last word to be counted
+ *   index           number of characters to examine, ending just before `dna`
+ *   character       code of the character to count
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ *
+ * Returns the number of occurrences.  Terminates the program when a block
+ * yields a packed sum that DNA_OCC_SUM_EXCEPTION() flags but that is none of
+ * the four recognised values.
+ */
+unsigned int BackwardDNAOccCountNoLimit(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[16] =  {
+		0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
+		0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
+		0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
+		0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF
+	};
+
+	const unsigned int fullBlocks = index / 256;
+	const unsigned int leftover = index - fullBlocks * 256;
+	const unsigned int leftoverWords = leftover / 16;
+	const unsigned int headChars = leftover - leftoverWords * 16;
+	unsigned int block, w;
+	unsigned int packed = 0;
+	unsigned int total;
+
+	// park the cursor on the word that holds the earliest counted characters
+	dna -= index / 16 + 1;
+
+	if (headChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 16 - headChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *dna & truncateLeftMask[headChars];
+		packed += dnaDecodeTable[kept >> 16] + dnaDecodeTable[kept & 0xFFFF];
+		packed -= 16 - headChars;
+	}
+
+	for (w=0; w<leftoverWords; w++) {
+		dna++;
+		packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+	}
+
+	total = (packed >> (character * 8)) & 0x000000FF;
+
+	for (block=0; block<fullBlocks; block++) {
+
+		packed = 0;
+		for (w=0; w<16; w++) {
+			dna++;
+			packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+		}
+
+		if (!DNA_OCC_SUM_EXCEPTION(packed)) {
+			total += (packed >> (character * 8)) & 0x000000FF;
+			continue;
+		}
+
+		// A counter of 256 does not fit its 8 bits.  Each value below is the
+		// packed sum left behind when one particular counter reached 256;
+		// only that character is credited with the whole block.
+		switch (packed) {
+		case 0x00000100:
+			if (character == 0) total += 256;
+			break;
+		case 0x00010000:
+			if (character == 1) total += 256;
+			break;
+		case 0x01000000:
+			if (character == 2) total += 256;
+			break;
+		case 0x00000000:
+			if (character == 3) total += 256;
+			break;
+		default:
+			fprintf(stderr, "BackwardDNAOccCountNoLimit(): DNA occ sum exception!\n");
+			exit(1);
+		}
+
+	}
+
+	return total;
+
+}
+
+/*
+ * Count, for every character at once, the occurrences among the first
+ * `index` characters of a word-packed DNA sequence, without an upper bound
+ * on `index`.
+ *
+ * The text is consumed in blocks of 256 characters (16 words); each block is
+ * summed on its own and unpacked into occCount before the next block starts.
+ * The remaining whole words and the trailing partial word are summed last.
+ *
+ *   dna             packed text, 16 two-bit characters per word, the first
+ *                   character of a word in its most significant bits
+ *   index           number of leading characters to examine
+ *   occCount        output, four entries; occCount[k] receives the number of
+ *                   occurrences of character code k
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ *
+ * Terminates the program when a block yields a packed sum that
+ * DNA_OCC_SUM_EXCEPTION() flags but that is none of the four recognised
+ * values.
+ */
+void ForwardDNAAllOccCountNoLimit(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[16] = {
+		0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
+		0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
+		0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
+		0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC
+	};
+
+	const unsigned int fullBlocks = index / 256;
+	const unsigned int leftover = index - fullBlocks * 256;
+	const unsigned int leftoverWords = leftover / 16;
+	const unsigned int tailChars = leftover - leftoverWords * 16;
+	unsigned int block, w;
+	unsigned int packed;
+
+	occCount[0] = occCount[1] = occCount[2] = occCount[3] = 0;
+
+	for (block=0; block<fullBlocks; block++) {
+
+		packed = 0;
+		for (w=0; w<16; w++, dna++) {
+			packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+		}
+
+		if (!DNA_OCC_SUM_EXCEPTION(packed)) {
+			occCount[0] += packed & 0x000000FF;
+			occCount[1] += (packed >> 8) & 0x000000FF;
+			occCount[2] += (packed >> 16) & 0x000000FF;
+			occCount[3] += packed >> 24;
+			continue;
+		}
+
+		// A counter of 256 does not fit its 8 bits.  Each value below is the
+		// packed sum left behind when one particular counter reached 256;
+		// that character is credited with the whole block.
+		switch (packed) {
+		case 0x00000100:
+			occCount[0] += 256;
+			break;
+		case 0x00010000:
+			occCount[1] += 256;
+			break;
+		case 0x01000000:
+			occCount[2] += 256;
+			break;
+		case 0x00000000:
+			occCount[3] += 256;
+			break;
+		default:
+			fprintf(stderr, "ForwardDNAAllOccCountNoLimit(): DNA occ sum exception!\n");
+			exit(1);
+		}
+
+	}
+
+	packed = 0;
+	for (w=0; w<leftoverWords; w++, dna++) {
+		packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+	}
+
+	if (tailChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 16 - tailChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *dna & truncateRightMask[tailChars];
+		packed += dnaDecodeTable[kept >> 16] + dnaDecodeTable[kept & 0xFFFF];
+		packed -= 16 - tailChars;
+	}
+
+	occCount[0] += packed & 0x000000FF;
+	occCount[1] += (packed >> 8) & 0x000000FF;
+	occCount[2] += (packed >> 16) & 0x000000FF;
+	occCount[3] += packed >> 24;
+
+}
+
+/*
+ * Count, for every character at once, the occurrences among the `index`
+ * characters that immediately precede `dna` in a word-packed DNA sequence,
+ * without an upper bound on `index`.
+ *
+ * Counting starts at the earliest word: first the partially covered word
+ * (low-order characters only) and the whole words that do not fill a block,
+ * then the remaining text in blocks of 256 characters (16 words).
+ *
+ *   dna             points one word past the last word to be counted
+ *   index           number of characters to examine, ending just before `dna`
+ *   occCount        output, four entries; occCount[k] receives the number of
+ *                   occurrences of character code k
+ *   dnaDecodeTable  table indexed by a 16-bit half word whose entries pack
+ *                   four 8-bit character counters
+ *
+ * Terminates the program when a block yields a packed sum that
+ * DNA_OCC_SUM_EXCEPTION() flags but that is none of the four recognised
+ * values.
+ */
+void BackwardDNAAllOccCountNoLimit(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[16] =  {
+		0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
+		0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
+		0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
+		0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF
+	};
+
+	const unsigned int fullBlocks = index / 256;
+	const unsigned int leftover = index - fullBlocks * 256;
+	const unsigned int leftoverWords = leftover / 16;
+	const unsigned int headChars = leftover - leftoverWords * 16;
+	unsigned int block, w;
+	unsigned int packed = 0;
+
+	// park the cursor on the word that holds the earliest counted characters
+	dna -= index / 16 + 1;
+
+	if (headChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 16 - headChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *dna & truncateLeftMask[headChars];
+		packed += dnaDecodeTable[kept >> 16] + dnaDecodeTable[kept & 0xFFFF];
+		packed -= 16 - headChars;
+	}
+
+	for (w=0; w<leftoverWords; w++) {
+		dna++;
+		packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+	}
+
+	occCount[0] = packed & 0x000000FF;
+	occCount[1] = (packed >> 8) & 0x000000FF;
+	occCount[2] = (packed >> 16) & 0x000000FF;
+	occCount[3] = packed >> 24;
+
+	for (block=0; block<fullBlocks; block++) {
+
+		packed = 0;
+		for (w=0; w<16; w++) {
+			dna++;
+			packed += dnaDecodeTable[*dna >> 16] + dnaDecodeTable[*dna & 0x0000FFFF];
+		}
+
+		if (!DNA_OCC_SUM_EXCEPTION(packed)) {
+			occCount[0] += packed & 0x000000FF;
+			occCount[1] += (packed >> 8) & 0x000000FF;
+			occCount[2] += (packed >> 16) & 0x000000FF;
+			occCount[3] += packed >> 24;
+			continue;
+		}
+
+		// A counter of 256 does not fit its 8 bits.  Each value below is the
+		// packed sum left behind when one particular counter reached 256;
+		// that character is credited with the whole block.
+		switch (packed) {
+		case 0x00000100:
+			occCount[0] += 256;
+			break;
+		case 0x00010000:
+			occCount[1] += 256;
+			break;
+		case 0x01000000:
+			occCount[2] += 256;
+			break;
+		case 0x00000000:
+			occCount[3] += 256;
+			break;
+		default:
+			fprintf(stderr, "BackwardDNAAllOccCountNoLimit(): DNA occ sum exception!\n");
+			exit(1);
+		}
+
+	}
+
+}
+
+
+/*
+ * Fill dnaDecodeTable with one packed counter word per table index, for text
+ * stored with 3-bit character codes.
+ *
+ * Each index is read as five 3-bit codes.  For codes 0..3, byte k of the
+ * stored word (bits 8k..8k+7) holds how many of the five codes equal k.
+ * Code 4 ('n') is not stored; its count is derived by the callers from the
+ * four stored counters.  Codes 5..7 are not counted either: shifting by
+ * 8 * code would move a 1 by 40 bits or more, which is undefined for an int.
+ *
+ * dnaDecodeTable must have room for DNA_N_OCC_CNT_TABLE_SIZE_IN_WORD entries.
+ */
+void GenerateDNA_NOccCountTable(unsigned int *dnaDecodeTable) {
+
+	unsigned int i, j, c, t;
+
+	for (i=0; i<DNA_N_OCC_CNT_TABLE_SIZE_IN_WORD; i++) {
+		dnaDecodeTable[i] = 0;
+		c = i;
+		for (j=0; j<5; j++) {
+			t = c & 0x00000007;
+			if (t < 4) {
+				// Count of 'n' is to be derived from acgt
+				dnaDecodeTable[i] += 1u << (t * 8);
+			}
+			c >>= 3;
+		}
+	}
+
+}
+
+/*
+ * Count the occurrences of one character among the first `index` characters
+ * of a word-packed DNA sequence that uses 3-bit character codes.
+ *
+ *   dna             packed text, 10 three-bit characters per word in the
+ *                   upper 30 bits, the first character in the most
+ *                   significant bits
+ *   index           number of leading characters to examine; a DEBUG build
+ *                   aborts when it exceeds 250
+ *   character       code of the character to count; 4 is answered as
+ *                   `index` minus the four stored counters
+ *   dnaDecodeTable  table indexed by 15 bits (five characters) whose entries
+ *                   pack four 8-bit character counters
+ *
+ * Returns the number of occurrences.
+ */
+unsigned int ForwardDNA_NOccCount(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[10] = {
+		0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
+		0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
+		0xFFFFFF00, 0xFFFFFFE0
+	};
+
+	const unsigned int fullWords = index / 10;
+	const unsigned int tailChars = index % 10;
+	const unsigned int *word = dna;
+	const unsigned int *const tailWord = dna + fullWords;
+	unsigned int packed = 0;
+
+#ifdef DEBUG
+	if (index > 250) {
+		fprintf(stderr, "ForwardDNA_NOccCount() : index > 250!\n");
+		exit(1);
+	}
+#endif
+
+	while (word != tailWord) {
+		packed += dnaDecodeTable[*word >> 17] + dnaDecodeTable[(*word >> 2) & 0x00007FFF];
+		word++;
+	}
+
+	if (tailChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 10 - tailChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *tailWord & truncateRightMask[tailChars];
+		packed += dnaDecodeTable[kept >> 17] + dnaDecodeTable[(kept >> 2) & 0x00007FFF];
+		packed -= 10 - tailChars;
+	}
+
+	if (character != 4) {
+		return (packed >> (character * 8)) & 0x000000FF;
+	}
+
+	// whatever is not one of the four stored counters
+	return index
+		- (packed & 0x000000FF)
+		- ((packed >> 8) & 0x000000FF)
+		- ((packed >> 16) & 0x000000FF)
+		- (packed >> 24);
+
+}
+
+/*
+ * Count the occurrences of one character among the `index` characters that
+ * immediately precede `dna` in a word-packed DNA sequence that uses 3-bit
+ * character codes.
+ *
+ *   dna             points one word past the last word to be counted; words
+ *                   hold 10 three-bit characters in their upper 30 bits
+ *   index           number of characters to examine, ending just before
+ *                   `dna`; a DEBUG build aborts when it exceeds 250
+ *   character       code of the character to count; 4 is answered as
+ *                   `index` minus the four stored counters
+ *   dnaDecodeTable  table indexed by 15 bits (five characters) whose entries
+ *                   pack four 8-bit character counters
+ *
+ * Of the earliest, partially covered word only the low-order characters are
+ * counted.  Returns the number of occurrences.
+ */
+unsigned int BackwardDNA_NOccCount(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[10] =  {
+		0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
+		0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
+		0x03FFFFFC, 0x1FFFFFFC
+	};
+
+	unsigned int wordToCount, charToCount;
+	unsigned int j, c;
+	unsigned int sum = 0;
+	unsigned int occCount;
+
+#ifdef DEBUG
+	if (index > 250) {
+		fprintf(stderr, "BackwardDNA_NOccCount() : index > 250!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 10;
+	charToCount = index - wordToCount * 10;
+
+	// Step back over the whole words only.  The partial word, when there is
+	// one, is dna[-1] from here, so no pointer is formed in front of the
+	// counted region when index is a multiple of 10.
+	dna -= wordToCount;
+
+	if (charToCount > 0) {
+		c = dna[-1] & truncateLeftMask[charToCount];	// increase count of 'a' by 10 - charToCount;
+		sum += dnaDecodeTable[c >> 17];
+		sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
+		sum += charToCount - 10;	// decrease count of 'a' by 10 - charToCount
+	}
+
+	for (j=0; j<wordToCount; j++) {
+		sum += dnaDecodeTable[dna[j] >> 17];
+		sum += dnaDecodeTable[(dna[j] >> 2) & 0x00007FFF];
+	}
+
+	if (character != 4) {
+		occCount = (sum >> (character * 8)) & 0x000000FF;
+	} else {
+		// whatever is not one of the four stored counters
+		occCount = index;
+		occCount -= sum & 0x000000FF;	sum >>= 8;
+		occCount -= sum & 0x000000FF;	sum >>= 8;
+		occCount -= sum & 0x000000FF;	sum >>= 8;
+		occCount -= sum;
+	}
+
+#ifdef DEBUG
+	if (occCount > index + 1) {
+		fprintf(stderr, "BackwardDNA_NOccCount() : occCount > index + 1!\n");
+		exit(1);
+	}
+#endif
+
+	return occCount;
+
+}
+
+/*
+ * Count, for the four stored characters at once, the occurrences among the
+ * first `index` characters of a word-packed DNA sequence that uses 3-bit
+ * character codes.
+ *
+ *   dna             packed text, 10 three-bit characters per word in the
+ *                   upper 30 bits, the first character in the most
+ *                   significant bits
+ *   index           number of leading characters to examine; a DEBUG build
+ *                   aborts when it exceeds 250
+ *   occCount        output, four entries; occCount[k] receives the counter
+ *                   of character code k (code 4 is not reported)
+ *   dnaDecodeTable  table indexed by 15 bits (five characters) whose entries
+ *                   pack four 8-bit character counters
+ */
+void ForwardDNA_NAllOccCount(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[10] = {
+		0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
+		0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
+		0xFFFFFF00, 0xFFFFFFE0
+	};
+
+	unsigned int wordToCount, charToCount;
+	unsigned int i, c;
+	unsigned int sum = 0;
+
+#ifdef DEBUG
+	if (index > 250) {
+		fprintf(stderr, "ForwardDNA_NAllOccCount() : index > 250!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 10;
+	charToCount = index - wordToCount * 10;
+
+	for (i=0; i<wordToCount; i++) {
+		sum += dnaDecodeTable[dna[i] >> 17];
+		sum += dnaDecodeTable[(dna[i] >> 2) & 0x00007FFF];
+	}
+
+	if (charToCount > 0) {
+		c = dna[i] & truncateRightMask[charToCount];	// increase count of 'a' by 10 - charToCount;
+		sum += dnaDecodeTable[c >> 17];
+		sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
+		sum += charToCount - 10;	// decrease count of 'a' by 10 - charToCount
+	}
+
+	// unpack the four 8-bit counters, lowest byte first
+	occCount[0] = sum & 0x000000FF;	sum >>= 8;
+	occCount[1] = sum & 0x000000FF;	sum >>= 8;
+	occCount[2] = sum & 0x000000FF;	sum >>= 8;
+	occCount[3] = sum;
+
+}
+
+/*
+ * Count, for the four stored characters at once, the occurrences among the
+ * `index` characters that immediately precede `dna` in a word-packed DNA
+ * sequence that uses 3-bit character codes.
+ *
+ *   dna             points one word past the last word to be counted; words
+ *                   hold 10 three-bit characters in their upper 30 bits
+ *   index           number of characters to examine, ending just before
+ *                   `dna`; a DEBUG build aborts when it exceeds 250
+ *   occCount        output, four entries; occCount[k] receives the counter
+ *                   of character code k (code 4 is not reported)
+ *   dnaDecodeTable  table indexed by 15 bits (five characters) whose entries
+ *                   pack four 8-bit character counters
+ */
+void BackwardDNA_NAllOccCount(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[10] =  {
+		0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
+		0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
+		0x03FFFFFC, 0x1FFFFFFC
+	};
+
+	unsigned int wordToCount, charToCount;
+	unsigned int j, c;
+	unsigned int sum = 0;
+
+#ifdef DEBUG
+	if (index > 250) {
+		fprintf(stderr, "BackwardDNA_NAllOccCount() : index > 250!\n");
+		exit(1);
+	}
+#endif
+
+	wordToCount = index / 10;
+	charToCount = index - wordToCount * 10;
+
+	// Step back over the whole words only.  The partial word, when there is
+	// one, is dna[-1] from here, so no pointer is formed in front of the
+	// counted region when index is a multiple of 10.
+	dna -= wordToCount;
+
+	if (charToCount > 0) {
+		c = dna[-1] & truncateLeftMask[charToCount];	// increase count of 'a' by 10 - charToCount;
+		sum += dnaDecodeTable[c >> 17];
+		sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
+		sum += charToCount - 10;	// decrease count of 'a' by 10 - charToCount
+	}
+
+	for (j=0; j<wordToCount; j++) {
+		sum += dnaDecodeTable[dna[j] >> 17];
+		sum += dnaDecodeTable[(dna[j] >> 2) & 0x00007FFF];
+	}
+
+	// unpack the four 8-bit counters, lowest byte first
+	occCount[0] = sum & 0x000000FF;	sum >>= 8;
+	occCount[1] = sum & 0x000000FF;	sum >>= 8;
+	occCount[2] = sum & 0x000000FF;	sum >>= 8;
+	occCount[3] = sum;
+
+}
+
+/*
+ * Count the occurrences of one character among the first `index` characters
+ * of a word-packed DNA sequence that uses 3-bit character codes, without an
+ * upper bound on `index`.
+ *
+ * The text is consumed in blocks of 250 characters (25 words); each block is
+ * summed on its own and folded into the running total before the next block
+ * starts.  The remaining whole words and the trailing partial word are
+ * summed last.
+ *
+ *   dna             packed text, 10 three-bit characters per word in the
+ *                   upper 30 bits, the first character in the most
+ *                   significant bits
+ *   index           number of leading characters to examine
+ *   character       code of the character to count; 4 is answered as
+ *                   `index` minus all stored counters
+ *   dnaDecodeTable  table indexed by 15 bits (five characters) whose entries
+ *                   pack four 8-bit character counters
+ *
+ * Returns the number of occurrences.
+ */
+unsigned int ForwardDNA_NOccCountNoLimit(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateRightMask[10] = {
+		0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
+		0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
+		0xFFFFFF00, 0xFFFFFFE0
+	};
+
+	const unsigned int fullBlocks = index / 250;
+	const unsigned int leftover = index - fullBlocks * 250;
+	const unsigned int leftoverWords = leftover / 10;
+	const unsigned int tailChars = leftover - leftoverWords * 10;
+	const int wantsN = (character == 4);
+	unsigned int block, w;
+	unsigned int packed;
+	unsigned int total = 0;
+
+	for (block=0; block<fullBlocks; block++) {
+
+		packed = 0;
+		for (w=0; w<25; w++, dna++) {
+			packed += dnaDecodeTable[*dna >> 17] + dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+		}
+
+		if (wantsN) {
+			// Subtract the stored counters now; `index` is added once at the
+			// end, so the total wraps below zero until then.
+			total -= (packed & 0x000000FF)
+				+ ((packed >> 8) & 0x000000FF)
+				+ ((packed >> 16) & 0x000000FF)
+				+ (packed >> 24);
+		} else {
+			total += (packed >> (character * 8)) & 0x000000FF;
+		}
+	}
+
+	packed = 0;
+	for (w=0; w<leftoverWords; w++, dna++) {
+		packed += dnaDecodeTable[*dna >> 17] + dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+	}
+
+	if (tailChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 10 - tailChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *dna & truncateRightMask[tailChars];
+		packed += dnaDecodeTable[kept >> 17] + dnaDecodeTable[(kept >> 2) & 0x00007FFF];
+		packed -= 10 - tailChars;
+	}
+
+	if (wantsN) {
+		total += index;
+		total -= (packed & 0x000000FF)
+			+ ((packed >> 8) & 0x000000FF)
+			+ ((packed >> 16) & 0x000000FF)
+			+ (packed >> 24);
+	} else {
+		total += (packed >> (character * 8)) & 0x000000FF;
+	}
+
+	return total;
+
+}
+
+/*
+ * Count the occurrences of one character among the `index` characters that
+ * immediately precede `dna` in a word-packed DNA sequence that uses 3-bit
+ * character codes, without an upper bound on `index`.
+ *
+ * Counting starts at the earliest word: first the partially covered word
+ * (low-order characters only) and the whole words that do not fill a block,
+ * then the remaining text in blocks of 250 characters (25 words).
+ *
+ *   dna             points one word past the last word to be counted
+ *   index           number of characters to examine, ending just before `dna`
+ *   character       code of the character to count; 4 is answered as
+ *                   `index` minus all stored counters
+ *   dnaDecodeTable  table indexed by 15 bits (five characters) whose entries
+ *                   pack four 8-bit character counters
+ *
+ * Returns the number of occurrences.
+ */
+unsigned int BackwardDNA_NOccCountNoLimit(const unsigned int*  dna, const unsigned int index, const unsigned int character, const unsigned int*  dnaDecodeTable) {
+
+	static const unsigned int truncateLeftMask[10] =  {
+		0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
+		0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
+		0x03FFFFFC, 0x1FFFFFFC
+	};
+
+	const unsigned int fullBlocks = index / 250;
+	const unsigned int leftover = index - fullBlocks * 250;
+	const unsigned int leftoverWords = leftover / 10;
+	const unsigned int headChars = leftover - leftoverWords * 10;
+	const int wantsN = (character == 4);
+	unsigned int block, w;
+	unsigned int packed = 0;
+	unsigned int total;
+
+	// park the cursor on the word that holds the earliest counted characters
+	dna -= index / 10 + 1;
+
+	if (headChars != 0) {
+		// Masked-out characters read as code 0 and inflate that counter
+		// by 10 - headChars; the surplus is taken back out afterwards.
+		const unsigned int kept = *dna & truncateLeftMask[headChars];
+		packed += dnaDecodeTable[kept >> 17] + dnaDecodeTable[(kept >> 2) & 0x00007FFF];
+		packed -= 10 - headChars;
+	}
+
+	for (w=0; w<leftoverWords; w++) {
+		dna++;
+		packed += dnaDecodeTable[*dna >> 17] + dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+	}
+
+	if (wantsN) {
+		// start from `index` and take every stored counter back out
+		total = index;
+		total -= (packed & 0x000000FF)
+			+ ((packed >> 8) & 0x000000FF)
+			+ ((packed >> 16) & 0x000000FF)
+			+ (packed >> 24);
+	} else {
+		total = (packed >> (character * 8)) & 0x000000FF;
+	}
+
+	for (block=0; block<fullBlocks; block++) {
+
+		packed = 0;
+		for (w=0; w<25; w++) {
+			dna++;
+			packed += dnaDecodeTable[*dna >> 17] + dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+		}
+
+		if (wantsN) {
+			total -= (packed & 0x000000FF)
+				+ ((packed >> 8) & 0x000000FF)
+				+ ((packed >> 16) & 0x000000FF)
+				+ (packed >> 24);
+		} else {
+			total += (packed >> (character * 8)) & 0x000000FF;
+		}
+	}
+
+	return total;
+
+}
+
+void ForwardDNA_NAllOccCountNoLimit(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+	// Stores in occCount[0..3] the occurrences of characters 0-3 among the first index characters starting at dna; earlier contents are overwritten.
+	static const unsigned int truncateRightMask[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
+											   0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
+											   0xFFFFFF00, 0xFFFFFFE0};
+	// truncateRightMask[k] keeps the k highest 3-bit characters of a word; cleared fields read as character 0.
+	unsigned int iteration, wordToCount, charToCount;
+	unsigned int i, j, c;
+	unsigned int sum;
+
+	occCount[0] = 0;
+	occCount[1] = 0;
+	occCount[2] = 0;
+	occCount[3] = 0;
+	// Split index into 250-character batches (25 words), whole 10-character words and leftover characters.
+	iteration = index / 250;
+	wordToCount = (index - iteration * 250) / 10;
+	charToCount = index - iteration * 250 - wordToCount * 10;
+
+	for (i=0; i<iteration; i++) {
+		// Each word takes two table lookups of 15 bits (5 characters) each; the low 2 bits of a word are not used.
+		sum = 0;
+		for (j=0; j<25; j++) {
+			sum += dnaDecodeTable[*dna >> 17];
+			sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+			dna++;
+		}
+		occCount[0] += sum & 0x000000FF;	sum >>= 8;
+		occCount[1] += sum & 0x000000FF;	sum >>= 8;
+		occCount[2] += sum & 0x000000FF;	sum >>= 8;
+		occCount[3] += sum;
+	}
+	// Tail: remaining whole words, then the partial last word masked down to its first charToCount characters.
+	sum = 0;
+	for (j=0; j<wordToCount; j++) {
+		sum += dnaDecodeTable[*dna >> 17];
+		sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+		dna++;
+	}
+
+	if (charToCount > 0) {
+		c = *dna & truncateRightMask[charToCount];	// increase count of 'a' by 10 - charToCount;
+		sum += dnaDecodeTable[c >> 17];
+		sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
+		sum += charToCount - 10;	// decrease count of 'a' by 10 - charToCount
+	}
+
+	occCount[0] += sum & 0x000000FF;	sum >>= 8;
+	occCount[1] += sum & 0x000000FF;	sum >>= 8;
+	occCount[2] += sum & 0x000000FF;	sum >>= 8;
+	occCount[3] += sum;
+
+}
+
+void BackwardDNA_NAllOccCountNoLimit(const unsigned int*  dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int*  dnaDecodeTable) {
+	// Stores in occCount[0..3] the occurrences of characters 0-3 among the index characters stored immediately before dna; earlier contents are overwritten.
+	static const unsigned int truncateLeftMask[10] =  { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
+											   0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
+											   0x03FFFFFC, 0x1FFFFFFC};
+	// truncateLeftMask[k] keeps the k lowest 3-bit characters of a word (bits 2 upward); cleared fields read as character 0.
+	unsigned int iteration, wordToCount, charToCount;
+	unsigned int i, j, c;
+	unsigned int sum = 0;
+	// Step back over index / 10 whole words plus one word for a partial head; that extra word is only read when charToCount > 0.
+	dna -= index / 10 + 1;
+	// Split index into 250-character batches (25 words), whole 10-character words and leftover characters.
+	iteration = index / 250;
+	wordToCount = (index - iteration * 250) / 10;
+	charToCount = index - iteration * 250 - wordToCount * 10;
+
+	if (charToCount > 0) {
+		c = *dna & truncateLeftMask[charToCount];	// increase count of 'a' by 10 - charToCount;
+		sum += dnaDecodeTable[c >> 17];
+		sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
+		sum += charToCount - 10;	// decrease count of 'a' by 10 - charToCount
+	}
+
+	for (j=0; j<wordToCount; j++) {
+		dna++;
+		sum += dnaDecodeTable[*dna >> 17];
+		sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+	}
+	// Unpack the head (fewer than 250 characters): byte k of sum is read as the count of character k.
+	occCount[0] = sum & 0x000000FF;	sum >>= 8;
+	occCount[1] = sum & 0x000000FF;	sum >>= 8;
+	occCount[2] = sum & 0x000000FF;	sum >>= 8;
+	occCount[3] = sum;
+	// Add the remaining batches of 25 words; sum restarts at 0 for each batch.
+	for (i=0; i<iteration; i++) {
+
+		sum = 0;
+		for (j=0; j<25; j++) {
+			dna++;
+			sum += dnaDecodeTable[*dna >> 17];
+			sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
+		}
+		occCount[0] += sum & 0x000000FF;	sum >>= 8;
+		occCount[1] += sum & 0x000000FF;	sum >>= 8;
+		occCount[2] += sum & 0x000000FF;	sum >>= 8;
+		occCount[3] += sum;
+	}
+
+}
+
+unsigned int ForwardOccCount(const unsigned int*  packed, const unsigned int index, const unsigned int character, const unsigned int alphabetSize) {
+	// Returns how many of the first index characters of the word-packed text at packed equal character.
+	unsigned int wordToCount, charToCount;
+	unsigned int bitPerChar, charPerWord;
+	unsigned int i, j, c;
+	unsigned int occCount = 0;
+	// ceilLog2 and BITS_IN_WORD are defined outside this block. NOTE(review): a bitPerChar of 0 would divide by zero below; confirm callers never pass such an alphabetSize.
+	bitPerChar = ceilLog2(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+
+	wordToCount = index / charPerWord;
+	charToCount = index - wordToCount * charPerWord;
+	// Characters are taken from the most significant end of each word: compare the top bitPerChar bits, then shift the next character up.
+	for (i=0; i<wordToCount; i++) {
+		c = packed[i];
+		for (j=0; j<charPerWord; j++) {
+			if (c >> (BITS_IN_WORD - bitPerChar) == character) {
+				occCount++;
+			}
+			c <<= bitPerChar;
+		}
+	}
+	if (charToCount > 0) {	// leftover characters come from the next word; i equals wordToCount here
+		c = packed[i];
+		for (j=0; j<charToCount; j++) {
+			if (c >> (BITS_IN_WORD - bitPerChar) == character) {
+				occCount++;
+			}
+			c <<= bitPerChar;
+		}
+	}
+
+	return occCount;
+
+}
+
+
+unsigned int BackwardOccCount(const unsigned int*  packed, const unsigned int index, const unsigned int character, const unsigned int alphabetSize) {
+	// Returns how many of the index characters stored immediately before packed equal character.
+	unsigned int wordToCount, charToCount;
+	unsigned int bitPerChar, charPerWord;
+	unsigned int i, j, c;
+	unsigned int occCount = 0;
+	// ceilLog2 and BITS_IN_WORD are defined outside this block. NOTE(review): a bitPerChar of 0 would divide by zero below; confirm callers never pass such an alphabetSize.
+	bitPerChar = ceilLog2(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+
+	wordToCount = index / charPerWord;
+	charToCount = index - wordToCount * charPerWord;
+	// Step back over the wordToCount whole words plus one word for a partial head; that extra word is only read when charToCount > 0.
+	packed -= wordToCount + 1;
+	// Partial head: shift out the leading charPerWord - charToCount characters so the last charToCount characters sit at the top of c.
+	if (charToCount > 0) {
+		c = *packed << (bitPerChar * (charPerWord - charToCount));
+		for (j=0; j<charToCount; j++) {
+			if (c >> (BITS_IN_WORD - bitPerChar) == character) {
+				occCount++;
+			}
+			c <<= bitPerChar;
+		}
+	}
+
+	for (i=1; i<=wordToCount; i++) {
+		packed++;
+		c = *packed;
+		for (j=0; j<charPerWord; j++) {
+			if (c >> (BITS_IN_WORD - bitPerChar) == character) {
+				occCount++;
+			}
+			c <<= bitPerChar;
+		}
+	}
+
+	return occCount;
+}
+
+void ForwardAllOccCount(const unsigned int*  packed, const unsigned int index, const unsigned int alphabetSize, unsigned int*  occCount) {
+	// Adds to occCount[v] the number of times value v occurs among the first index characters of packed; occCount is not cleared here.
+	unsigned int wordToCount, charToCount;
+	unsigned int bitPerChar, charPerWord;
+	unsigned int i, j, c;
+	// occCount is indexed by the raw bitPerChar-bit value of each character, so indices up to (1 << bitPerChar) - 1 can be touched.
+	bitPerChar = ceilLog2(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+
+	wordToCount = index / charPerWord;
+	charToCount = index - wordToCount * charPerWord;
+	// Characters are taken from the most significant end of each word, whole words first.
+	for (i=0; i<wordToCount; i++) {
+		c = packed[i];
+		for (j=0; j<charPerWord; j++) {
+			occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
+			c <<= bitPerChar;
+		}
+	}
+	if (charToCount > 0) {	// leftover characters come from the next word; i equals wordToCount here
+		c = packed[i];
+		for (j=0; j<charToCount; j++) {
+			occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
+			c <<= bitPerChar;
+		}
+	}
+
+}
+
+void BackwardAllOccCount(const unsigned int*  packed, const unsigned int index, const unsigned int alphabetSize, unsigned int*  occCount) {
+	// Adds to occCount[v] the number of times value v occurs among the index characters stored immediately before packed; occCount is not cleared here.
+	unsigned int wordToCount, charToCount;
+	unsigned int bitPerChar, charPerWord;
+	unsigned int i, j, c;
+	// occCount is indexed by the raw bitPerChar-bit value of each character, so indices up to (1 << bitPerChar) - 1 can be touched.
+	bitPerChar = ceilLog2(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+
+	wordToCount = index / charPerWord;
+	charToCount = index - wordToCount * charPerWord;
+	// Step back over the wordToCount whole words plus one word for a partial head; that extra word is only read when charToCount > 0.
+	packed -= wordToCount + 1;
+	// Partial head: shift out the leading charPerWord - charToCount characters so the last charToCount characters sit at the top of c.
+	if (charToCount > 0) {
+		c = *packed << (bitPerChar * (charPerWord - charToCount));
+		for (j=0; j<charToCount; j++) {
+			occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
+			c <<= bitPerChar;
+		}
+	}
+
+	for (i=1; i<=wordToCount; i++) {
+		packed++;
+		c = *packed;
+		for (j=0; j<charPerWord; j++) {
+			occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
+			c <<= bitPerChar;
+		}
+	}
+
+}
+
diff --git a/DNACount.h b/DNACount.h
new file mode 100644
index 0000000..5aec451
--- /dev/null
+++ b/DNACount.h
@@ -0,0 +1,91 @@
+/*
+
+   DNACount.h		DNA Count
+
+   This module contains DNA occurrence counting functions. The DNA must be
+   in word-packed format.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __DNA_COUNT_H__
+#define __DNA_COUNT_H__
+
+#include "TypeNLimit.h"
+
+// DNA
+#define DNA_ALPHABET_SIZE			4
+#define DNA_CHAR_PER_WORD			16
+#define DNA_BIT_PER_CHAR			2
+
+// DNA occurrence count table
+#define DNA_OCC_CNT_TABLE_SIZE_IN_WORD	65536
+#define DNA_OCC_SUM_EXCEPTION(sum)			(((sum) & 0xfefefeff) == 0)	// true when no bit other than bits 8, 16 and 24 is set (also true for 0); argument parenthesized so expressions expand safely
+
+// DNA with 'n'
+#define DNA_N_ALPHABET_SIZE			5
+#define DNA_N_CHAR_PER_WORD			10
+#define DNA_N_BIT_PER_CHAR			3
+
+// DNA with 'n' occurrence count table
+#define DNA_N_OCC_CNT_TABLE_SIZE_IN_WORD	32786	// NOTE(review): lookups in DNACount.c use 15-bit indices (0..32767), so 32768 would suffice; 32786 looks like transposed digits - confirm before changing
+
+
+void GenerateDNAOccCountTable(unsigned int *dnaDecodeTable);
+
+// The following functions can only count up to 255 characters
+unsigned int ForwardDNAOccCount(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+unsigned int BackwardDNAOccCount(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+void ForwardDNAAllOccCount(const unsigned int* dna, const unsigned int index, unsigned int* __restrict occCount, const unsigned int* dnaDecodeTable);
+void BackwardDNAAllOccCount(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+unsigned int Forward1OccCount(const unsigned int* bitVector, const unsigned int index, const unsigned int* dnaDecodeTable);	// Count number of 1 bit
+unsigned int Backward1OccCount(const unsigned int* bitVector, const unsigned int index, const unsigned int* dnaDecodeTable); // Count number of 1 bit
+
+// The following functions have no limit on the number of characters
+unsigned int ForwardDNAOccCountNoLimit(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+unsigned int BackwardDNAOccCountNoLimit(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+void ForwardDNAAllOccCountNoLimit(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+void BackwardDNAAllOccCountNoLimit(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+
+
+void GenerateDNA_NOccCountTable(unsigned int *dnaDecodeTable);
+
+// The following functions have no limit on the number of characters (NOTE(review): same wording as the NoLimit group below; confirm whether these variants are limited to 255 characters)
+unsigned int ForwardDNA_NOccCount(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+unsigned int BackwardDNA_NOccCount(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+void ForwardDNA_NAllOccCount(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+void BackwardDNA_NAllOccCount(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+
+// The following functions have no limit on the number of characters
+unsigned int ForwardDNAnOccCountNoLimit(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);	// NOTE(review): spelled DNAn, unlike its DNA_N siblings; confirm it matches the definition in DNACount.c
+unsigned int BackwardDNA_NOccCountNoLimit(const unsigned int* dna, const unsigned int index, const unsigned int character, const unsigned int* dnaDecodeTable);
+void ForwardDNA_NAllOccCountNoLimit(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+void BackwardDNA_NAllOccCountNoLimit(const unsigned int* dna, const unsigned int index, unsigned int* __restrict  occCount, const unsigned int* dnaDecodeTable);
+// Forward* functions count the first index characters starting at the given pointer; Backward* functions count the index characters just before it (seen in the DNA_N NoLimit and generic implementations; presumably the same for the others)
+// The first character from startAddr is indexed as 1
+// The DNA_NAllOccCount functions only count occurrences of characters 0 to 3
+
+// The following functions work for any word packed text
+unsigned int ForwardOccCount(const unsigned int* packed, const unsigned int index, const unsigned int character, const unsigned int alphabetSize);
+unsigned int BackwardOccCount(const unsigned int* packed, const unsigned int index, const unsigned int character, const unsigned int alphabetSize);
+void ForwardAllOccCount(const unsigned int* packed, const unsigned int index, const unsigned int alphabetSize, unsigned int* occCount);
+void BackwardAllOccCount(const unsigned int* packed, const unsigned int index, const unsigned int alphabetSize, unsigned int* occCount);
+// ForwardAllOccCount and BackwardAllOccCount add to occCount without clearing it first
+
+#endif
+
diff --git a/GPLv3 b/GPLv3
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/GPLv3
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/HSP.c b/HSP.c
new file mode 100644
index 0000000..a0ae91c
--- /dev/null
+++ b/HSP.c
@@ -0,0 +1,339 @@
+/*
+
+   HSP.c		BWTBlastn functions
+
+   This module contains miscellaneous BWTBlastn functions.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <stdio.h>
+
+#include "MemManager.h"
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdint.h>
+#include "TextConverter.h"
+#include "MiscUtilities.h"
+#include "r250.h"
+#include "HSP.h"
+
+
+extern  double stat_expectationValue;
+
+void HSPFillCharMap(unsigned char charMap[255]) {
+	// Builds the table that maps a character code to its position in dnaChar[].
+	// Only entries 0..254 are written, matching the declared parameter size.
+	int code;
+
+	// Start with every code pointing at the non-match index.
+	memset(charMap, nonMatchDnaCharIndex, 255);
+	for (code = 0; code < 16; code++) {
+		const int upper = (int)dnaChar[code];
+		charMap[upper] = (unsigned char)code;			// upper-case letter
+		charMap[upper - 'A' + 'a'] = (unsigned char)code;	// its lower-case twin
+	}
+}
+
+void HSPFillComplementMap(unsigned char complementMap[255]) {
+	// Builds the table that maps a character code to its position in dnaComplement[].
+	// Only entries 0..254 are written, matching the declared parameter size.
+	int code;
+	// Start with every code pointing at the non-match index.
+	memset(complementMap, nonMatchDnaCharIndex, 255);
+	for (code = 0; code < 16; code++) {
+		const int upper = (int)dnaComplement[code];
+		complementMap[upper] = (unsigned char)code;			// upper-case letter
+		complementMap[upper - 'A' + 'a'] = (unsigned char)code;	// its lower-case twin
+	}
+}
+
+HSP *HSPLoad (MMPool *mmPool, const char *PackedDNAFileName, const char *AnnotationFileName) {
+	// Returns an HSP taken from mmPool holding the packed DNA (optional) and the annotation (required).
+	HSP *hsp;
+	int i;
+	FILE *annotationFile = NULL;
+	hsp = MMPoolDispatch(mmPool, sizeof(HSP));
+	hsp->numOfBlock = 0;	// defined value even if the block count is never read
+
+	// Load packed DNA
+	if (PackedDNAFileName != NULL && PackedDNAFileName[0] != '\0' && PackedDNAFileName[0] != '-') {
+		hsp->packedDNA = DNALoadPacked(PackedDNAFileName, &hsp->dnaLength, TRUE);
+	} else {
+		hsp->packedDNA = NULL;
+		hsp->dnaLength = 0;
+	}
+	// Load Annotation: header line, chrNum "nameLen name" lines, block count, then one line per block
+	if (AnnotationFileName != NULL && (annotationFile = fopen(AnnotationFileName, "r"))){
+		unsigned int numOfChar = 0;
+		int FASTARandomSeed =0;
+		int chrNum =0;
+		fscanf(annotationFile, "%u\t%d\t%d\n", &numOfChar, &chrNum, &FASTARandomSeed);
+		hsp->chrNum = chrNum;
+		hsp->chrName =  MMUnitAllocate((chrNum+1)*sizeof(char *));
+		for (i=0; i<hsp->chrNum; ++i){
+			unsigned int nameLen = 0;
+			fscanf(annotationFile, "%u\t", &nameLen);
+			hsp->chrName[i] =  MMUnitAllocate((nameLen+1) * sizeof(char));
+			fscanf(annotationFile, "%s\n", hsp->chrName[i]);
+		}
+		fscanf(annotationFile, "%d\n", &(hsp->numOfBlock));
+		hsp->blockList =  MMUnitAllocate(((hsp->numOfBlock)+1) * sizeof(ChrBlock));
+		for(i=0;i<hsp->numOfBlock;++i){
+			ChrBlock *p = hsp->blockList+i;
+			fscanf(annotationFile, "%d\t%u\t%u\t%u\n", &(p->chrID), &(p->blockStart), &(p->blockEnd), &(p->ori));
+		}
+	}
+	// The code below reads the last block and closes the file, so both must exist; stop with a message instead of crashing.
+	if (annotationFile == NULL || hsp->numOfBlock < 1) {
+		fprintf(stderr, "HSPLoad() : Cannot load annotation!\n");
+		exit(1);
+	}
+	hsp->dnaLength = hsp->blockList[hsp->numOfBlock-1].blockEnd;
+#ifdef DEBUG
+	int j = hsp->numOfBlock;
+	fprintf(stderr, "%d\t%d\t%u\t%u\n", hsp->blockList[j-1].chrID, hsp->blockList[j-1].ori, hsp->blockList[j-1].blockStart, hsp->blockList[j-1].blockEnd);
+#endif
+	fclose(annotationFile);
+	return hsp;
+}
+
+void HSPFree(MMPool *mmPool, HSP *hsp) {
+	// Releases the packed DNA, the name strings, the name array and the block list, then returns hsp to mmPool.
+	int i;
+	if (hsp->packedDNA != NULL) {
+		DNAFreePacked(hsp->packedDNA, hsp->dnaLength);
+	}
+	for(i=0; i<hsp->chrNum; ++i){
+		MMUnitFree(hsp->chrName[i], (strlen(hsp->chrName[i])+1)*sizeof(char));
+	}
+	MMUnitFree(hsp->chrName,(hsp->chrNum+1)* sizeof(char *));
+	MMUnitFree(hsp->blockList, (hsp->numOfBlock+1) * sizeof(ChrBlock));
+	// HSPLoad() dispatched sizeof(HSP) bytes; sizeof(hsp) would only be the size of the pointer.
+	MMPoolReturn(mmPool, hsp, sizeof(HSP));
+}
+
+unsigned int HSPParseFASTAToPacked(const char* FASTAFileName, const char* annotationFileName, const char* packedDNAFileName, const char* ambiguityFileName, const unsigned int FASTARandomSeed, const int maskLowerCase) {
+	// Packs a FASTA file 4 characters per byte and writes the annotation file; returns the number of sequences kept.
+	// Sequences shorter than 75 characters are skipped. Runs of fewer than 10 non-ACGT characters are stored as 'G'; longer runs are left out and split the sequence into blocks.
+	// maskLowerCase is not used (lower-case input is upper-cased); ambiguityFileName is only created, nothing is written to it.
+	FILE *FASTAFile, *annotationFile, *packedDNAFile, *ambiguityFile;
+	NewAnnotation *chrAnnotation;
+	int chrAnnAllocated = 256;
+	int blockAllocated = 256;
+	char c;
+	int chrNum, blockNum;
+	unsigned int i, l;
+	int nCount;
+	unsigned int chrLen, usefulCharNum, numCharInBuffer, totalNumChar;
+	unsigned char charMap[256];	// one slot per unsigned char value, so any input byte is a valid index
+	char *chrSeq, *p;
+	unsigned int chrAllocated = 65536;
+	unsigned char buffer[PACKED_BUFFER_SIZE];
+	unsigned char packedBuffer[PACKED_BUFFER_SIZE / 4];
+	chrLen = usefulCharNum = numCharInBuffer = totalNumChar = chrNum = blockNum = i = l = nCount = 0;
+	FASTAFile = (FILE*)fopen64(FASTAFileName, "r");
+	if (FASTAFile == NULL) {
+		fprintf(stderr, "ParseFASTToPacked() : Cannot open FASTAFileName!\n");
+		exit(1);
+	}
+
+	annotationFile = (FILE*)fopen64(annotationFileName, "w");
+	if (annotationFile == NULL) {
+		fprintf(stderr, "ParseFASTToPacked() : Cannot open annotationFileName!\n");
+		exit(1);
+	}
+
+	packedDNAFile = (FILE*)fopen64(packedDNAFileName, "wb");
+	if (packedDNAFile == NULL) {
+		fprintf(stderr, "ParseFASTToPacked() : Cannot open packedDNAFileName!\n");
+		exit(1);
+	}
+
+	ambiguityFile = (FILE*)fopen64(ambiguityFileName, "w");
+	if (ambiguityFile == NULL) {
+		fprintf(stderr, "ParseFASTToPacked() : Cannot open ambiguityFileName!\n");
+		exit(1);
+	}
+
+	HSPFillCharMap(charMap);
+	charMap[255] = nonMatchDnaCharIndex;	// HSPFillCharMap() fills entries 0..254 only
+	c = (char)getc(FASTAFile);
+	if (c != '>') {
+		fprintf(stderr, "ParseFASTToPacked() : FASTA file does not begin with '>'!\n");
+		exit(1);
+	}
+	chrAnnotation = (NewAnnotation *)malloc(sizeof(NewAnnotation)*chrAnnAllocated);
+	chrSeq = (char*)malloc(sizeof(char)*chrAllocated);
+	chrNum = blockNum = usefulCharNum = numCharInBuffer = 0;
+	while(!feof(FASTAFile)){
+		if (chrNum == chrAnnAllocated){
+			chrAnnAllocated <<= 1;
+			chrAnnotation = (NewAnnotation *)realloc(chrAnnotation, sizeof(NewAnnotation)*chrAnnAllocated);
+		}
+
+		l=0;
+		c = (char)getc(FASTAFile);
+		while(!feof(FASTAFile) && c!='\t' && c!=' ' && c!='\n' && l<MAX_SEQ_NAME_LENGTH-1){	// keep one slot for the terminator
+			chrAnnotation[chrNum].chrName[l]=c;
+			l++;
+			c=(char)getc(FASTAFile);
+		}
+		chrAnnotation[chrNum].chrName[l]='\0';
+		while(c!='\n' && !feof(FASTAFile)){	// skip the rest of the header; stop at EOF if the line has no newline
+			c=(char)getc(FASTAFile);
+		}
+		chrLen = 0;
+		while(c!='>' && !feof(FASTAFile)){
+			if (c!='\n'){
+				if (c>='a' && c<='z'){
+					c+='A'-'a';
+				}
+				if (chrLen >= chrAllocated){
+					chrAllocated <<= 1;
+					chrSeq = (char*)realloc(chrSeq, sizeof(char)*chrAllocated);
+				}
+				*(chrSeq+chrLen) = c;
+				chrLen += 1;
+			}
+			c=(char)getc(FASTAFile);
+		}
+		if (chrLen < 75) continue;	// chrNum is not advanced, so this slot is reused by the next sequence
+		// Look for the first non-ACGT character; the bound is tested before chrSeq is read.
+		i=0;
+		p=chrSeq;
+		while (i != chrLen && ambiguityCount[charMap[(unsigned char)*p]] == 1) { i++; p++; }
+		if (i == chrLen) {
+			blockNum = 1;
+			chrAnnotation[chrNum].blockInChr = (ChrBlock *)malloc(sizeof(ChrBlock)*blockNum);
+			chrAnnotation[chrNum].chrStart = usefulCharNum;
+			chrAnnotation[chrNum].blockNum = blockNum;
+			chrAnnotation[chrNum].blockInChr[0].blockStart = usefulCharNum;
+			chrAnnotation[chrNum].blockInChr[0].ori = 0;
+			usefulCharNum += chrLen;
+			chrAnnotation[chrNum].chrEnd = usefulCharNum-1;
+			chrAnnotation[chrNum].blockInChr[0].blockEnd = usefulCharNum-1;
+			i=0;
+			while(i<chrLen){
+				if (numCharInBuffer >= PACKED_BUFFER_SIZE) {
+					ConvertTextToBytePacked(buffer, packedBuffer, charMap, 4, PACKED_BUFFER_SIZE);
+					fwrite(packedBuffer, 1, PACKED_BUFFER_SIZE / 4, packedDNAFile);
+					numCharInBuffer = 0;
+				}
+				buffer[numCharInBuffer++] = chrSeq[i++];
+			}
+		} else {
+			i=0;
+			p = chrSeq;
+			while (ambiguityCount[charMap[(unsigned char)*p]]!=1 && ++i!=chrLen) p++;
+			if (i<10) { i = 0; p = chrSeq;}	// a short leading run is kept and handled by the loop below
+			blockNum = 1;
+			chrAnnotation[chrNum].blockInChr = (ChrBlock *)malloc(sizeof(ChrBlock)*blockAllocated);
+			chrAnnotation[chrNum].chrStart = usefulCharNum;
+			chrAnnotation[chrNum].blockInChr[blockNum-1].ori = i;
+			chrAnnotation[chrNum].blockInChr[blockNum-1].blockStart = usefulCharNum;
+			int len=0;
+			while (i<chrLen) {
+				if(ambiguityCount[charMap[(unsigned char)*p]] == 1){
+					if (numCharInBuffer >= PACKED_BUFFER_SIZE) {
+						ConvertTextToBytePacked(buffer, packedBuffer, charMap, 4, PACKED_BUFFER_SIZE);
+						fwrite(packedBuffer, 1, PACKED_BUFFER_SIZE / 4, packedDNAFile);
+						numCharInBuffer = 0;
+					}
+					buffer[numCharInBuffer++] = *p++;
+					i++;
+					usefulCharNum++;
+					len++;
+				}else{
+					nCount = 0;
+					while(i<chrLen && (ambiguityCount[charMap[(unsigned char)*p]]!=1)){	// bound first: never read past the sequence
+						nCount++;
+						i++;
+						p++;
+					}
+					if (nCount<10) {
+						do {
+							if (numCharInBuffer >= PACKED_BUFFER_SIZE) {
+								ConvertTextToBytePacked(buffer, packedBuffer, charMap, 4, PACKED_BUFFER_SIZE);
+								fwrite(packedBuffer, 1, PACKED_BUFFER_SIZE / 4, packedDNAFile);
+								numCharInBuffer = 0;
+							}
+							buffer[numCharInBuffer++] = 'G';
+							usefulCharNum++;
+							len++;
+						} while(--nCount>0);
+					} else {
+						if (i<chrLen) {
+							chrAnnotation[chrNum].blockInChr[blockNum-1].blockEnd = usefulCharNum -1;
+							chrAnnotation[chrNum].blockInChr[blockNum-1].ori = i-nCount-len;
+							if (blockNum == blockAllocated){
+								blockAllocated <<= 1;
+								chrAnnotation[chrNum].blockInChr = (ChrBlock *)realloc(chrAnnotation[chrNum].blockInChr, sizeof(ChrBlock)*blockAllocated);
+							}
+							blockNum++;
+							len=0;
+							chrAnnotation[chrNum].blockInChr[blockNum-1].blockStart = usefulCharNum;
+						} else {
+							i-=nCount;
+							break;
+						}
+					}
+				}
+			}
+			chrAnnotation[chrNum].blockInChr[blockNum-1].blockEnd = usefulCharNum-1;
+			chrAnnotation[chrNum].blockInChr[blockNum-1].ori = i-len;
+			chrAnnotation[chrNum].blockNum = blockNum;
+			chrAnnotation[chrNum].chrEnd = usefulCharNum-1;
+		}
+		// The sequence is kept: count it together with its unfiltered length.
+		chrNum++;
+		totalNumChar += chrLen;
+	}
+	if (numCharInBuffer > 0) {
+		ConvertTextToBytePacked(buffer, packedBuffer, charMap, 4, numCharInBuffer);
+		fwrite(packedBuffer, 1, (numCharInBuffer + 3) / 4, packedDNAFile);
+		numCharInBuffer = 0;
+	}
+	if (totalNumChar % 4 == 0) {
+		c = 0;
+		fwrite(&c, 1, 1, packedDNAFile);
+	}
+	c = (char)(totalNumChar % 4);
+	fwrite(&c, 1, 1, packedDNAFile);
+	fclose(packedDNAFile);
+	fprintf(annotationFile, "%u\t%d\t%d\n", totalNumChar, chrNum, FASTARandomSeed);
+	int j=0;
+	int total = 0;
+	for (i=0;i<chrNum;i++) {
+		fprintf(annotationFile, "%d\t%s\n", (int)strlen(chrAnnotation[i].chrName), chrAnnotation[i].chrName);
+		total += chrAnnotation[i].blockNum;
+	}
+	fprintf(annotationFile, "%d\n", total);
+	for(i=0;i<chrNum;i++){
+		for(j=0;j<chrAnnotation[i].blockNum;j++){
+			fprintf(annotationFile,"%d\t%u\t%u\t%u\n",i, chrAnnotation[i].blockInChr[j].blockStart, chrAnnotation[i].blockInChr[j].blockEnd, chrAnnotation[i].blockInChr[j].ori);
+		}
+		free(chrAnnotation[i].blockInChr);
+	}
+	free(chrAnnotation);
+	free(chrSeq);
+	fclose(annotationFile);
+	fclose(ambiguityFile);
+	fclose(FASTAFile);
+	return chrNum;
+}
diff --git a/HSP.h b/HSP.h
new file mode 100644
index 0000000..59cf0d2
--- /dev/null
+++ b/HSP.h
@@ -0,0 +1,126 @@
+/*
+
+   HSP.h		BWTBlastn functions
+
+   This module contains miscellaneous BWTBlastn functions.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __HSP_H__
+#define __HSP_H__
+
+#include "TypeNLimit.h"
+#include "MemManager.h"
+#include "TextConverter.h"
+
+#define ALPHABET_SIZE				4
+#define BIT_PER_CHAR				2
+#define CHAR_PER_128				64
+#define CHAR_PER_WORD				16
+#define CHAR_PER_BYTE				4
+
+#define MAX_ALIGNMENT_LENGTH	131072
+#define SHORTEST	70
+
+typedef struct _ChrBlock{
+	int chrID;			// index of the sequence the block belongs to, in annotation-file order
+	unsigned int blockStart;	// position of the block's first character in the packed text
+	unsigned int blockEnd;		// position of the block's last character in the packed text (inclusive)
+	unsigned int ori;		// offset of the block's first character inside its original sequence
+}ChrBlock;
+
+typedef struct _NewAnnotation{
+	char chrName[MAX_SEQ_NAME_LENGTH];	// NOTE(review): MAX_SEQ_NAME_LENGTH is #defined further down in this header; presumably an included header provides it too - confirm
+	int nameLen;			// not assigned by HSPParseFASTAToPacked(); the name length is taken with strlen() there
+	unsigned int chrStart;		// packed-text position of the sequence's first kept character
+	unsigned int chrEnd;		// packed-text position of the sequence's last kept character (inclusive)
+	int blockNum;			// number of entries used in blockInChr
+	ChrBlock *blockInChr;		// malloc'ed array of blocks, freed by HSPParseFASTAToPacked()
+}NewAnnotation;
+
+typedef struct Annotation {
+	int gi;
+	char text[MAX_SEQ_NAME_LENGTH+1];
+} Annotation;
+
+typedef struct HSP {
+	unsigned int* packedDNA;	// result of DNALoadPacked(), or NULL when no packed file name is given
+	int chrNum;			// number of entries in chrName
+	char **chrName;			// sequence names read from the annotation file
+	int numOfBlock;			// number of entries in blockList
+	ChrBlock *blockList;		// blocks read from the annotation file
+	unsigned int dnaLength;		// HSPLoad() sets this to blockEnd of the last block
+}HSP;
+
+#define MAX_SEQ_NAME_LENGTH				256
+
+#define MAX_HISTO_SIZE					256
+
+#define INVALID_CHAR_INDEX				15
+
+#define ALIGN_MATCH					0
+#define ALIGN_MISMATCH_AMBIGUITY	1
+#define ALIGN_INSERT				2
+#define ALIGN_DELETE				3
+
+#define ALIGN_PER_WORD				16
+#define ALIGN_BIT					2
+
+#define AUX_TEXT_PER_WORD			8
+#define AUX_TEXT_BIT				4
+
+static const char lowercaseDnaCharIndex = 14;	// Seems that BLAST treat masked characters as 'N' (still have 1/4 chance of matching)
+static const char nonMatchDnaCharIndex  = 15;	// position of 'L' in dnaChar[]; ambiguityCount[15] is 0
+static const char dnaChar[16]			= {'A', 'C', 'G', 'T', 'M', 'R', 'S', 'V', 'W', 'Y', 'H', 'K', 'D', 'B', 'N', 'L'};	// index -> character code
+static const char dnaComplement[16]		= {'T', 'G', 'C', 'A', 'K', 'Y', 'S', 'B', 'W', 'R', 'D', 'M', 'H', 'V', 'N', 'L'};	// index -> complement of dnaChar[index]
+static const char ambiguityCount[16]    = { 1 ,  1 ,  1 ,  1 ,  2 ,  2 ,  2 ,  3 ,  2 ,  2 ,  3 ,  2 ,  3 ,  3 ,  4 ,  0 };	// how many of A,C,G,T each code stands for
+static const char ambiguityMatch[16][4] = {{0, 0, 0, 0},	// per code: dnaChar indexes it matches; only the first ambiguityCount[code] entries are meaningful
+	{1, 0, 0, 0},
+	{2, 0, 0, 0},
+	{3, 0, 0, 0},
+	{0, 1, 0, 0},
+	{0, 2, 0, 0},
+	{1, 2, 0, 0},
+	{0, 1, 2, 0},
+	{0, 3, 0, 0},
+	{1, 3, 0, 0},
+	{0, 1, 3, 0},
+	{2, 3, 0, 0},
+	{0, 2, 3, 0},
+	{1, 2, 3, 0},
+	{0, 1, 2, 3},
+	{0, 0, 0, 0}
+};
+
+// Allocate maps as unsigned char[256]; the fill functions in HSP.c write entries 0..254 only
+void HSPFillCharMap(unsigned char *charMap);
+void HSPFillComplementMap(unsigned char *complementMap);
+
+HSP *HSPLoad(MMPool *mmPool, const char *PackedDNAFileName, const char *AnnotationFileName);
+HSP *HSPConvertFromText(MMPool *mmPool, const unsigned char *text, const unsigned int textLength,
+						const unsigned int FASTARandomSeed, const int maskLowerCase,
+						const int gi, const char *seqName);
+void HSPFree(MMPool *mmPool, HSP *hsp);
+
+unsigned int HSPParseFASTAToPacked(const char* FASTAFileName, const char* annotationFileName, const char* packedDNAFileName, const char* ambiguityFileName,
+					  const unsigned int FASTARandomSeed, const int maskLowerCase);
+unsigned int HSPPackedToFASTA(const char* FASTAFileName, const char* annotationFileName, const char* packedDNAFileName, const char* ambiguityFileName);
+
+
+#endif
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..281bcd3
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,15 @@
+System Requirements
+===================
+
+Compiling SOAP2 requires an Intel x86_64 CPU and gcc version 4.2.3 or above.
+
+
+Compilation
+===========
+
+Type `make' to compile SOAP2.
+
+
+Installation
+============
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..8a070e4
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,65 @@
+# GNU makefile for the `soap' aligner.
+SHELL         = /bin/sh
+PROG          = soap
+# Build switches: set to YES to enable.
+DEBUG         = NO
+PROFILE       = NO
+PTHREADS      = YES
+CC            = gcc
+DEBUG_FLAGS   = -g3 -Wall -O2
+PROFILE_FLAGS = -fprofile-arcs -ftest-coverage -pg 
+RELEASE_FLAGS = -msse3 -O3 -static -funroll-loops -maccumulate-outgoing-args -fomit-frame-pointer 
+STATIC_FLAGS  = -static
+# MAKE_TIME expands to a string literal holding the output of `date`.
+DFLAGS        = -DMAKE_TIME=\""`date`"\"
+LIBS          = -lm
+#TARBALL_EXCLUDE = "*.(o,gz,zip)"
+#ZIP_EXCLUDE     = *.o *.gz *.zip
+
+ifeq (YES, $(DEBUG))
+        CFLAGS   = $(DEBUG_FLAGS) $(STATIC_FLAGS)
+        DFLAGS  += -DDEBUG
+#        PTHREADS = NO
+else
+        CFLAGS   = $(RELEASE_FLAGS) $(STATIC_FLAGS)
+endif
+
+ifeq (YES, $(PTHREADS))
+        LIBS   +=  -lpthread
+        DFLAGS +=  -DPTHREADS
+endif
+
+# DFLAGS is passed to both the compile and the link command, so the
+# profiling options reach both steps.
+ifeq (YES, $(PROFILE))
+       DFLAGS += $(PROFILE_FLAGS)
+endif
+
+OBJ = SeqIO.o MiscUtilities.o MemManager.o TextConverter.o r250.o DNACount.o HSP.o Timing.o BWT.o extratools.o soapio.o BWTAln.o Match.o PairMatch.o stdaln.o kstring.o
+
+# `all' and `clean' are commands, not files.
+.PHONY: all clean
+
+# Clear the suffix list, then keep only .c and .o for the rule below.
+# (The special target is spelled .SUFFIXES; `.SUFFIX' is an ordinary target.)
+.SUFFIXES:
+.SUFFIXES: .c .o
+
+.c.o:
+	$(CC) -c $(CFLAGS) $(DFLAGS) $< -o $@
+
+all: $(PROG)
+
+$(PROG): $(OBJ) soap.o
+	$(CC) $(CFLAGS) $(DFLAGS) $(OBJ) soap.o -o $@ $(LIBS)
+
+# Header dependencies (prerequisites only; the .c.o rule supplies the recipe).
+SeqIO.o:SeqIO.h
+r250.o: r250.h
+DNACount.o:DNACount.h
+HSP.o:HSP.h
+MiscUtilities.o:MiscUtilities.h
+MemManager.o:MemManager.h
+TextConverter.o:TextConverter.h
+extratools.o:extratools.h BWT.h MiscUtilities.h MemManager.h TextConverter.h Timing.h HSP.h kstring.h
+soapio.o:soapio.h SeqIO.h
+BWT.o:BWT.h 
+BWTAln.o:BWTAln.h BWT.h
+Match.o:Match.h BWTAln.h soapio.h
+PairMatch.o:Match.h BWTAln.h stdaln.h
+stdaln.o:stdaln.h
+kstring.o:kstring.h
+
+clean:
+	rm -f *.o $(PROG)
diff --git a/Match.c b/Match.c
new file mode 100644
index 0000000..3ccb30d
--- /dev/null
+++ b/Match.c
@@ -0,0 +1,393 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  Match.c
+ *
+ *    Description:  *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+#include "Match.h"
+
+#define GenCigarMD() {		\
+	if (hits->itemList[i].n_cigar == 0){						\
+		int j, match;				\
+		match = 0;					\
+		for(j=0; j<len && occPos < dnaLength; ++j, ++occPos){			\
+			unsigned c = (((*(pacRef+(occPos>>4)))>>(((~occPos)&0xf)<<1)))&0x3;				\
+			if(!(((*(seq+j))&0x3) ^ c)){				\
+				++match;					\
+			} else {					\
+				if(match||!j) ksprintf(str, "%d", match);				\
+				kputc("ACGTN"[c], str);					\
+				match = 0;						\
+			}								\
+		}				\
+		ksprintf(str, "%d", match);				\
+		(alnSeq->itemList+i)->n_cigar = 1;						\
+		(alnSeq->itemList+i)->cigar = (unsigned short *)malloc(sizeof(unsigned short)*(alnSeq->itemList->n_cigar));	\
+		(alnSeq->itemList+i)->cigar[0] = (FROM_M << 14) | (len & 0x3ff);		\
+		(alnSeq->itemList+i)->md = strdup(str->s);				\
+	} else {				\
+		int n_cigar = hits->itemList[i].n_cigar;			\
+		hits->itemList[i].n_cigar = 0;			\
+		(alnSeq->itemList+i)->cigar = (unsigned short *)malloc(sizeof(unsigned short)*(1+n_cigar));	\
+		memcpy((alnSeq->itemList+i)->cigar, cigar, n_cigar*sizeof(unsigned short));	\
+		unsigned int x = (alnSeq->itemList+i)->occ_pos;				\
+		unsigned int y, z;							\
+		y = z = 0;						\
+		int k, l, u;				\
+		unsigned char c;						\
+		for (k = u = 0; k < n_cigar; ++k) {					\
+			l = cigar[k]&0x3fff;					\
+			if (cigar[k]>>14 == FROM_M) {					\
+				for (z = 0; z < l && x+z < dnaLength; ++z) {		\
+					c = (((*(pacRef+((x+z)>>4)))>>(((~(x+z))&0xf)<<1))) & 0x3;	\
+					if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) {	\
+						if(u||!(y+z)) ksprintf(str, "%d", u);			\
+						kputc("ACGTN"[c], str);			\
+						u = 0;					\
+					} else ++u;					\
+				}																					\
+				x += l; y += l;																		\
+			} else if (cigar[k]>>14 == FROM_I || cigar[k]>>14 == 3) {								\
+				y += l;																				\
+			} else if (cigar[k]>>14 == FROM_D) {													\
+				ksprintf(str, "%d", u);																\
+				kputc('D', str);																	\
+				for (z = 0; z < l && x+z < dnaLength; ++z)											\
+					kputc("ACGTN"[(((*(pacRef+((x+z)>>4)))>>(((~(x+z))&0xf)<<1))) & 0x3], str);		\
+				u = 0;			\
+				x += l;			\
+			}			\
+		}			\
+/*		free(cigar); cigar = NULL;	*/		\
+		ksprintf(str, "%d", u);\
+		(alnSeq->itemList+i)->md = strdup(str->s);			\
+	}			\
+}
+
+/*
+ * PickupHit() - copy the hit(s) chosen for output from `hits` into `alnSeq`
+ * and attach a cigar array and an MD string to each of them.
+ *
+ *   alnSeq     read being reported; report, nhits and itemList are written
+ *   rr         0 or 1: report the single hit at index *site (with rr == 0 a
+ *              read that has more than one hit is not reported at all);
+ *              any other value: report all hits->n hits
+ *   site       index of the hit to report in the single-hit case (read only)
+ *   hits       hit table; n_cigar of an item that had a cigar is reset to 0
+ *   pacRef     packed reference, 2 bits per base, 16 bases per word, first
+ *              base of a word in the most significant bits
+ *   dnaLength  number of bases in pacRef; comparison stops at this bound
+ *   cigar      cigar elements to copy when the hit has n_cigar != 0
+ *              (op in the top 2 bits, length in the low 14 bits)
+ *
+ * alnSeq->itemList, each item's cigar and each item's md are allocated here
+ * with malloc()/strdup(); this function does not free them.
+ *
+ * Changes over the previous version:
+ *   - the "nothing to report" test runs before the scratch string is
+ *     allocated, so the early return no longer leaks it;
+ *   - md falls back to "" when nothing was appended to the scratch string
+ *     (its buffer is still NULL then and must not reach strdup());
+ *   - the definition is no longer `inline`, so an external definition is
+ *     emitted under C99 inline rules as well.
+ */
+void PickupHit(ALNSEQ *alnSeq, const int rr,int *site, HITTABLE *hits, const unsigned int *pacRef, const unsigned int dnaLength, unsigned short *cigar){
+	int i = *site;
+	int n = hits->n;
+	kstring_t *str;
+
+	/* No hit at all, or several hits while only unique hits are wanted. */
+	if (!n || (n > 1 && !rr)) {
+		alnSeq->report = 0;
+		alnSeq->nhits = 0;
+		return;
+	}
+
+	/* Scratch string the MD text is assembled in. */
+	str = (kstring_t *)calloc(1, sizeof(kstring_t));
+	str->l = 0; str->m = 0;
+
+	if(rr == 1 || rr == 0) {
+		alnSeq->report = 1;
+		alnSeq->itemList = (HITITEM *)malloc(sizeof(HITITEM) *1);
+		HITCPY(alnSeq->itemList, hits->itemList+i);
+		unsigned int occPos = alnSeq->itemList->occ_pos;
+		unsigned int len = alnSeq->len;
+		/* strain != 0 selects the reverse-complement sequence */
+		char *seq = alnSeq->itemList->strain?alnSeq->rc:alnSeq->seq;
+		if (hits->itemList[i].n_cigar == 0){
+			/* Ungapped hit: compare read and reference base by base. */
+			int j, match;
+			match = 0;
+			for(j=0; j<len && occPos < dnaLength; ++j, ++occPos){
+				unsigned c = (((*(pacRef+(occPos>>4)))>>(((~occPos)&0xf)<<1)))&0x3;
+				if(!(((*(seq+j))&0x3) ^ c)){
+					++match;
+				} else {
+					/* a leading mismatch still gets its "0" run length */
+					if(match || !j)ksprintf(str, "%d", match);
+					kputc("ACGTN"[c], str);
+					match = 0;
+				}
+			}
+			if(match)ksprintf(str, "%d", match);
+			alnSeq->itemList->n_cigar = 1;
+			alnSeq->itemList->cigar = (unsigned short *)malloc(sizeof(unsigned short)*(alnSeq->itemList->n_cigar));
+			alnSeq->itemList->cigar[0] = (FROM_M << 14) | (len & 0x3fff);
+			alnSeq->itemList->md = strdup(str->s ? str->s : "");
+		} else {
+			/* Gapped hit: copy the caller's cigar and walk it. */
+			int n_cigar = hits->itemList[i].n_cigar;
+			hits->itemList[i].n_cigar = 0;
+			alnSeq->itemList->cigar = (unsigned short *)malloc(sizeof(unsigned short)*(1+n_cigar));
+			memcpy(alnSeq->itemList->cigar, cigar, n_cigar*sizeof(unsigned short));
+			/* x walks the reference, y walks the read, u counts the current match run */
+			unsigned int x = alnSeq->itemList->occ_pos;
+			unsigned int y, z;
+			y = z = 0;
+			int k, l, u;
+			unsigned char c;
+			for (k = u = 0; k < n_cigar; ++k) {
+				l = cigar[k]&0x3fff;
+				if (cigar[k]>>14 == FROM_M) {
+					for (z = 0; z < l && x+z < dnaLength; ++z) {
+						c = (((*(pacRef+((x+z)>>4)))>>(((~(x+z))&0xf)<<1))) & 0x3;
+						if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) {
+							if(u||!(y+z))ksprintf(str, "%d", u);
+							kputc("ACGTN"[c], str);
+							u = 0;
+						} else ++u;
+					}
+					x += l; y += l;
+				} else if (cigar[k]>>14 == FROM_I || cigar[k]>>14 == 3) {
+					/* consumes read bases only */
+					y += l;
+				} else if (cigar[k]>>14 == FROM_D) {
+					/* consumes reference bases only; they are listed after 'D' */
+					ksprintf(str, "%d", u);
+					kputc('D', str);
+					for (z = 0; z < l && x+z < dnaLength; ++z)
+						kputc("ACGTN"[(((*(pacRef+((x+z)>>4)))>>(((~(x+z))&0xf)<<1))) & 0x3], str);
+					u = 0;
+					x += l;
+				}
+			}
+			if (u) ksprintf(str, "%d", u);
+			alnSeq->itemList->md = strdup(str->s ? str->s : "");
+		}
+		alnSeq->nhits = n;
+	} else {
+		/* Report every hit; the scratch string is rewound for each one. */
+		alnSeq->report = n;
+		alnSeq->itemList = (HITITEM *)malloc(sizeof(HITITEM) * n);
+		for (i = 0; i < n; ++i){
+			str->l = 0;
+			HITCPY(alnSeq->itemList+i, hits->itemList+i);
+			unsigned int occPos = (alnSeq->itemList+i)->occ_pos;
+			unsigned int len = alnSeq->len;
+			char *seq = (alnSeq->itemList+i)->strain?alnSeq->rc:alnSeq->seq;
+			GenCigarMD();
+		}
+		alnSeq->nhits = n;
+	}
+
+	free(str->s);
+	free(str);
+}
+
+/*
+ * SEAlnCore() - align the reads of one batch in single-end mode.
+ *
+ *   tid                  id of the calling worker (0 when not threaded)
+ *   mseqs                batch of reads; results are stored in each ALNSEQ
+ *   bwt, rev_bwt,
+ *   lookup, rev_lookup   index structures handed on to the BWT*Matching calls
+ *   hsp                  supplies the packed reference, its length and the
+ *                        block list
+ *   opt                  alignment options (mode, rr, ns, aln_len, cutoff, ...)
+ *
+ * For each read with at most opt->ns in its ns field, matching stages are run
+ * in order (exact, 1 error, 2 errors) according to `mode`, both on the forward
+ * and on the reverse-complement sequence.  If the full read yields no hit and
+ * opt->aln_len is shorter than the read, the stages are retried once with
+ * alnLen set to opt->aln_len.  When hits exist, PickupHit() stores them in the
+ * read; otherwise report is set to 0.
+ */
+void SEAlnCore(int tid, MULTISEQ *mseqs, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HSP *hsp, const SOAPOPT *opt) {
+	int i;
+	ALNSEQ *alnSeq;
+	HITTABLE *hits;
+	/* One hit table with room for MAX_ALN items, reused for every read. */
+	hits = (HITTABLE *) malloc (sizeof(HITTABLE) * 1);
+	hits->itemList = (HITITEM *) malloc (sizeof(HITITEM) * MAX_ALN);
+	/*
+	   for(i=0; i<MAX_ALN; ++p,++i)
+	   p->path = (unsigned short *) malloc (sizeof(unsigned short) * MAX_DIFF);
+	//*/
+	int mode, seedLen, ns, rr, cutoff;
+	mode = opt->mode; rr = opt->rr;ns = opt->ns; seedLen = opt->aln_len; cutoff = opt->cutoff;
+
+	/* opt->uniq overrides the requested mode with mode 4. */
+	if (opt->uniq)  mode = 4;
+	BWTOPT bo;
+	//*
+	bo.nblock = hsp->numOfBlock; bo.blockList = hsp->blockList;
+	bo.cutoff = MAX_ALN;
+	bo.max_mm = opt->max_mm; bo.gap_len = opt->gap_len; bo.gap_fb = opt->gap_fb;
+	bo.pacRef = hsp->packedDNA; bo.dnaLen = hsp->dnaLength;
+//	int count = 0;
+	//*/
+#ifdef DEBUG
+	//	fprintf(stderr, "%d\n", mseqs->n);
+#endif
+	for(i=0; i < mseqs->n; i+=1){
+		//		fprintf(stderr, "n reads %d\n", i);
+		alnSeq = mseqs->seqList+i;
+		//*
+
+#ifdef  PTHREADS
+		/*
+		 * Work sharing between threads, done under `lock`: a read whose tid
+		 * is negative is claimed by this thread together with the reads that
+		 * follow it (up to NSEQ_PER_THREAD reads, bounded by the batch size);
+		 * a read carrying another thread's tid is skipped.
+		 */
+		ALNSEQ *p = mseqs->seqList + i;
+		if (opt->nthreads > 1) {		
+			pthread_mutex_lock(&lock);	
+			if (alnSeq->tid < 0) { 	
+				int j;	
+				for (j = i; j < mseqs->n && j < i + NSEQ_PER_THREAD; ++j)
+					p++->tid = tid;
+			} else if (alnSeq->tid != tid) {
+				pthread_mutex_unlock(&lock);
+				continue;
+			}
+			pthread_mutex_unlock(&lock);	
+		}
+#endif    ///* -----  not PTHREADS  -----*/
+
+		/* Reads whose ns field exceeds the limit are not aligned. */
+		if (alnSeq->ns <= ns){
+			int h0, h1, h2, h3;
+			h0 = h1 = h2 = h3 = 0;
+			hits->n = 0;
+			bo.seqLen = bo.alnLen = alnSeq->len;
+			unsigned int extLen = 0;
+			bo.fw = alnSeq->seq;
+			bo.rc = alnSeq->rc;
+			/* Re-entered once through the goto below with a shorter alnLen. */
+ALIGN:
+			bo.h = bo.alnLen>>1;
+			bo.x = bo.alnLen>39?bo.alnLen/3:13;
+			bo.y = bo.alnLen-13-bo.x;
+			if (bo.y <= 0) {fprintf(stderr, "length y < 0, countinue as 13\n");}
+			/*
+			 * The cases fall through on purpose: each stage is followed by
+			 * the next one unless enough hits were found (hits->n >= cutoff)
+			 * or the stage is the last one allowed by `mode`.
+			 */
+			switch (mode) {
+				case 5:
+				case 4:
+				case 0:
+					h0  = BWTExactMatching((unsigned char *)alnSeq->seq, &bo, FORWARD, bwt, lookup, hits);
+					h0 += BWTExactMatching((unsigned char *)alnSeq->rc+extLen,  &bo, REVERSE, bwt, lookup, hits);
+					if(hits->n >= cutoff || mode == 0) break;
+				case 1:
+					h1  = BWT1ErrorMatching((unsigned char *)alnSeq->seq, &bo, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hits);
+					h1 += BWT1ErrorMatching((unsigned char *)alnSeq->rc+extLen,  &bo, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hits);
+					if(hits->n >= cutoff || mode == 1) break;
+				case 2:
+					h2  = BWT2ErrorMatching((unsigned char *)alnSeq->seq, &bo, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hits);
+					h2 += BWT2ErrorMatching((unsigned char *)alnSeq->rc+extLen,  &bo, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hits);
+					if(mode == 4 || hits->n >= cutoff || mode == 2) break;
+			}
+
+			/*
+			 * No hit with the current length: retry with alnLen = seedLen.
+			 * extLen then offsets the start of the reverse-complement
+			 * sequence by the number of bases left out.
+			 */
+			if (!hits->n && seedLen < bo.alnLen) {
+				bo.alnLen = seedLen;
+				extLen = alnSeq->len-seedLen;
+				goto ALIGN;
+			}
+
+			if (hits->n) {
+				alnSeq->flag = 0;
+				/* A random hit index is chosen when there are several hits. */
+				int site = hits->n?(hits->n == 1?0:rand()%hits->n):-1;
+				/* No cigar array is passed (NULL) on this path. */
+				PickupHit(alnSeq, rr, &site, hits, hsp->packedDNA, hsp->dnaLength, NULL);
+			}else{
+				alnSeq->flag = 0;
+				alnSeq->report = 0;
+			}
+
+		}else{
+			alnSeq->flag = 0;
+			alnSeq->report = 0;
+		}
+		
+		//*/
+	}
+//	fprintf(stderr, "Alignment Time: %2.7f\n", getElapsedTime(startTime));
+	free(hits->itemList);free(hits);
+}
+
+#ifdef PTHREADS
+
+/*
+ * Argument bundle handed to each worker thread: the worker's id plus the
+ * arguments Workers() forwards to PEAlnCore()/SEAlnCore().  MatchProcess()
+ * fills one entry per thread; all entries share the same pointers.
+ */
+typedef struct _THREADAUX_TYPE_{
+	int tid;
+	BWT *bwt;
+	BWT *rev_bwt;
+	LOOKUPTABLE *lookup;
+	LOOKUPTABLE *rev_lookup;
+	HSP *hsp;
+	MULTISEQ *mseqs;
+	SOAPOPT *o;
+}THREADAUX;
+
+/*
+ * Thread start routine.  threadAux points at a THREADAUX; the batch is
+ * aligned with PEAlnCore() when the pe option is set and with SEAlnCore()
+ * otherwise.  Always returns NULL (the previous version fell off the end of a
+ * non-void function).
+ */
+static void *Workers(void *threadAux){
+	THREADAUX *aux = (THREADAUX *)threadAux;
+	if (aux->o->pe)
+		PEAlnCore(aux->tid, aux->mseqs, aux->bwt, aux->rev_bwt, aux->lookup, aux->rev_lookup, aux->hsp, aux->o);
+	else
+		SEAlnCore(aux->tid, aux->mseqs, aux->bwt, aux->rev_bwt, aux->lookup, aux->rev_lookup, aux->hsp, aux->o);
+	return NULL;
+}
+#endif
+
+
+/*
+ * MatchProcess() - top-level alignment loop.
+ *
+ *   fds                  file descriptors of the inputs and outputs; each one
+ *                        in use is wrapped in a FILE stream and closed at
+ *                        the end
+ *   bwt, rev_bwt,
+ *   lookup, rev_lookup,
+ *   hsp                  index and reference data passed to the alignment cores
+ *   opt                  run options; pe selects paired-end handling,
+ *                        unmapped enables the extra output stream, nthreads
+ *                        the number of worker threads
+ *
+ * Reads are fetched batch by batch with GetMultiSeq(), aligned with
+ * PEAlnCore()/SEAlnCore() (one thread per worker when PTHREADS is defined and
+ * nthreads > 1), written with DumpAln() and released with FreeMultiSeq().
+ * Progress and a final summary go to stderr.
+ *
+ * Changes over the previous version: the unsigned counters are printed with
+ * %u instead of %d, and the summary percentages are 0 instead of 0/0 when no
+ * read was processed.
+ */
+void MatchProcess (FILEDS *fds, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HSP *hsp, SOAPOPT * const opt) {
+
+	InFileList *ifp;
+	OutFileList *ofp;
+	ifp = (InFileList *) malloc(sizeof(InFileList) * 1);
+	ofp = (OutFileList *) malloc(sizeof(OutFileList) * 1);
+	ifp->ifpA   = fdopen(fds->ifdA, "r");
+	ofp->ofpAln = fdopen(fds->ofdAln, "w");
+	if (opt->pe) {
+		ifp->ifpB   = fdopen(fds->ifdB, "r");
+		ofp->ofpSe  = fdopen(fds->ofdSe, "w");
+	}
+	if (opt->unmapped)
+		ofp->ofpUn  = fdopen(fds->ofdUn, "w");
+	/* Non-zero selects the fastq reader below, zero the fasta reader. */
+	int fast = CheckFast(fds->ifdA);
+	ifp->id = 0;
+	
+	MULTISEQ mseqs;
+	mseqs.n = mseqs.max = 0;
+	mseqs.seqList = (ALNSEQ *)malloc(sizeof(ALNSEQ) * MAX_MULTI_READS);
+
+/* Zero the bookkeeping fields of every slot of the batch buffer. */
+#define INITALN(aln) {					\
+	int j;						\
+	for(j=0;j<MAX_MULTI_READS;j++){				\
+		aln[j].tid = aln[j].id = aln[j].len = aln[j].ns = aln[j].flag = aln[j].nhits = aln[j].report = 0;			\
+	}		\
+}
+	INITALN(mseqs.seqList);
+	unsigned int nseq, nAln, nSE;
+	nseq = nAln = nSE = 0;
+	double startAlnTime; 
+	
+	OUTAUX o_aux;
+
+	o_aux.allErr = opt->allErr;
+	o_aux.un = opt->unmapped;
+	o_aux.id = opt->id;
+	o_aux.chrName = hsp->chrName;
+	o_aux.chrNum = hsp->chrNum;
+
+	while (GetMultiSeq(ifp, &mseqs,opt->pe, fast?fastq:fasta) != 0) {
+		nseq += mseqs.n;
+		startAlnTime = setStartTime();
+
+#ifndef PTHREADS
+		opt->pe ? PEAlnCore(0, &mseqs, bwt, rev_bwt, lookup, rev_lookup, hsp, opt)
+				:SEAlnCore(0, &mseqs, bwt, rev_bwt, lookup, rev_lookup, hsp, opt);
+#else 
+		if(opt->nthreads <= 1)
+			opt->pe?PEAlnCore(0, &mseqs, bwt, rev_bwt, lookup, rev_lookup, hsp, opt)
+				:SEAlnCore(0, &mseqs, bwt, rev_bwt, lookup, rev_lookup, hsp, opt);
+		else {
+			/* One joinable thread per worker; all of them share the batch. */
+			pthread_t *tid;
+			pthread_attr_t attr;
+			THREADAUX *threadAux;
+			int j;
+			pthread_attr_init(&attr);
+			pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+			threadAux = (THREADAUX *)calloc(opt->nthreads, sizeof(THREADAUX));
+			tid = (pthread_t*)calloc(opt->nthreads, sizeof(pthread_t));
+			for (j = 0; j < opt->nthreads; ++j) {
+				threadAux[j].tid = j;
+				threadAux[j].bwt = bwt; threadAux[j].rev_bwt = rev_bwt;
+				threadAux[j].lookup = lookup; threadAux[j].rev_lookup = rev_lookup;
+				threadAux[j].hsp = hsp;
+				threadAux[j].mseqs = &mseqs; 
+				threadAux[j].o = opt;
+				pthread_create(&tid[j], &attr, Workers, threadAux + j);
+			}
+			pthread_attr_destroy(&attr);
+			for (j = 0; j < opt->nthreads; ++j) pthread_join(tid[j], 0);
+			free(threadAux); free(tid);
+		}
+#endif
+
+		fprintf(stderr, "%u ok %7.2f sec\n", nseq, getElapsedTime(startAlnTime));
+		DumpAln(&mseqs, &o_aux, ofp, &nAln, &nSE);
+		FreeMultiSeq(&mseqs);
+	}
+
+	/* Percentages are relative to the number of reads; 0 when there were none. */
+	float alnPct = nseq ? (float)nAln/nseq*100 : 0;
+	float sePct  = nseq ? (float)nSE/(nseq)*100 : 0;
+	if (opt->pe) 
+		fprintf(stderr, "Total Pairs: %u PE\n"
+				"Paired:      %u (%5.2f%%) PE\n"
+				"Singled:     %u (%5.2f%%) SE\n", nseq/2, nAln/2, alnPct, nSE, sePct);
+	else 
+		fprintf(stderr, "Total Reads: %u\n"
+				"Alignment:   %u (%5.2f%%)\n", nseq, nAln, alnPct);
+	free(mseqs.seqList);
+	fclose(ifp->ifpA); fclose(ofp->ofpAln);
+	if(opt->pe){fclose(ifp->ifpB); fclose(ofp->ofpSe);}
+	if(opt->unmapped) fclose(ofp->ofpUn);
+	free(ifp); free(ofp);
+}                               /*                              */
diff --git a/Match.h b/Match.h
new file mode 100644
index 0000000..bd71d6f
--- /dev/null
+++ b/Match.h
@@ -0,0 +1,138 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  Match.h
+ *
+ *    Description:  
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+
+#ifndef  _MATCH_H_
+#define  _MATCH_H_
+
+#include "SeqIO.h"
+#include "BWTAln.h"
+#include "BWT.h"
+#include "extratools.h"
+#include "soapio.h"
+#include "stdaln.h"
+
+#ifdef PTHREADS
+#include <pthread.h>
+#define NSEQ_PER_THREAD 0xF00
+static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+/*
+ * SEQ_ALLOC() - work sharing between alignment threads, for use inside the
+ * per-read loop of an alignment core.  Relies on these names in the caller's
+ * scope: opt, alnSeq (the current read), mseqs, i (index of the current read),
+ * tid (the calling worker) and the mutex `lock`; it may `continue` the loop.
+ *
+ * Under the lock, a read whose tid is negative is claimed by the calling
+ * thread together with the reads that follow it, up to NSEQ_PER_THREAD reads
+ * and never past mseqs->n; a read carrying another thread's tid is skipped.
+ *
+ * The claimed reads are addressed as mseqs->seqList[j].  The previous
+ * alnSeq[j] form added the offset i a second time when alnSeq already points
+ * at read i, which is how the equivalent inline code in SEAlnCore() sets it.
+ */
+#define SEQ_ALLOC() {\
+	if (opt->nthreads > 1) {		\
+		pthread_mutex_lock(&lock);	\
+		if (alnSeq->tid < 0) { 		\
+			int j;			\
+			for (j = i; j < mseqs->n && j < i + NSEQ_PER_THREAD; ++j)	\
+				mseqs->seqList[j].tid = tid;		\
+		} else if (alnSeq->tid != tid) {			\
+			pthread_mutex_unlock(&lock);	\
+			continue;				\
+		}						\
+		pthread_mutex_unlock(&lock);			\
+	}		\
+}
+#else 
+#define SEQ_ALLOC() 
+#endif
+
+/* NOTE(review): MULTI_SEQ is not referenced in Match.c; purpose to be confirmed. */
+#define MULTI_SEQ 0x100000
+/* MAX_MISMATCH and MAX_GAP_LEN may be overridden before this header is included. */
+#ifndef MAX_MISMATCH
+#define MAX_MISMATCH 20
+#endif
+
+#ifndef MAX_GAP_LEN
+#define MAX_GAP_LEN 10
+#endif
+#define MAX_SEQ_LEN 256
+/* Capacity of the hit table allocated in SEAlnCore(); also used as BWTOPT.cutoff. */
+#define MAX_ALN 10000
+/* Strand selectors passed to the BWT*Matching functions. */
+#define FORWARD	0
+#define REVERSE	1
+/* NOTE(review): presumably alignment operation codes (match, mismatch, insert, delete) - confirm against users. */
+#define ALN_MAT 0
+#define ALN_MIS 0x11
+#define ALN_INS 0x22
+#define ALN_DEL 0x33 
+#include <assert.h>
+
+/*
+ * HITCPY(dest, ori) - copy the scalar fields of the hit item *ori into *dest.
+ * Both arguments are pointers and are evaluated once per copied field, so
+ * they should be free of side effects.  Only the fields listed below are
+ * copied; the cigar and md pointers are not (PickupHit() sets them after the
+ * copy).
+ */
+#define HITCPY(dest, ori) {	\
+	(dest)->info    = (ori)->info;		\
+	(dest)->strain  = (ori)->strain;		\
+	(dest)->chr     = (ori)->chr;		\
+	(dest)->occ_pos = (ori)->occ_pos;		\
+	(dest)->pos     = (ori)->pos;		\
+	(dest)->n_mm    = (ori)->n_mm;		\
+	(dest)->n_gapo  = (ori)->n_gapo;		\
+	(dest)->n_gape  = (ori)->n_gape;		\
+	(dest)->gap_beg  = (ori)->gap_beg;		\
+	(dest)->n_diff  = (ori)->n_diff;		\
+	(dest)->n_cigar = (ori)->n_cigar;			\
+}
+
+/*
+ * PacReadExt(fw, rc, start, len, seqPac, rcPac) - pack `len` bases of the
+ * forward and reverse-complement sequences, 2 bits per base, into the word
+ * arrays seqPac and rcPac (element j>>4 receives base j, i.e. 16 bases per
+ * element).  Each base is shifted in from the low end of its element.
+ *
+ * Note that `start` offsets the forward sequence only; rc is read from its
+ * beginning.  The arguments are used unparenthesized, so pass simple
+ * expressions.  The macro declares its own loop variable `j`.
+ * NOTE(review): input values are OR-ed in unmasked, so they are presumably
+ * already in the range 0..3, and the destination arrays presumably start out
+ * zeroed - confirm at the call sites.
+ */
+#define PacReadExt(fw, rc, start, len, seqPac, rcPac)  {\
+	int j;			\
+	for(j=0; j<len; ++j){				\
+		seqPac[j>>4] <<= 2;			\
+		seqPac[j>>4] |= *(fw+j+start);			\
+		rcPac[j>>4] <<= 2;			\
+		rcPac[j>>4] |= *(rc+j);			\
+	}									\
+}
+
+/*
+ * Run options.  The notes below describe how Match.c uses each field; fields
+ * without a note are not referenced there.
+ */
+typedef struct _SOAPOPT_{
+	int fast, o_format, chain;
+	/* aln_len: length retried by SEAlnCore() when the full read gives no hit;
+	   ns: reads whose ns field is larger are not aligned;
+	   max_mm, gap_len, gap_fb: copied into the BWTOPT given to the matchers */
+	int aln_len, ns, max_mm, gap_len, gap_fb;
+	/* mode: selects the matching stages run by SEAlnCore();
+	   cutoff: later stages are skipped once this many hits were found */
+	int mode, cutoff; 
+	/* non-zero: paired-end run (PEAlnCore, second input and SE output are used) */
+	int pe;
+	int zero_qual;
+	int min_ins, max_ins, FR;
+	/* passed to PickupHit(): 0 = drop reads with several hits, 1 = report one
+	   hit, other values = report all hits */
+	int rr;
+	/* non-zero: the unmapped-reads output stream is opened */
+	int unmapped;
+	int nthreads;	//number of pthreads
+	/* copied into the OUTAUX handed to DumpAln() */
+	int id;
+	int bisulfite;
+	/* copied into the OUTAUX handed to DumpAln() */
+	int allErr;
+	int min_len;
+	/* non-zero: SEAlnCore() uses mode 4 regardless of `mode` */
+	int uniq;
+}SOAPOPT;
+
+/*
+ * File descriptors used by MatchProcess(), which wraps them with fdopen():
+ * ifdA/ifdB are opened for reading (ifdB only in paired-end runs),
+ * ofdAln/ofdSe/ofdUn for writing (ofdSe only in paired-end runs, ofdUn only
+ * when the unmapped option is set).
+ */
+typedef struct _FILEDS_{
+	int ifdA, ifdB;
+	int ofdAln, ofdSe, ofdUn;
+}FILEDS;
+
+/*
+ * Auxiliary matching parameters.
+ * NOTE(review): not referenced in Match.c; field meanings are presumed from
+ * their names (mismatch limit, lengths, packed reference and its length) and
+ * should be confirmed against the code that uses this type.
+ */
+typedef struct _MATCHAUX_TYPE_{
+	int max_mm;
+	int len, ext;
+	unsigned int *pac;
+	unsigned int dnaLen;
+	int allErr;
+}MATCHAUX;
+
+/*
+ * Auxiliary paired-end parameters; a pointer to this type is the third
+ * argument of CheckIns().
+ * NOTE(review): not referenced in Match.c; min_ins/max_ins/FR presumably
+ * mirror the SOAPOPT fields of the same names - confirm in PairMatch.c.
+ */
+typedef struct _PEAUX_TYPE_{
+	int min_ins, max_ins;
+	int FR;
+        int cutoff, len;
+	int allErr;
+}PEAUX;
+
+/*
+ * Public entry points of the matching module.
+ *
+ * CheckIns() and PickupHit() are declared without `inline`: they are defined
+ * in .c files, and under C99 inline rules a translation unit in which every
+ * declaration of a function is `inline` provides no external definition, so
+ * calls from other files would fail to link.
+ */
+int CheckIns(HITITEM *, HITITEM *, PEAUX *);
+void MatchProcess (FILEDS *fds, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HSP *hsp, SOAPOPT * const opt);
+void PickupHit(ALNSEQ *alnSeq, const int rr, int *site, HITTABLE *hits, const unsigned int *pacRef, const unsigned int dnaLength, unsigned short *cigar);
+void SEAlnCore(int tid, MULTISEQ *mseqs, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HSP *hsp, const SOAPOPT *opt);
+void PEAlnCore(int , MULTISEQ *, BWT *, BWT *, LOOKUPTABLE *, LOOKUPTABLE *, HSP *, const SOAPOPT *);
+int HITCMP(const void *a, const void *b);
+
+#endif   /* ----- #ifndef _MATCH_H_INC  ----- */
+
diff --git a/MemManager.c b/MemManager.c
new file mode 100644
index 0000000..b58f520
--- /dev/null
+++ b/MemManager.c
@@ -0,0 +1,1093 @@
+/*
+
+   MemManager.c		Memory Manager
+
+   This module provides memory management functions.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <string.h>
+#ifndef _WIN32
+#include <mm_malloc.h>
+#endif
+#include "MiscUtilities.h"
+#include "MemManager.h"
+
+MMMaster mmMaster;
+
+// Obtain memSize bytes through MEMALIGN with MAX_ALIGN.
+// Prints a message and exits with status 1 if the request fails, so the
+// returned pointer is never NULL.
+void *MMMalloc(const unsigned int memSize) {
+
+	void *block = MEMALIGN(memSize, MAX_ALIGN);
+
+	if (block != NULL) {
+		return block;
+	}
+
+	fprintf(stderr, "MMMalloc() : cannot allocate memory!\n");
+	exit(1);
+
+}
+
+// Release a block with FREEALIGN; counterpart of MMMalloc().
+void MMFree(void *address) {
+
+	FREEALIGN(address);
+
+}
+
+// Initialise the global memory master.
+//   maxNumberOfPools         size of the pool directory (0 = no directory)
+//   maxNumberOfBulks         size of the bulk directory (0 = no directory)
+//   traceUnitByteAllocation  non-zero: unit allocations and frees are logged
+//   unitByteTraceFile        stream the log lines are written to
+// All byte counters are reset and every directory slot starts out NULL.
+// Prints a message and exits with status 1 if a directory cannot be
+// allocated (previously the NULL result was written through).
+void MMMasterInitialize(const unsigned int maxNumberOfPools, const unsigned int maxNumberOfBulks, 
+						const int traceUnitByteAllocation, FILE* unitByteTraceFile) {
+
+	unsigned int i;
+
+	mmMaster.maxTotalByteAllocated = 0;
+	mmMaster.maxTotalByteDispatched = 0;
+	mmMaster.currentUnitByteAllocated = 0;
+	mmMaster.maxUnitByteAllocated = 0;
+
+	mmMaster.maxNumberOfBulks = maxNumberOfBulks;
+	mmMaster.maxNumberOfPools = maxNumberOfPools;
+	if (maxNumberOfBulks > 0) {
+		mmMaster.mmBulk = MEMALIGN(sizeof(MMBulk*) * maxNumberOfBulks, MAX_ALIGN);
+		if (mmMaster.mmBulk == NULL) {
+			fprintf(stderr, "MMMasterInitialize() : cannot allocate memory!\n");
+			exit(1);
+		}
+		for (i=0; i<maxNumberOfBulks; i++) {
+			mmMaster.mmBulk[i] = NULL;
+		}
+	} else {
+		mmMaster.mmBulk = NULL;
+	}
+	if (maxNumberOfPools > 0) {
+		mmMaster.mmPool = MEMALIGN(sizeof(MMPool*) * maxNumberOfPools, MAX_ALIGN);
+		if (mmMaster.mmPool == NULL) {
+			fprintf(stderr, "MMMasterInitialize() : cannot allocate memory!\n");
+			exit(1);
+		}
+		for (i=0; i<maxNumberOfPools; i++) {
+			mmMaster.mmPool[i] = NULL;
+		}
+	} else {
+		mmMaster.mmPool = NULL;
+	}
+
+	mmMaster.traceUnitByteAllocation = traceUnitByteAllocation;
+	mmMaster.unitByteTraceFile = unitByteTraceFile;
+
+}
+
+// Release everything registered in the master: every bulk and pool still
+// listed in the directories, then the directories themselves.
+// The directory pointers and the max counts in mmMaster are left unchanged,
+// so the master must be initialised again before further use.
+void MMMasterFreeAll() {
+
+	unsigned int i;
+
+	for (i=0; i < mmMaster.maxNumberOfBulks; i++) {
+		if (mmMaster.mmBulk[i] != NULL) {
+			if (MMBulkIsActive(mmMaster.mmBulk[i])) {
+				MMBulkFree(mmMaster.mmBulk[i]);
+			}
+			// The bulk header itself is released only when MMBulkFindPoolUsed() reports no pool.
+			if (MMBulkFindPoolUsed(mmMaster.mmBulk[i]) == NULL) {
+				MMUnitFree(mmMaster.mmBulk[i], sizeof(MMBulk));
+			}
+			mmMaster.mmBulk[i] = NULL;
+		}
+	}
+	FREEALIGN(mmMaster.mmBulk);
+
+	for (i=0; i < mmMaster.maxNumberOfPools; i++) {
+		if (mmMaster.mmPool[i] != NULL) {
+			if (MMPoolIsActive(mmMaster.mmPool[i])) {
+				// MMPoolFree() frees the pool and stores an inactive
+				// placeholder in this slot, so the slot is re-read below.
+				MMPoolFree(mmMaster.mmPool[i]);
+			}
+			FREEALIGN(mmMaster.mmPool[i]);
+			mmMaster.mmPool[i] = NULL;
+		}
+	}
+	FREEALIGN(mmMaster.mmPool);
+
+}
+
+// Bytes currently allocated through the master: unit memory plus the
+// allocation of every active pool and every active bulk in the directories.
+unsigned int MMMasterCurrentTotalByteAllocated() {
+
+	unsigned int slot;
+	unsigned int total = mmMaster.currentUnitByteAllocated;	// unit memory
+
+	// pools (their figure includes spillover and temp memory)
+	for (slot = 0; slot < mmMaster.maxNumberOfPools; slot++) {
+		if (mmMaster.mmPool[slot] == NULL || !MMPoolIsActive(mmMaster.mmPool[slot])) {
+			continue;
+		}
+		total += MMPoolCurrentTotalByteAllocated(mmMaster.mmPool[slot]);
+	}
+
+	// bulks
+	for (slot = 0; slot < mmMaster.maxNumberOfBulks; slot++) {
+		if (mmMaster.mmBulk[slot] == NULL || !MMBulkIsActive(mmMaster.mmBulk[slot])) {
+			continue;
+		}
+		total += MMBulkByteAllocated(mmMaster.mmBulk[slot]);
+	}
+
+	return total;
+
+}
+
+// Bytes currently dispatched through the master: unit memory plus the
+// dispatched bytes of every active pool and every active bulk.
+unsigned int MMMasterCurrentTotalByteDispatched() {
+
+	unsigned int slot;
+	unsigned int total = mmMaster.currentUnitByteAllocated;	// unit memory
+
+	// pools (their figure includes temp memory)
+	for (slot = 0; slot < mmMaster.maxNumberOfPools; slot++) {
+		if (mmMaster.mmPool[slot] == NULL || !MMPoolIsActive(mmMaster.mmPool[slot])) {
+			continue;
+		}
+		total += MMPoolCurrentTotalByteDispatched(mmMaster.mmPool[slot]);
+	}
+
+	// bulks
+	for (slot = 0; slot < mmMaster.maxNumberOfBulks; slot++) {
+		if (mmMaster.mmBulk[slot] == NULL || !MMBulkIsActive(mmMaster.mmBulk[slot])) {
+			continue;
+		}
+		total += MMBulkByteDispatched(mmMaster.mmBulk[slot]);
+	}
+
+	return total;
+
+}
+
+// Larger of the recorded allocation maximum and the current total.
+// The recorded maximum is not modified.
+unsigned int MMMasterMaxTotalByteAllocated() {
+
+	const unsigned int now = MMMasterCurrentTotalByteAllocated();
+
+	return (now > mmMaster.maxTotalByteAllocated) ? now : mmMaster.maxTotalByteAllocated;
+
+}
+
+// Larger of the recorded dispatch maximum and the current total.
+// The recorded maximum is not modified.
+unsigned int MMMasterMaxTotalByteDispatched() {
+
+	const unsigned int now = MMMasterCurrentTotalByteDispatched();
+
+	return (now > mmMaster.maxTotalByteDispatched) ? now : mmMaster.maxTotalByteDispatched;
+
+}
+
+// Raise the recorded allocation maximum to the current total if that is larger.
+void MMMasterSetMaxTotalByteAllocated() {
+
+	const unsigned int now = MMMasterCurrentTotalByteAllocated();
+
+	if (mmMaster.maxTotalByteAllocated < now) {
+		mmMaster.maxTotalByteAllocated = now;
+	}
+
+}
+
+// Raise the recorded dispatch maximum to the current total if that is larger.
+void MMMasterSetMaxTotalByteDispatched() {
+
+	const unsigned int now = MMMasterCurrentTotalByteDispatched();
+
+	if (mmMaster.maxTotalByteDispatched < now) {
+		mmMaster.maxTotalByteDispatched = now;
+	}
+
+}
+
+// Write a memory usage report to `output`: the two overall maxima first, then,
+// for each non-zero flag, the unit report, one report per registered pool and
+// one report per registered bulk.
+void MMMasterPrintReport(FILE *output, const unsigned int withUnitDetails, const unsigned int withPoolDetails, const unsigned int withBulkDetails) {
+
+	unsigned int slot;
+
+	fprintf(output, "Maximum amount of memory allocated:  %u\n", MMMasterMaxTotalByteAllocated());
+	fprintf(output, "Maximum amount of memory dispatched: %u\n", MMMasterMaxTotalByteDispatched());
+
+	if (withUnitDetails) {
+		fprintf(output, "\n");
+		MMUnitPrintReport(output);
+	}
+
+	// Empty directory slots are skipped in both loops.
+	for (slot = 0; withPoolDetails && slot < mmMaster.maxNumberOfPools; slot++) {
+		if (mmMaster.mmPool[slot] == NULL) {
+			continue;
+		}
+		fprintf(output, "\nPool number %u\n", slot);
+		MMPoolPrintReport(mmMaster.mmPool[slot], output);
+	}
+
+	for (slot = 0; withBulkDetails && slot < mmMaster.maxNumberOfBulks; slot++) {
+		if (mmMaster.mmBulk[slot] == NULL) {
+			continue;
+		}
+		fprintf(output, "\nBulk number %u\n", slot);
+		MMBulkPrintReport(mmMaster.mmBulk[slot], output);
+	}
+
+}
+
+// Allocate memSize bytes of "unit" memory and add them to the master's unit
+// counter.  Prints a message and exits with status 1 on failure; with DEBUG
+// defined a zero memSize is rejected the same way.  When tracing is enabled
+// the size is logged to the trace file.
+void *MMUnitAllocate(const unsigned int memSize) {
+
+	void *block;
+
+	#ifdef DEBUG
+	if (memSize == 0) {
+		fprintf(stderr, "MMUnitAllocate() : memSize = 0!\n");
+		exit(1);
+	}
+	#endif
+
+	block = MEMALIGN(memSize, MAX_ALIGN);
+	if (block == NULL) {
+		fprintf(stderr, "MMUnitAllocate() : cannot allocate memory!\n");
+		exit(1);
+	}
+
+	mmMaster.currentUnitByteAllocated += memSize;
+	if (mmMaster.traceUnitByteAllocation) {
+		fprintf(mmMaster.unitByteTraceFile, "MMUnitAllocate        : %u\n", memSize);
+	}
+
+	return block;
+
+}
+
+// Move a unit allocation to a new block of newMemSize bytes.
+// The smaller of the two sizes is copied from the old block, which is then
+// released with MMUnitFree(address, oldMemSize).  Returns the new block.
+// With DEBUG defined a zero size is rejected with a message and exit(1).
+void *MMUnitReallocate(void *address, const unsigned int newMemSize, const unsigned int oldMemSize) {
+
+	void *block;
+
+	#ifdef DEBUG
+	if (newMemSize == 0) {
+		fprintf(stderr, "MMUnitReallocate() : newMemSize = 0!\n");
+		exit(1);
+	}
+	if (oldMemSize == 0) {
+		fprintf(stderr, "MMUnitReallocate() : oldMemSize = 0!\n");
+		exit(1);
+	}
+	#endif
+
+	if (mmMaster.traceUnitByteAllocation) {
+		fprintf(mmMaster.unitByteTraceFile, "MMUnitReallocate\n");
+	}
+
+	block = MMUnitAllocate(newMemSize);
+	if (block == NULL) {
+		fprintf(stderr, "MMUnitReallocate() : cannot allocate memory!\n");
+		exit(1);
+	}
+
+	// Carry over as much of the old content as fits.
+	memcpy(block, address, min(newMemSize, oldMemSize));
+	MMUnitFree(address, oldMemSize);
+
+	return block;
+
+}
+
+// Release a unit allocation of memSize bytes and subtract it from the unit
+// counter.  The unit maximum is brought up to date first, while the counter
+// still includes this block.  With DEBUG defined, a NULL address or a memSize
+// larger than the counter is rejected with a message and exit(1).
+void MMUnitFree(void *address, const unsigned int memSize) {
+
+	#ifdef DEBUG
+	if (address == NULL) {
+		fprintf(stderr, "MMUnitFree() : address = NULL!\n");
+		exit(1);
+	}
+	if (mmMaster.currentUnitByteAllocated < memSize) {
+		fprintf(stderr, "MMUnitFree() : currentUnitByteAllocated < memSize!\n");
+		exit(1);
+	}
+	#endif
+
+	FREEALIGN(address);
+
+	#ifdef RECORD_GRAND_TOTAL
+	MMMasterSetMaxTotalByteAllocated();
+	MMMasterSetMaxTotalByteDispatched();
+	#endif
+
+	if (mmMaster.maxUnitByteAllocated < mmMaster.currentUnitByteAllocated) {
+		mmMaster.maxUnitByteAllocated = mmMaster.currentUnitByteAllocated;
+	}
+	mmMaster.currentUnitByteAllocated -= memSize;
+
+	if (mmMaster.traceUnitByteAllocation) {
+		fprintf(mmMaster.unitByteTraceFile, "MMUnitFree            : %u\n", memSize);
+	}
+
+}
+
+// Bytes of unit memory currently accounted for in the master.
+unsigned int MMUnitCurrentByteAllocated() {
+
+	return mmMaster.currentUnitByteAllocated;
+
+}
+
+// Larger of the recorded unit maximum and the current unit counter.
+// The recorded maximum is not modified.
+unsigned int MMUnitMaxByteAllocated() {
+
+	const unsigned int now = mmMaster.currentUnitByteAllocated;
+
+	return (now > mmMaster.maxUnitByteAllocated) ? now : mmMaster.maxUnitByteAllocated;
+
+}
+
+// Write the maximum and the current amount of unit memory to `output`.
+void MMUnitPrintReport(FILE *output) {
+
+	fprintf(output, "Maximum amount of unit memory allocated: %u\n", MMUnitMaxByteAllocated());
+	fprintf(output, "Amount of memory unit memory currently allocated: %u\n", MMUnitCurrentByteAllocated());
+
+}
+
+// Create a pool of poolSize bytes and register it in the first free slot of
+// the master's pool directory.
+// poolSize must be a multiple of MAX_ALIGN.  The MMPool header lives at the
+// start of the block, so poolByteDispatched starts at sizeof(MMPool).
+// Prints a message and exits with status 1 if the size is rejected, the
+// allocation fails or the directory has no free slot.
+MMPool *MMPoolCreate(const unsigned int poolSize) {
+
+	MMPool *pool;
+	unsigned int slot;
+
+	#ifdef DEBUG
+	if (poolSize < sizeof(MMPool)) {
+		fprintf(stderr, "MMPoolCreate() : poolSize < MMPool!\n");
+		exit(1);
+	}
+	#endif
+
+	if (poolSize % MAX_ALIGN != 0) {
+		fprintf(stderr, "MMPoolCreate() : poolSize must be multiple of MAX_ALIGN (%d)!\n", MAX_ALIGN);	// Otherwise temp memory is not properly aligned
+		exit(1);
+	}
+
+	pool = MEMALIGN(poolSize, MAX_ALIGN);
+	if (pool == NULL) {
+		fprintf(stderr, "MMPoolCreate() : cannot allocate memory!\n");
+		exit(1);
+	}
+
+	pool->poolSize = poolSize;
+	pool->poolByteDispatched = sizeof(MMPool);
+	pool->poolByteSpillover = 0;
+	pool->firstSpillOverAddress = NULL;
+	pool->currentTempByteDispatched = 0;
+	pool->currentTempByteSpillover = 0;
+	pool->maxTotalByteDispatched = 0;
+
+	for (slot = 0; slot < mmMaster.maxNumberOfPools; slot++) {
+		if (mmMaster.mmPool[slot] != NULL) {
+			continue;
+		}
+		mmMaster.mmPool[slot] = pool;
+		return pool;
+	}
+
+	fprintf(stderr, "MMPoolCreate() : number of pools > maxNumberOfPools!\n");
+	exit(1);
+
+}
+
+// Non-zero while the pool is active.  An inactive pool is marked by its
+// firstSpillOverAddress pointing at the pool itself (see MMPoolSetInactive()).
+unsigned int MMPoolIsActive(const MMPool *mmPool) {
+
+	return ((mmPool->firstSpillOverAddress) != (void*)mmPool);
+
+}
+// Mark the pool inactive by pointing firstSpillOverAddress at the pool itself.
+// Prints a message and exits with status 1 if the spillover list is not empty.
+void MMPoolSetInactive(MMPool *mmPool) {
+
+	if (mmPool->firstSpillOverAddress != NULL) {
+		fprintf(stderr, "MMPoolSetInactive() : spillover memory not freed yet!\n");
+		exit(1);
+	}
+
+	mmPool->firstSpillOverAddress = (void*)mmPool;
+}
+
+// Bytes allocated for the pool: its own size plus pool and temp spillover.
+unsigned int MMPoolCurrentTotalByteAllocated(const MMPool *mmPool) {
+
+	return mmPool->poolSize + mmPool->poolByteSpillover + mmPool->currentTempByteSpillover;
+
+}
+
+// Bytes handed out from the pool: pool bytes plus temp bytes dispatched.
+unsigned int MMPoolCurrentTotalByteDispatched(const MMPool *mmPool) {
+
+	return mmPool->poolByteDispatched + mmPool->currentTempByteDispatched;
+
+}
+
+// Larger of the pool's recorded dispatch maximum and its current total.
+// The recorded maximum is not modified.
+unsigned int MMPoolMaxTotalByteDispatched(const MMPool *mmPool) {
+
+	const unsigned int now = MMPoolCurrentTotalByteDispatched(mmPool);
+
+	return (now > mmPool->maxTotalByteDispatched) ? now : mmPool->maxTotalByteDispatched;
+
+}
+
+// Bytes still available in the pool, expressed as a multiple of MAX_ALIGN.
+// Returns 0 unless more than MAX_ALIGN bytes beyond the dispatched amount
+// remain.
+// NOTE(review): the remainder is rounded UP to the next multiple of MAX_ALIGN;
+// this is exact only if poolByteDispatched is itself kept aligned - confirm.
+unsigned int MMPoolByteAvailable(const MMPool *mmPool) {
+
+	if (!(mmPool->poolSize > mmPool->poolByteDispatched + MAX_ALIGN)) {
+		return 0;
+	}
+
+	return (mmPool->poolSize - mmPool->poolByteDispatched + MAX_ALIGN - 1) / MAX_ALIGN * MAX_ALIGN;
+
+}
+
+// Free a pool's memory while keeping its statistics.
+// The spillover blocks are released, the counters are copied into a small
+// placeholder MMPool that is marked inactive, the placeholder replaces the
+// pool in the master directory and the pool block itself is released.
+// Returns the placeholder.  Prints a message and exits with status 1 if the
+// placeholder cannot be allocated or the pool is not in the directory.
+MMPool *MMPoolFree(MMPool *mmPool) {
+
+	MMPool *placeholder;
+	unsigned int slot;
+	void *block, *next;
+
+	#ifdef DEBUG
+	if (mmPool == NULL) {
+		fprintf(stderr, "MMPoolFree(): mmPool = NULL!\n");
+		exit(1);
+	}
+	#endif
+
+	#ifdef RECORD_GRAND_TOTAL
+	MMMasterSetMaxTotalByteAllocated();
+	MMMasterSetMaxTotalByteDispatched();
+	#endif
+
+	placeholder = MEMALIGN(sizeof(MMPool), MAX_ALIGN);
+	if (placeholder == NULL) {
+		fprintf(stderr, "MMPoolFree() : cannot allocate memory!\n");
+		exit(1);
+	}
+
+	// Free spillover memory; each block starts with the address of the next one.
+	for (block = mmPool->firstSpillOverAddress; block != NULL; block = next) {
+		next = *((void**)block);
+		FREEALIGN(block);
+	}
+	mmPool->firstSpillOverAddress = NULL;
+
+	// Keep the statistics in the placeholder.
+	placeholder->poolSize = mmPool->poolSize;
+	placeholder->poolByteDispatched = mmPool->poolByteDispatched;
+	placeholder->poolByteSpillover = mmPool->poolByteSpillover;
+	placeholder->firstSpillOverAddress = mmPool->firstSpillOverAddress;
+	placeholder->currentTempByteDispatched = mmPool->currentTempByteDispatched;
+	placeholder->currentTempByteSpillover = mmPool->currentTempByteSpillover;
+	placeholder->maxTotalByteDispatched = mmPool->maxTotalByteDispatched;
+
+	MMPoolSetInactive(placeholder);
+
+	// Update master directory
+	for (slot = 0; slot < mmMaster.maxNumberOfPools; slot++) {
+		if (mmMaster.mmPool[slot] != mmPool) {
+			continue;
+		}
+		mmMaster.mmPool[slot] = placeholder;
+		FREEALIGN(mmPool);
+		return placeholder;
+	}
+
+	fprintf(stderr, "MMPoolFree() : cannot locate pool in master!\n");
+	exit(1);
+
+}
+
+// Puts the pool back into its initial accounting state: every spillover block
+// is freed and the counters are reset so that only the MMPool header at the
+// start of the pool counts as dispatched. poolSize is left untouched.
+void MMPoolReset(MMPool *mmPool) {
+
+	void *node, *following;
+
+	#ifdef DEBUG
+	if (mmPool == NULL) {
+		fprintf(stderr, "MMPoolReset(): mmPool = NULL!\n");
+		exit(1);
+	}
+	#endif
+
+	#ifdef RECORD_GRAND_TOTAL
+	MMMasterSetMaxTotalByteAllocated();
+	MMMasterSetMaxTotalByteDispatched();
+	#endif
+
+	// Release the spillover chain; the link to the next block is read before the block is freed
+	for (node = mmPool->firstSpillOverAddress; node != NULL; node = following) {
+		following = *((void**)node);
+		FREEALIGN(node);
+	}
+	mmPool->firstSpillOverAddress = NULL;
+
+	mmPool->poolByteDispatched = sizeof(MMPool);
+	mmPool->poolByteSpillover = 0;
+	mmPool->currentTempByteDispatched = 0;
+	mmPool->currentTempByteSpillover = 0;
+	mmPool->maxTotalByteDispatched = 0;
+
+}
+
+// Removes a pool from the master directory and frees it.
+//
+// An active pool is first released through MMPoolFree(), which replaces it by
+// an inactive record; that record (or the already inactive pool passed in) is
+// then unregistered and freed. Exits if the pool is not registered in the
+// master directory.
+void MMPoolDestory(MMPool *mmPool) {
+
+	unsigned int i;
+	MMPool *temp;
+
+	#ifdef DEBUG
+	if (mmPool == NULL) {
+		fprintf(stderr, "MMPoolDestory(): mmPool = NULL!\n");
+		exit(1);
+	}
+	#endif
+
+	if (MMPoolIsActive(mmPool)) {
+		temp = MMPoolFree(mmPool);
+	} else {
+		temp = mmPool;
+	}
+
+	// Update master directory
+	for (i=0; i<mmMaster.maxNumberOfPools; i++) {
+		if (mmMaster.mmPool[i] == temp) {
+			mmMaster.mmPool[i] = NULL;
+			FREEALIGN(temp);
+			temp = NULL;	// marks the pool as found for the check below
+			// Stop here: with temp == NULL every remaining empty slot would
+			// also compare equal and be passed to FREEALIGN() again
+			break;
+		}
+	}
+
+	if (temp != NULL) {
+		fprintf(stderr, "MMPoolDestory() : cannot locate pool in master!\n");
+		exit(1);
+	}
+
+}
+
+// Dispatches memSize bytes of pool memory.
+//
+// With mmPool == NULL the request is forwarded to MMUnitAllocate(). Otherwise
+// the memory is taken from the front of the pool, aligned to a power of two
+// derived from memSize and clamped to [MIN_ALIGN, MAX_ALIGN]. When the pool
+// cannot hold the request, a separate spillover block is allocated; its first
+// MAX_ALIGN bytes hold the link of the spillover list and the caller receives
+// the address behind them. Exits when memSize is 0 or allocation fails.
+void *MMPoolDispatch(MMPool *mmPool, const unsigned int memSize) {
+
+	void **block;
+	unsigned int bytesInUse, nextOffset;
+	unsigned int alignment, padding;
+
+	if (mmPool == NULL) {
+		return MMUnitAllocate(memSize);
+	}
+	if (memSize == 0) {
+		fprintf(stderr, "MMPoolDispatch(): memSize = 0!\n");
+		exit(1);
+	}
+
+	// Offset of the next free byte at the front of the pool, and the total
+	// number of pool bytes in use by pool and temp memory together
+	nextOffset = mmPool->poolByteDispatched - mmPool->poolByteSpillover;
+	bytesInUse = nextOffset + mmPool->currentTempByteDispatched - mmPool->currentTempByteSpillover;
+
+	// Number of bytes to skip so that the dispatched memory is aligned
+	alignment = 1 << (BITS_IN_WORD - leadingZero(memSize - 1));
+	if (alignment > MAX_ALIGN) {
+		alignment = MAX_ALIGN;
+	}
+	if (alignment < MIN_ALIGN) {
+		alignment = MIN_ALIGN;
+	}
+	padding = nextAlignedBoundary(nextOffset, alignment) - nextOffset;
+
+	if (bytesInUse + memSize + padding > mmPool->poolSize) {
+		// Spillover: allocate room for the linked list pointer as well;
+		// spillover memory is always aligned to MAX_ALIGN
+		block = MEMALIGN(memSize + MAX_ALIGN, MAX_ALIGN);
+		if (block == NULL) {
+			fprintf(stderr, "MMPoolDispatch(): cannot allocate memory!\n");
+			exit(1);
+		}
+		// Push the new block onto the head of the spillover list
+		*block = mmPool->firstSpillOverAddress;
+		mmPool->firstSpillOverAddress = block;
+		mmPool->poolByteSpillover += memSize + MAX_ALIGN;
+		mmPool->poolByteDispatched += memSize + MAX_ALIGN;
+		return (char*)block + MAX_ALIGN;
+	}
+
+	block = (void**)(((char*)mmPool) + nextOffset + padding);
+	mmPool->poolByteDispatched += memSize + padding;
+	return block;
+
+}
+
+// Dispatches memSize bytes from the front of the pool and returns their byte
+// offset from the start of the pool instead of an address.
+//
+// Alignment is derived from memSize as in MMPoolDispatch(). There is no
+// spillover: the function exits when the pool is NULL, memSize is 0 or the
+// pool cannot hold the request.
+unsigned int MMPoolDispatchOffset(MMPool *mmPool, const unsigned int memSize) {
+
+	unsigned int bytesInUse, nextOffset;
+	unsigned int alignment, padding;
+
+	if (mmPool == NULL) {
+		fprintf(stderr, "MMPoolDispatchOffset(): mmPool == NULL!\n");
+		exit(1);
+	}
+	if (memSize == 0) {
+		fprintf(stderr, "MMPoolDispatchOffset(): memSize = 0!\n");
+		exit(1);
+	}
+
+	nextOffset = mmPool->poolByteDispatched - mmPool->poolByteSpillover;
+	bytesInUse = nextOffset + mmPool->currentTempByteDispatched - mmPool->currentTempByteSpillover;
+
+	// Number of bytes to skip so that the dispatched memory is aligned
+	alignment = 1 << (BITS_IN_WORD - leadingZero(memSize - 1));
+	if (alignment > MAX_ALIGN) {
+		alignment = MAX_ALIGN;
+	}
+	if (alignment < MIN_ALIGN) {
+		alignment = MIN_ALIGN;
+	}
+	padding = nextAlignedBoundary(nextOffset, alignment) - nextOffset;
+
+	if (bytesInUse + memSize + padding > mmPool->poolSize) {
+		fprintf(stderr, "MMPoolDispatchOffset(): Not enough memory in memory pool!\n");
+		exit(1);
+	}
+
+	mmPool->poolByteDispatched += memSize + padding;
+
+	return nextOffset + padding;
+
+}
+
+// Counterpart of MMPoolDispatch(). Memory that came from a pool is not given
+// back individually, so this does nothing for a non-NULL pool; with
+// mmPool == NULL the memory is released through MMUnitFree().
+void MMPoolReturn(MMPool *mmPool, void *address, const unsigned int memSize) {
+
+	if (mmPool != NULL) {
+		return;
+	}
+
+	MMUnitFree(address, memSize);
+
+}
+
+// Writes the pool size, the dispatched and spillover byte counts and the
+// maximum amount dispatched (pool + temp) to output.
+void MMPoolPrintReport(MMPool *mmPool, FILE *output) {
+
+	const unsigned int peakDispatched = MMPoolMaxTotalByteDispatched(mmPool);
+
+	fprintf(output, "Pool Size     : %u\n", mmPool->poolSize);
+	fprintf(output, "   Dispatched : %u\n", mmPool->poolByteDispatched);
+	fprintf(output, "     - Spillover             : %u\n", mmPool->poolByteSpillover);
+	fprintf(output, "Maximum amount of memory dispatched including temp memory : %u\n", peakDispatched);
+
+}
+
+// Dispatches memSize bytes of temporary memory.
+//
+// With mmPool == NULL the request is forwarded to MMUnitAllocate(). Otherwise
+// the size is rounded up to MAX_ALIGN and the memory is carved from the end
+// of the pool, growing downwards. When the pool cannot hold the request, a
+// spillover block is allocated and appended to the END of the spillover list;
+// its first MAX_ALIGN bytes hold the list link and the caller receives the
+// address behind them. Exits when memSize is 0 or allocation fails.
+void *MMTempDispatch(MMPool *mmPool, const unsigned int memSize) {
+
+	void **block;
+	void **link;
+	unsigned int bytesInUse, tempBytesInPool;
+	unsigned int alignedMemSize;
+
+	if (mmPool == NULL) {
+		return MMUnitAllocate(memSize);
+	}
+	if (memSize == 0) {
+		fprintf(stderr, "MMTempDispatch(): memSize = 0!\n");
+		exit(1);
+	}
+
+	alignedMemSize = nextAlignedBoundary(memSize, MAX_ALIGN);	// temp memory is always aligned to MAX_ALIGN
+
+	tempBytesInPool = mmPool->currentTempByteDispatched - mmPool->currentTempByteSpillover;
+	bytesInUse = mmPool->poolByteDispatched - mmPool->poolByteSpillover + tempBytesInPool;
+
+	if (bytesInUse + alignedMemSize > mmPool->poolSize) {
+		// Spillover: find the link cell that terminates the list
+		link = &(mmPool->firstSpillOverAddress);
+		while (*link != NULL) {
+			link = (void**)*link;
+		}
+		// Allocate room for the linked list pointer as well
+		block = MEMALIGN(memSize + MAX_ALIGN, MAX_ALIGN);
+		if (block == NULL) {
+			fprintf(stderr, "MMTempDispatch(): cannot allocate memory!\n");
+			exit(1);
+		}
+		*block = NULL;
+		*link = block;
+		mmPool->currentTempByteDispatched += memSize + MAX_ALIGN;
+		mmPool->currentTempByteSpillover += memSize + MAX_ALIGN;
+		return (char*)block + MAX_ALIGN;
+	}
+
+	// Temp memory already inside the pool occupies its tail; place the new piece just below it
+	block = (void**)(((char*)mmPool) + mmPool->poolSize - tempBytesInPool - alignedMemSize);
+	mmPool->currentTempByteDispatched += alignedMemSize;
+	return block;
+
+}
+
+// Returns temporary memory obtained from MMTempDispatch().
+//
+// With mmPool == NULL the memory is released through MMUnitFree(). An address
+// inside the pool only adjusts the temp counter. Any other address must be
+// the payload of the LAST block of the spillover list (temp memory is
+// returned in reverse order of dispatch); that block is unlinked and freed.
+// Exits when the address is not the last spillover block, which includes the
+// case of an empty spillover list.
+//
+// memSize must be the size that was passed to MMTempDispatch().
+void MMTempReturn(MMPool *mmPool, void *address, const unsigned int memSize) {
+
+	void **temp;
+	unsigned int alignedMemSize;
+	void **pointerToLastButOneSpillover;
+	void *spilloverPointerAddress;
+
+	if (mmPool == NULL) {
+		MMUnitFree(address, memSize);
+		return;
+	}
+
+	alignedMemSize = nextAlignedBoundary(memSize, MAX_ALIGN);
+
+	if (address >= (void*)mmPool && address <= (void*)((char*)mmPool + mmPool->poolSize)) {
+		// No need to record the global level max memory dispatched/allocated
+		// because memory pool is allocated as a whole and fluctuation across pools should not be counted
+		if (mmPool->poolByteDispatched + mmPool->currentTempByteDispatched > mmPool->maxTotalByteDispatched) {
+			mmPool->maxTotalByteDispatched = mmPool->poolByteDispatched + mmPool->currentTempByteDispatched;
+		}
+		mmPool->currentTempByteDispatched -= alignedMemSize;
+		return;
+	}
+
+	#ifdef RECORD_GRAND_TOTAL
+	MMMasterSetMaxTotalByteAllocated();
+	MMMasterSetMaxTotalByteDispatched();
+	#endif
+
+	// Spillover
+	spilloverPointerAddress = (void*)((char*)address - MAX_ALIGN);	// MAX_ALIGN no. of bytes preceding temp address
+
+	// Locate the link cell that points to the last spillover block.
+	// The temp != NULL test keeps an empty list from being dereferenced; the
+	// comparison below then fails and reports the error.
+	pointerToLastButOneSpillover = &(mmPool->firstSpillOverAddress);
+	temp = (void**)(*pointerToLastButOneSpillover);
+	while (temp != NULL && *temp != NULL) {
+		pointerToLastButOneSpillover = temp;
+		temp = (void**)*pointerToLastButOneSpillover;
+	}
+	if (*pointerToLastButOneSpillover != spilloverPointerAddress) {
+		fprintf(stderr, "MMTempReturn(): address != lastSpilloverAddress! Last allocated temp memory must be freed first\n");
+		exit(1);
+	}
+	FREEALIGN(spilloverPointerAddress);
+	*pointerToLastButOneSpillover = NULL;
+
+	if (mmPool->poolByteDispatched + mmPool->currentTempByteDispatched > mmPool->maxTotalByteDispatched) {
+		mmPool->maxTotalByteDispatched = mmPool->poolByteDispatched + mmPool->currentTempByteDispatched;
+	}
+	mmPool->currentTempByteDispatched -= memSize + MAX_ALIGN;
+	mmPool->currentTempByteSpillover -= memSize + MAX_ALIGN;
+
+}
+
+// Prints the report for temp memory. Temp memory is accounted for in the
+// pool it is granted from, so this simply forwards to MMPoolPrintReport().
+void MMTempPrintReport(MMPool *mmPool, FILE *output) {
+
+	MMPoolPrintReport(mmPool, output);
+
+}
+
+// Creates a bulk allocator for items of itemSize bytes.
+//
+// Items are stored in chunks of 2^itemPerAllocationInPowerOf2 items; every
+// chunk is surrounded by boundaryCushionSize bytes on each side. directorySize
+// is the maximum number of chunks. The MMBulk record and its directory come
+// from mmPool, or from unit memory when mmPool is NULL; the first chunk is
+// allocated immediately. The bulk is registered in the first free slot of the
+// master directory. Exits when memory cannot be allocated or no slot is free.
+MMBulk *MMBulkCreate(MMPool *mmPool, const unsigned int itemSize, const unsigned int itemPerAllocationInPowerOf2, 
+					 const unsigned int boundaryCushionSize, const unsigned int directorySize) {
+
+	unsigned int slot;
+	unsigned int chunkBytes;
+	unsigned char *firstChunk;
+	MMBulk *bulk;
+
+	#ifdef DEBUG
+	if (itemSize == 0) {
+		fprintf(stderr, "MMBulkCreate() : itemSize = 0!\n");
+		exit(1);
+	}
+	if (itemPerAllocationInPowerOf2 >= BITS_IN_WORD) {
+		fprintf(stderr, "MMBulkCreate() : itemPerAllocationInPowerOf2 >= BITS_IN_WORD!\n");
+		exit(1);
+	}
+	#endif
+
+	bulk = (mmPool == NULL) ? MMUnitAllocate(sizeof(MMBulk))
+							: MMPoolDispatch(mmPool, sizeof(MMBulk));
+
+	bulk->itemSize = itemSize;
+	bulk->itemPerAllocationInPowerOf2 = itemPerAllocationInPowerOf2;
+	bulk->boundaryCushionSize = boundaryCushionSize;
+	bulk->indexMask = truncateLeft(ALL_ONE_MASK,  BITS_IN_WORD - itemPerAllocationInPowerOf2);
+	bulk->currentDirectoryEntry = 0;
+	bulk->nextUnusedItem = 0;
+	bulk->directorySize = directorySize;
+
+	bulk->directory = (mmPool == NULL) ? MMUnitAllocate(sizeof(unsigned char*) * directorySize)
+									   : MMPoolDispatch(mmPool, sizeof(unsigned char*) * directorySize);
+
+	// Allocate the first chunk; the directory entry points just past the leading cushion
+	chunkBytes = boundaryCushionSize * 2 + (itemSize << itemPerAllocationInPowerOf2);
+	firstChunk = MEMALIGN(chunkBytes, MAX_ALIGN);
+	if (firstChunk == NULL) {
+		fprintf(stderr, "MMBulkCreate() : cannot allocate memory!\n");
+		exit(1);
+	}
+	bulk->directory[0] = firstChunk + boundaryCushionSize;
+
+	// Register in the first free slot of the master directory
+	for (slot=0; slot<mmMaster.maxNumberOfBulks; slot++) {
+		if (mmMaster.mmBulk[slot] != NULL) {
+			continue;
+		}
+		mmMaster.mmBulk[slot] = bulk;
+		return bulk;
+	}
+
+	fprintf(stderr, "MMBulkCreate() : number of bulks > maxNumberOfBulk!\n");
+	exit(1);
+
+}
+
+// Returns 0 when the bulk has been marked inactive (its directory pointer
+// holds the address of the bulk itself, see MMBulkSetInactive()), 1 otherwise.
+unsigned int MMBulkIsActive(const MMBulk *mmBulk) {
+
+	const void *inactiveMarker = (const void*)mmBulk;
+
+	if ((const void*)mmBulk->directory == inactiveMarker) {
+		return 0;
+	}
+	return 1;
+
+}
+
+// Marks the bulk as inactive by pointing its directory at the bulk itself;
+// MMBulkIsActive() tests for exactly this value. The caller is responsible
+// for having released the directory beforehand (MMBulkFree() does so).
+void MMBulkSetInactive(MMBulk *mmBulk) {
+
+	mmBulk->directory = (void*)mmBulk;
+
+}
+
+// Returns the bytes allocated for item chunks so far: the number of chunks in
+// use times the size of one chunk including both boundary cushions.
+unsigned int MMBulkByteAllocated(const MMBulk *mmBulk) {
+
+	const unsigned int chunkCount = mmBulk->currentDirectoryEntry + 1;
+	const unsigned int chunkBytes = mmBulk->boundaryCushionSize * 2 +
+									(mmBulk->itemSize << mmBulk->itemPerAllocationInPowerOf2);
+
+	return chunkCount * chunkBytes;
+
+}
+
+// Returns the bytes dispatched so far: every completed chunk in full, plus
+// the cushions and the items already handed out of the current chunk.
+unsigned int MMBulkByteDispatched(const MMBulk *mmBulk) {
+
+	const unsigned int cushionBytes = mmBulk->boundaryCushionSize * 2;
+	const unsigned int chunkBytes = cushionBytes + (mmBulk->itemSize << mmBulk->itemPerAllocationInPowerOf2);
+	unsigned int byteDispatched;
+
+	byteDispatched = mmBulk->currentDirectoryEntry * chunkBytes;
+	byteDispatched += cushionBytes;
+	byteDispatched += mmBulk->itemSize * mmBulk->nextUnusedItem;
+
+	return byteDispatched;
+
+}
+
+// Returns the number of items dispatched so far: the items of all completed
+// chunks plus the items used in the current chunk.
+unsigned int MMBulkUnitDispatched(const MMBulk *mmBulk) {
+
+	const unsigned int itemsInCompletedChunks = mmBulk->currentDirectoryEntry * (1 << mmBulk->itemPerAllocationInPowerOf2);
+
+	return itemsInCompletedChunks + mmBulk->nextUnusedItem;
+
+}
+
+// Frees all item chunks of the bulk and marks the bulk inactive.
+//
+// The directory itself is released only when the bulk does not live in a
+// pool (MMBulkFindPoolUsed() returns NULL). The MMBulk record stays
+// registered in the master directory; MMBulkDestory() removes it.
+void MMBulkFree(MMBulk *mmBulk) {
+
+	unsigned int entry;
+	unsigned char *chunkStart;
+
+	#ifdef RECORD_GRAND_TOTAL
+	MMMasterSetMaxTotalByteAllocated();
+	MMMasterSetMaxTotalByteDispatched();
+	#endif
+
+	// Directory entries point past the leading cushion; step back to the allocated address
+	entry = 0;
+	while (entry <= mmBulk->currentDirectoryEntry) {
+		chunkStart = mmBulk->directory[entry] - mmBulk->boundaryCushionSize;
+		FREEALIGN(chunkStart);
+		entry++;
+	}
+
+	if (MMBulkFindPoolUsed(mmBulk) == NULL) {
+		MMUnitFree(mmBulk->directory, sizeof(unsigned char*) * mmBulk->directorySize);
+	}
+
+	mmBulk->directory = NULL;
+
+	MMBulkSetInactive(mmBulk);
+
+}
+
+// Removes a bulk from the master directory and releases it.
+//
+// An active bulk is first freed through MMBulkFree(). The MMBulk record is
+// returned to unit memory only when it does not live in a pool. Exits if the
+// bulk is not registered in the master directory.
+void MMBulkDestory(MMBulk *mmBulk) {
+
+	unsigned int i;
+	MMBulk *temp;
+
+	#ifdef DEBUG
+	if (mmBulk == NULL) {
+		fprintf(stderr, "MMBulkDestory(): mmBulk = NULL!\n");
+		exit(1);
+	}
+	#endif
+
+	if (MMBulkIsActive(mmBulk)) {
+		MMBulkFree(mmBulk);
+	}
+
+	temp = mmBulk;
+
+	// Update master directory
+	for (i=0; i<mmMaster.maxNumberOfBulks; i++) {
+		if (mmMaster.mmBulk[i] == temp) {
+			mmMaster.mmBulk[i] = NULL;
+			if (MMBulkFindPoolUsed(temp) == NULL) {
+				MMUnitFree(temp, sizeof(MMBulk));
+			}
+			temp = NULL;	// marks the bulk as found for the check below
+			// Stop here: with temp == NULL every remaining empty slot would
+			// also compare equal and MMUnitFree(NULL, ...) would be called for it
+			break;
+		}
+	}
+
+	if (temp != NULL) {
+		fprintf(stderr, "MMBulkDestory() : cannot locate bulk in master!\n");
+		exit(1);
+	}
+
+}
+// Dispatches the next item of the bulk and returns its index.
+//
+// The index combines the chunk number (upper bits) with the position inside
+// the chunk (lower itemPerAllocationInPowerOf2 bits). When the current chunk
+// is full, the next chunk is allocated first. Exits when the directory is
+// exhausted or memory cannot be allocated.
+unsigned int MMBulkDispatch(MMBulk *mmBulk) {
+
+	unsigned int index;
+	unsigned char *chunk;
+
+	if ((mmBulk->nextUnusedItem >> mmBulk->itemPerAllocationInPowerOf2) != 0) {
+		// Current chunk is full; move on to the next directory entry
+		mmBulk->currentDirectoryEntry++;
+		if (mmBulk->currentDirectoryEntry >= mmBulk->directorySize) {
+			fprintf(stderr, "MMBulkDispatch() : memory directory size overflow!\n");
+			exit(1);
+		}
+		chunk = MEMALIGN(mmBulk->boundaryCushionSize * 2 + (mmBulk->itemSize << mmBulk->itemPerAllocationInPowerOf2), MAX_ALIGN);
+		if (chunk == NULL) {
+			fprintf(stderr, "MMBulkDispatch() : cannot allocate memory!\n");
+			exit(1);
+		}
+		// The directory entry points just past the leading cushion
+		mmBulk->directory[mmBulk->currentDirectoryEntry] = chunk + mmBulk->boundaryCushionSize;
+		mmBulk->nextUnusedItem = 0;
+	}
+
+	index = (mmBulk->currentDirectoryEntry << mmBulk->itemPerAllocationInPowerOf2) | mmBulk->nextUnusedItem;
+	mmBulk->nextUnusedItem++;
+
+	return index;
+
+}
+
+// Returns the address of the item with the given dispatch index.
+//
+// The upper bits of index select the chunk in the directory, the lower
+// itemPerAllocationInPowerOf2 bits select the item inside the chunk.
+// In DEBUG builds the function exits when index lies beyond the chunks
+// allocated so far.
+void *MMBulkAddress(const MMBulk *mmBulk, const unsigned int index) {
+
+	#ifdef DEBUG
+	// Chunks 0..currentDirectoryEntry are allocated, so the first index
+	// without backing memory is (currentDirectoryEntry + 1) << itemPerAllocationInPowerOf2.
+	// (OR-ing nextUnusedItem into this bound, as was done before, let
+	// indices of the not yet allocated next chunk pass the check.)
+	if (index >= ((mmBulk->currentDirectoryEntry+1) << mmBulk->itemPerAllocationInPowerOf2)) {
+		fprintf(stderr, "MMBulkAddress() : index out of range!\n");
+		exit(1);
+	}
+	#endif
+
+	return &(mmBulk->directory[index >> mmBulk->itemPerAllocationInPowerOf2][(index & mmBulk->indexMask) * mmBulk->itemSize]);
+}
+
+// Finds the pool the MMBulk record was dispatched from.
+//
+// Every registered pool is checked in turn: first whether the record lies in
+// the address range of the pool, then whether it is the payload of one of
+// the pool's spillover blocks. Returns the pool, or NULL when no registered
+// pool matches.
+MMPool *MMBulkFindPoolUsed(const MMBulk *mmBulk) {
+
+	unsigned int i;
+	MMPool *pool;
+	void *temp;
+
+	for (i=0; i<mmMaster.maxNumberOfPools; i++) {
+		pool = mmMaster.mmPool[i];
+		if (pool == NULL) {
+			continue;
+		}
+		if ((void*)mmBulk >= (void*)pool &&
+			(void*)mmBulk <= (void*)((char*)pool + pool->poolSize)) {
+			return pool;
+		}
+		// An inactive pool stores its own address in firstSpillOverAddress
+		// (see MMPoolSetInactive()); that is a marker, not a list node, and
+		// must not be followed
+		if (pool->firstSpillOverAddress == (void*)pool) {
+			continue;
+		}
+		temp = pool->firstSpillOverAddress;
+		while (temp != NULL) {
+			// MMPoolDispatch() hands out the address MAX_ALIGN bytes past
+			// the start of a spillover block
+			if ((void*)((char*)temp + MAX_ALIGN) == (void*)mmBulk) {
+				return pool;
+			}
+			temp = *((void**)temp);
+		}
+	}
+
+	return NULL;
+
+}
+
+// Writes the allocated and dispatched byte counts of the bulk to output.
+void MMBulkPrintReport(MMBulk *mmBulk, FILE *output){
+
+	const unsigned int byteAllocated = MMBulkByteAllocated(mmBulk);
+	const unsigned int byteDispatched = MMBulkByteDispatched(mmBulk);
+
+	fprintf(output, "Memory allocated  : %u\n", byteAllocated);
+	fprintf(output, "Memory dispatched : %u\n", byteDispatched);
+
+}
+
+// Writes the bulk to output in the format read back by MMBulkLoad():
+// six unsigned int header fields, the item area of every completed chunk
+// and finally the used items of the current chunk. Boundary cushions are
+// not written.
+void MMBulkSave(MMBulk *mmBulk, FILE *output) {
+
+	const unsigned int fullChunkBytes = mmBulk->itemSize << mmBulk->itemPerAllocationInPowerOf2;
+	const unsigned int lastEntry = mmBulk->currentDirectoryEntry;
+	unsigned int entry;
+
+	fwrite(&mmBulk->itemSize, sizeof(unsigned int), 1, output);
+	fwrite(&mmBulk->itemPerAllocationInPowerOf2, sizeof(unsigned int), 1, output);
+	fwrite(&mmBulk->boundaryCushionSize, sizeof(unsigned int), 1, output);
+	fwrite(&mmBulk->currentDirectoryEntry, sizeof(unsigned int), 1, output);
+	fwrite(&mmBulk->nextUnusedItem, sizeof(unsigned int), 1, output);
+	fwrite(&mmBulk->directorySize, sizeof(unsigned int), 1, output);
+
+	// Completed chunks are written in full
+	entry = 0;
+	while (entry < lastEntry) {
+		fwrite(mmBulk->directory[entry], fullChunkBytes, 1, output);
+		entry++;
+	}
+
+	// Only the used part of the current chunk is written
+	if (mmBulk->nextUnusedItem > 0) {
+		fwrite(mmBulk->directory[lastEntry], mmBulk->itemSize * mmBulk->nextUnusedItem, 1, output);
+	}
+
+}
+
+// Reads size bytes from input into destination; exits on a short read.
+static void MMBulkLoadRead(void *destination, const unsigned int size, FILE *input) {
+
+	if (fread(destination, size, 1, input) != 1) {
+		fprintf(stderr, "MMBulkLoad() : cannot read from file!\n");
+		exit(1);
+	}
+
+}
+
+// Allocates one chunk for mmBulk (item area plus both cushions) and returns
+// the address just past the leading cushion; exits when allocation fails.
+static unsigned char *MMBulkLoadAllocateChunk(const MMBulk *mmBulk) {
+
+	unsigned char *chunk;
+
+	chunk = MEMALIGN(mmBulk->boundaryCushionSize * 2 +
+					 (mmBulk->itemSize << mmBulk->itemPerAllocationInPowerOf2), MAX_ALIGN);
+	if (chunk == NULL) {
+		fprintf(stderr, "MMBulkLoad() : cannot allocate memory!\n");
+		exit(1);
+	}
+
+	return chunk + mmBulk->boundaryCushionSize;
+
+}
+
+// Re-creates a bulk from the data written by MMBulkSave().
+//
+// The MMBulk record and its directory are dispatched from mmPool (from unit
+// memory when mmPool is NULL), the chunks are allocated and filled from
+// input, and the bulk is registered in the first free slot of the master
+// directory. Exits when the input is truncated, when the header read from
+// the file is inconsistent, when memory cannot be allocated or when no
+// master slot is free.
+MMBulk *MMBulkLoad(MMPool *mmPool, FILE *input) {
+
+	unsigned int i;
+	MMBulk *mmBulk;
+
+	mmBulk = MMPoolDispatch(mmPool, sizeof(MMBulk));
+
+	MMBulkLoadRead(&mmBulk->itemSize, sizeof(unsigned int), input);
+	MMBulkLoadRead(&mmBulk->itemPerAllocationInPowerOf2, sizeof(unsigned int), input);
+	MMBulkLoadRead(&mmBulk->boundaryCushionSize, sizeof(unsigned int), input);
+	MMBulkLoadRead(&mmBulk->currentDirectoryEntry, sizeof(unsigned int), input);
+	MMBulkLoadRead(&mmBulk->nextUnusedItem, sizeof(unsigned int), input);
+	MMBulkLoadRead(&mmBulk->directorySize, sizeof(unsigned int), input);
+
+	// The header comes from a file: reject values that would lead to an
+	// invalid shift or to writes beyond the directory below
+	if (mmBulk->itemSize == 0 ||
+		mmBulk->itemPerAllocationInPowerOf2 >= BITS_IN_WORD ||
+		mmBulk->currentDirectoryEntry >= mmBulk->directorySize ||
+		mmBulk->nextUnusedItem > ((unsigned int)1 << mmBulk->itemPerAllocationInPowerOf2)) {
+		fprintf(stderr, "MMBulkLoad() : invalid bulk header!\n");
+		exit(1);
+	}
+
+	mmBulk->indexMask = truncateLeft(ALL_ONE_MASK,  BITS_IN_WORD - mmBulk->itemPerAllocationInPowerOf2);
+
+	mmBulk->directory = MMPoolDispatch(mmPool, sizeof(unsigned char*) * mmBulk->directorySize);
+
+	// Completed chunks were saved in full
+	for (i=0; i<mmBulk->currentDirectoryEntry; i++) {
+		mmBulk->directory[i] = MMBulkLoadAllocateChunk(mmBulk);
+		MMBulkLoadRead(mmBulk->directory[i], mmBulk->itemSize << mmBulk->itemPerAllocationInPowerOf2, input);
+	}
+
+	// The current chunk is allocated in full, but only its used items were saved
+	mmBulk->directory[i] = MMBulkLoadAllocateChunk(mmBulk);
+	if (mmBulk->nextUnusedItem > 0) {
+		MMBulkLoadRead(mmBulk->directory[i], mmBulk->itemSize * mmBulk->nextUnusedItem, input);
+	}
+
+	for (i=0; i<mmMaster.maxNumberOfBulks; i++) {
+		if (mmMaster.mmBulk[i] == NULL) {
+			mmMaster.mmBulk[i] = mmBulk;
+			return mmBulk;
+		}
+	}
+
+	fprintf(stderr, "MMBulkLoad() : number of bulks > maxNumberOfBulk!\n");
+	exit(1);
+
+}
diff --git a/MemManager.h b/MemManager.h
new file mode 100644
index 0000000..e62f96b
--- /dev/null
+++ b/MemManager.h
@@ -0,0 +1,151 @@
+/*
+
+   MemManager.h		Memory Manager
+
+   This module provides memory management functions.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __MEM_MANAGER_H__
+#define __MEM_MANAGER_H__
+
+#include "TypeNLimit.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+// Alignment limits, in bytes, used when dispatching memory.
+#define MAX_ALIGN	64	// All memory except pool memory are aligned to MAX_ALIGN; pool memory is aligned to finer boundary for small memory size
+#define MIN_ALIGN	1	// Smallest alignment applied to pool memory
+
+// When defined, pool and bulk operations update the master-level maxima
+// through MMMasterSetMaxTotalByteAllocated() / MMMasterSetMaxTotalByteDispatched()
+#define RECORD_GRAND_TOTAL
+
+//	Memory type:
+//
+//		unit memory:	allocation managed by malloc() individually;
+//						to be used for large and less frequently accessed items
+//						allocation can be freed individually at any time
+//		pool memory:	pre-allocated memory pool for items with varying sizes
+//						allocation cannot be freed individually
+//						to be used for small and frequently accessed items
+//		temp memory:	temporary use granted from pool memory
+//						allocation is allocated and freed like the items in a stack
+//						pool memory allocation is disabled while temporary memory is in use
+//		bulk memory:	pre-allocated memory pool for items with the same size
+//						to be used for massively numbered items
+//						memory address of dispatched items can be calculated by dispatch index
+
+
+// Mem(mmBulk, index): address of the bulk item with the given dispatch index.
+// DEBUG builds go through MMBulkAddress(), which range-checks the index;
+// otherwise the address is computed inline. Every macro argument is
+// parenthesized so that expressions such as &bulk or i + 1 can be passed.
+// Note that both arguments are evaluated more than once in non-DEBUG builds.
+#ifdef DEBUG
+#define Mem(mmBulk, index)  MMBulkAddress((mmBulk), (index))
+#else
+#define Mem(mmBulk, index) 	((void*)&((mmBulk)->directory[(index) >> (mmBulk)->itemPerAllocationInPowerOf2][((index) & (mmBulk)->indexMask) * (mmBulk)->itemSize]))
+#endif
+
+// Bookkeeping header of a memory pool. Pool memory is dispatched from the
+// front of the pool and temp memory from its end; requests that do not fit
+// are served from separately allocated spillover blocks.
+typedef struct MMPool {
+	unsigned int poolSize;						// Size of memory pool; the beginning of the pool holds the MMPool structure
+	unsigned int poolByteDispatched;			// Includes any spillover and memory skipped for align
+	unsigned int poolByteSpillover;				// Pool bytes held in spillover blocks; MMPoolDispatch() adds memSize + MAX_ALIGN per block, i.e. the list link header is counted
+	unsigned int currentTempByteDispatched;		// Includes any spillover
+	unsigned int currentTempByteSpillover;		// Temp bytes held in spillover blocks; MMTempDispatch() adds memSize + MAX_ALIGN per block
+	unsigned int maxTotalByteDispatched;		// The max of pool memory + temp memory dispatched
+	void *firstSpillOverAddress;				// Head of the linked list of spillover blocks, NULL if none; if pool is freed, = address of mmPool
+} MMPool;
+
+
+// Allocator for equally sized items. Items live in chunks of
+// 2^itemPerAllocationInPowerOf2 items; directory[] holds one pointer per chunk.
+typedef struct MMBulk {
+	unsigned int itemSize;						// Size of one item in bytes
+	unsigned int itemPerAllocationInPowerOf2;	// log2 of the number of items per chunk
+	unsigned int boundaryCushionSize;			// boundary cushion is a piece of memory allocated so that the memory around items can be safely referenced
+	unsigned int indexMask;						// Mask extracting the position inside a chunk from a dispatch index
+	unsigned int currentDirectoryEntry;			// Directory entry of the chunk currently being filled
+	unsigned int nextUnusedItem;				// Position of the next item to dispatch in the current chunk
+	unsigned int directorySize;					// Number of entries in directory[]
+	unsigned char **directory;			// Chunk pointers (each past its leading cushion); once the bulk is set inactive, = address of the mmBulk itself
+} MMBulk;
+
+// Global registry of all pools and bulks together with overall statistics.
+typedef struct MMMaster {
+	unsigned int currentUnitByteAllocated;	// presumably the unit memory currently allocated - confirm in MMUnitAllocate()/MMUnitFree()
+	unsigned int maxUnitByteAllocated;		// presumably the peak of currentUnitByteAllocated - confirm
+	unsigned int maxNumberOfPools;			// Number of slots in mmPool[]
+	MMPool **mmPool;						// Registered pools; an unused slot is NULL
+	unsigned int maxNumberOfBulks;			// Number of slots in mmBulk[]
+	MMBulk **mmBulk;						// Registered bulks; an unused slot is NULL
+	unsigned int maxTotalByteAllocated;		// presumably maintained by MMMasterSetMaxTotalByteAllocated() - confirm
+	unsigned int maxTotalByteDispatched;	// presumably maintained by MMMasterSetMaxTotalByteDispatched() - confirm
+	int traceUnitByteAllocation;			// As passed to MMMasterInitialize(); presumably enables tracing of unit allocations - confirm
+	FILE *unitByteTraceFile;				// As passed to MMMasterInitialize(); presumably the trace destination - confirm
+} MMMaster;
+
+// General allocation wrappers and master-level functions.
+// NOTE(review): the definitions are outside this part of the file; see
+// MemManager.c for the exact behaviour of each function.
+void *MMMalloc(const unsigned int memSize);
+void MMFree(void *address);
+void MMMasterInitialize(const unsigned int maxNumberOfPools, const unsigned int maxNumberOfBulks,
+						const int traceUnitByteAllocation, FILE *unitByteTraceFile);
+void MMMasterFreeAll();
+unsigned int MMMasterCurrentTotalByteAllocated();
+unsigned int MMMasterCurrentTotalByteDispatched();
+unsigned int MMMasterMaxTotalByteAllocated();
+unsigned int MMMasterMaxTotalByteDispatched();
+void MMMasterSetMaxTotalByteAllocated();
+void MMMasterSetMaxTotalByteDispatched();
+void MMMasterPrintReport(FILE *output, const unsigned int withUnitDetails, const unsigned int withPoolDetails, const unsigned int withBulkDetails);
+
+// Unit memory: individually allocated and freed blocks. The size has to be
+// passed again when a block is freed or reallocated.
+void *MMUnitAllocate(const unsigned int memSize);
+void *MMUnitReallocate(void *address, const unsigned int newMemSize, const unsigned int oldMemSize);
+void MMUnitFree(void *address, const unsigned int memSize);
+unsigned int MMUnitCurrentByteAllocated();
+unsigned int MMUnitMaxByteAllocated();
+void MMUnitPrintReport(FILE *output);
+
+// Pool memory
+MMPool *MMPoolCreate(const unsigned int poolSize);
+unsigned int MMPoolIsActive(const MMPool *mmPool);
+void MMPoolSetInactive(MMPool *mmPool);												// Marks the pool inactive by storing its own address in firstSpillOverAddress
+unsigned int MMPoolCurrentTotalByteAllocated(const MMPool *mmPool);					// Pool size + pool spillover + temp spillover
+unsigned int MMPoolCurrentTotalByteDispatched(const MMPool *mmPool);				// Pool bytes + temp bytes dispatched
+unsigned int MMPoolMaxTotalByteDispatched(const MMPool *mmPool);					// Max of recorded peak and current total dispatched
+unsigned int MMPoolByteAvailable(const MMPool *mmPool);								// Undispatched pool bytes rounded up to MAX_ALIGN; 0 if not more than MAX_ALIGN remain
+MMPool *MMPoolFree(MMPool *mmPool);													// Frees the pool memory; returns an inactive record keeping the statistics
+void MMPoolReset(MMPool *mmPool);													// Frees spillover and resets the counters
+void MMPoolDestory(MMPool *mmPool);													// Frees the pool and removes it from the master directory
+void *MMPoolDispatch(MMPool *mmPool, const unsigned int memSize);					// Uses MMUnitAllocate() when mmPool is NULL
+unsigned int MMPoolDispatchOffset(MMPool *mmPool, const unsigned int memSize);		// As MMPoolDispatch() but returns the offset from the pool start; no spillover
+void MMPoolReturn(MMPool *mmPool, void *address, const unsigned int memSize);		// Dummy function
+void MMPoolPrintReport(MMPool *mmPool, FILE *output);
+
+// Temp memory: granted from the end of a pool; a spillover block must be
+// returned before any spillover block dispatched earlier
+void *MMTempDispatch(MMPool *mmPool, const unsigned int memsize);
+void MMTempReturn(MMPool *mmPool, void *address, const unsigned int memSize);		// memSize must equal the size that was dispatched
+void MMTempPrintReport(MMPool *mmPool, FILE *output);								// Same output as MMPoolPrintReport()
+
+// Bulk memory
+MMBulk *MMBulkCreate(MMPool *mmPool, const unsigned int itemSize, const unsigned int itemPerAllocationInPowerOf2, 
+					 unsigned int const boundaryCushionSize, unsigned int const directorySize);
+unsigned int MMBulkIsActive(const MMBulk *mmBulk);
+void MMBulkSetInactive(MMBulk *mmBulk);
+unsigned int MMBulkByteAllocated(const MMBulk *mmBulk);
+unsigned int MMBulkByteDispatched(const MMBulk *mmBulk);
+unsigned int MMBulkUnitDispatched(const MMBulk *mmBulk);							// Number of items dispatched so far
+void MMBulkFree(MMBulk *mmBulk);													// Frees the chunks and marks the bulk inactive
+void MMBulkDestory(MMBulk *mmBulk);													// Frees the bulk and removes it from the master directory
+unsigned int MMBulkDispatch(MMBulk *mmBulk);										// Returns the index of the next item; resolve it with Mem() / MMBulkAddress()
+void *MMBulkAddress(const MMBulk *mmBulk, const unsigned int index);
+MMPool *MMBulkFindPoolUsed(const MMBulk *mmBulk);									// NULL when the bulk record is not located in a registered pool
+void MMBulkPrintReport(MMBulk *mmBulk, FILE *output);
+
+// Binary save / load of a bulk; boundary cushions are not stored
+void MMBulkSave(MMBulk *mmBulk, FILE *output);
+MMBulk *MMBulkLoad(MMPool *mmPool, FILE *input);
+
+
+#endif
diff --git a/MiscUtilities.c b/MiscUtilities.c
new file mode 100644
index 0000000..83ae38f
--- /dev/null
+++ b/MiscUtilities.c
@@ -0,0 +1,1368 @@
+/*
+
+   MiscUtilities.c		Miscellaneous Utilities
+
+   This module contains miscellaneous utility functions.
+
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "MiscUtilities.h"
+
+// static functions
+// Scores one window of len characters; reports the best scoring stretch through *beg / *end
+static int DustWo(const int len, const unsigned char *s, int *beg, int *end, const int wo);
+// Scores the suffix of a window starting at offset ivv; updates *mv, *iv and *jv when a higher score is found
+static void DustWo1(const int len, const unsigned char *s, const int ivv, const int wo, int *mv, int *iv, int *jv);
+
+
+
+// Converts pattern[position] to lower case when it is an upper case letter
+// and position lies inside [0, len).
+static void DustMaskPosition(unsigned char *pattern, const int len, const int position) {
+
+	if (position >= 0 && position < len) {
+		if (pattern[position] >= 'A' && pattern[position] <= 'Z') {
+			pattern[position] += 'a' - 'A';
+		}
+	}
+
+}
+
+// Masks stretches of pattern by converting them to lower case, in place.
+//
+// The pattern is scanned in windows of `window` characters that advance by
+// half a window. Each window is scored by DustWo(); when the score exceeds
+// `cutoff`, the stretch reported by DustWo() is masked. The part of that
+// stretch lying in the second half of the window is masked during the next
+// step.
+//
+//   patternLength  number of characters in pattern
+//   pattern        upper case text; modified in place
+//   cutoff         score threshold; 0 selects the default 20
+//   window         window size; 0 selects the default 64
+//   word           word size passed to DustWo(); 0 selects the default 3
+void Dust(const unsigned int patternLength, unsigned char *pattern, const unsigned int cutoff, const unsigned int window, const unsigned int word) {
+
+	int i, j, l;
+	int from ,to;
+	int a, b, v;
+
+	int len;
+	int level;
+	int win, win2;
+	int wo;
+
+	// Set default parameters
+	level = (cutoff == 0) ? 20 : (int)cutoff;
+	win = (window == 0) ? 64 : (int)window;
+	wo = (word == 0) ? 3 : (int)word;
+
+	win2 = win / 2;
+	if (win2 < 1) {
+		// A window of 1 would give a step of 0 and the loop below would
+		// never advance; always move by at least one character
+		win2 = 1;
+	}
+	len = (int)patternLength;
+
+	from = 0;
+	to = -1;
+	for (i=0; i < len; i += win2) {
+		// [from, to] is the stretch carried over from the previous window,
+		// expressed relative to the start of the current window
+		from -= win2;
+		to -= win2;
+		l = (len > i+win) ? win : len-i;
+		v = DustWo(l, pattern+i, &a, &b, wo);
+		for (j = from; j <= to; j++) {
+			DustMaskPosition(pattern, len, i+j);
+		}
+		if (v > level) {
+			// Mask the part of [a, b] in the first half of the window now
+			for (j = a; j <= b && j < win2; j++) {
+				DustMaskPosition(pattern, len, i+j);
+			}
+			// The remainder is masked in the next step
+			from = j;
+			to = b;
+		} else {
+			from = 0;
+			to = -1;
+		}
+	}
+
+}
+
+// Scores a window of len characters starting at s.
+//
+// DustWo1() is run on every suffix of the window that still contains at
+// least one word of wo characters. Returns the highest score found; *beg
+// receives the offset where that score started and *end that offset plus the
+// length reported by DustWo1(). When len - wo + 1 is negative, the whole
+// window [0, len - 1] is reported with score 0.
+static int DustWo(const int len, const unsigned char *s, int *beg, int *end, const int wo) {
+	const int wordCount = len - wo + 1;
+	int bestScore = 0;
+	int bestStart = 0;
+	int bestLength = 0;
+	int offset;
+
+	if (wordCount < 0) {
+		*beg = 0;
+		*end = len - 1;
+		return 0;
+	}
+
+	offset = 0;
+	while (offset < wordCount) {
+		DustWo1(len - offset, s + offset, offset, wo, &bestScore, &bestStart, &bestLength);
+		offset++;
+	}
+
+	*beg = bestStart;
+	*end = bestStart + bestLength;
+	return bestScore;
+}
+
+// Scores the text s[0 .. len-1] by counting repeated words.
+//
+// Upper case letters are packed 5 bits each into a rolling code that keeps
+// the last three letters. Once at least wo consecutive upper case letters
+// have been seen, the code is looked up: every earlier occurrence of the
+// same code adds to a running sum, and the score at position j is
+// 10 * sum / j. Whenever that score exceeds *mv, *mv is set to the score,
+// *iv to ivv and *jv to j. Any other character (including lower case, i.e.
+// already masked text) restarts the run of consecutive letters.
+//
+// The tables are static, so the function is not reentrant.
+static void DustWo1(const int len, const unsigned char *s, const int ivv, const int wo, int *mv, int *iv, int *jv) {
+	int i, ii, j, v, t, n, n1, sum;
+	static int counts[32*32*32];
+	static int iis[32*32*32];
+	int js, nis;
+
+	n = 32 * 32 * 32;
+	n1 = n - 1;
+	nis = 0;	// number of distinct codes seen in this call
+	i = 0;		// length of the current run of upper case letters
+	ii = 0;		// rolling code of the last letters
+	sum = 0;
+	v = 0;
+	for (j=0; j < len; j++, s++) {
+		// Mask right after shifting: only the low 15 bits are ever used, and
+		// an unmasked shift overflows a signed int on a run of other characters
+		ii = (ii << 5) & n1;
+		if (*s >= 'A' && *s <= 'Z') {
+			ii |= *s - 'A';
+		} else {
+			// Ignoring lower case
+			i = 0;
+			continue;
+		}
+		i++;
+		if (i >= wo) {
+			// Look the code up in the list of codes seen so far
+			for (js=0; js < nis && iis[js] != ii; js++) ;
+			if (js == nis) {
+				// First occurrence in this call: reset the stale static count
+				iis[nis] = ii;
+				counts[ii] = 0;
+				nis++;
+			}
+			if ((t = counts[ii]) > 0) {
+				sum += t;
+				v = 10 * sum / j;
+				if (*mv < v) {
+					*mv = v;
+					*iv = ivv;
+					*jv = j;
+				}
+			}
+			counts[ii]++;
+		}
+	}
+}
+
+// Fills the code tables for every domain size from 2 up to limit.
+//
+// For a domain size d, codeLength[d][i] and codeValue[d][i] (1 <= i <= d)
+// receive the bit length and the value of the code for number i. The table
+// for d is derived from the table for d - 1 by lengthening one existing code
+// by a bit and giving the new number d the same length.
+//
+// The function only writes to the tables; presumably the caller provides
+// rows codeValue[d] / codeLength[d] with at least d + 1 entries for every
+// d in 2..limit - confirm against the caller.
+void LimitCodeGenerateCodeTable(const unsigned int limit, unsigned int** codeValue, unsigned int** codeLength) {
+
+	unsigned int i, j;
+	unsigned int code, c;
+	unsigned int domainSize;
+	unsigned int gammaCodeLength;
+	unsigned int bitToExpand;
+	unsigned int expandBitPosition;
+
+	#ifdef DEBUG
+	if (limit <= 1) {
+		fprintf(stderr, "LimitCodeGenerateCodeTable(): Limit <= 1!\n");
+		exit(1);
+	}
+	#endif
+
+	// Base case: with two numbers each gets a one bit code
+	domainSize = 2;
+	codeLength[domainSize][1] = 1;
+	codeLength[domainSize][2] = 1;
+	codeValue[domainSize][1] = 0;
+	codeValue[domainSize][2] = 1;
+
+	// First determine number of bit
+
+	domainSize++;
+	while (domainSize <= limit) {
+		// copy from domainSize - 1
+		for (i=1; i<domainSize; i++) {
+			codeLength[domainSize][i] = codeLength[domainSize - 1][i];
+		}
+		// find the first number with number of bit < that of Gamma
+		init(bitToExpand);	// to avoid compiler warning only
+		init(expandBitPosition);	// to avoid compiler warning only
+		for (i=1; i<domainSize; i++) {
+			gammaCodeLength = floorLog2(i) * 2 + 1;
+			if (codeLength[domainSize][i] < gammaCodeLength) {
+				bitToExpand = codeLength[domainSize][i];
+				break;
+			}
+		}
+		// find the last number with number of bit for expand
+		for (i=domainSize-1; i>0; i--) {
+			if (codeLength[domainSize][i] == bitToExpand) {
+				expandBitPosition = i;
+				break;
+			}
+		}
+		// Increase the number of bit at expandBitPosition and assign the same number of bit to the next code
+		codeLength[domainSize][expandBitPosition]++;
+		codeLength[domainSize][domainSize] = codeLength[domainSize][expandBitPosition];
+
+		// Assign code value
+		codeValue[domainSize][1] = 0;	// 1 always take '0' as code
+		code = 0;
+		for (i=2; i<=domainSize; i++) {
+			// Advance the candidate code while its leading bits equal the code of a preceding number j.
+			// NOTE(review): the shift assumes codeLength[i] >= codeLength[j] for j < i - confirm.
+			for (j=1; j<i; j++) {
+				while (TRUE) {
+					c = code >> (codeLength[domainSize][i] - codeLength[domainSize][j]);
+					if (c == codeValue[domainSize][j]) {
+						code++;	// code conflict
+					} else {
+						break;	// no conflict, proceed to check next number
+					}
+				}
+			}
+			// all preceding numbers checked
+			codeValue[domainSize][i] = code;
+			code++;
+		}
+			
+		domainSize++;
+
+	}
+
+}
+
+// Comparison callback for QSort() on an array of unsigned int.
+// Returns 1 when data[index1] > data[index2], -1 when it is smaller and 0
+// when both values are equal.
+int QSortUnsignedIntOrder(const void *data, const int index1, const int index2) {
+
+	const unsigned int *values = (const unsigned int*)data;
+	const unsigned int first = values[index1];
+	const unsigned int second = values[index2];
+
+	if (first > second) {
+		return 1;
+	}
+	if (first < second) {
+		return -1;
+	}
+	return 0;
+
+}
+
+// Exchanges the elements at index1 and index2 of an array whose elements
+// are dataWidth bytes wide, one byte at a time.
+static void QSortSwap(void* __restrict data, const int dataWidth, const int index1, const int index2) {
+
+	char *bytes = (char*)data;
+	const int firstOffset = index1 * dataWidth;
+	const int secondOffset = index2 * dataWidth;
+	int position = 0;
+	char held;
+
+	while (position < dataWidth) {
+		held = bytes[firstOffset + position];
+		bytes[firstOffset + position] = bytes[secondOffset + position];
+		bytes[secondOffset + position] = held;
+		position++;
+	}
+
+}
+
+// Sort, in place, numData records of dataWidth bytes each stored back to back at data.
+// QSortComp(data, i, j) compares the records at element indices i and j; this routine
+// treats a result > 0 as "record i must come after record j" and 0 as "equal keys".
+// The sort is an iterative quicksort: the pivot is the median of the first, middle and
+// last record, pending ranges are kept on an explicit stack instead of recursing, and
+// ranges of at most SMALL_ARRAY_SIZE + 1 records are finished with insertion sort.
+// NOTE(review): average() is defined elsewhere - presumably the midpoint of its two arguments.
+// NOTE(review): SMALL_ARRAY_SIZE and EQUAL_KEY_THRESHOLD are #defined inside the body and are
+// not #undef'd here, so they stay defined for the code that follows this function.
+void QSort(void* __restrict data, const int numData, const int dataWidth, int (*QSortComp)(const void*, const int, const int) ) {
+
+	#define SMALL_ARRAY_SIZE	8	// Use insertion sort if data array size is smaller than or equal to SMALL_ARRAY_SIZE
+	#define EQUAL_KEY_THRESHOLD	4	// Partition for equal key if data array size / the number of data with equal value with partition key < EQUAL_KEY_THRESHOLD
+
+	int lowIndex, highIndex, midIndex;
+	int lowPartitionIndex, highPartitionIndex;
+	// Explicit stack of [low, high] index ranges that still have to be sorted.
+	// NOTE(review): fixed capacity of 32 with no overflow check; presumably relies on
+	// always continuing with the smaller partition to keep the depth logarithmic.
+	int lowStack[32], highStack[32];
+	int stackDepth;
+	int i, j;
+	int c;
+	int numberOfEqualKey;
+
+	// Fewer than two records are already in order
+	if (numData < 2) {
+		return;
+	}
+
+	stackDepth = 0;
+
+    lowIndex = 0;
+    highIndex = numData - 1;
+
+	// Outer loop: one pass per range taken from the stack (the first pass handles the whole array)
+	for (;;) {
+
+		// Inner loop: keep partitioning [lowIndex, highIndex] and continue with one of the two
+		// parts until the range is small enough for insertion sort or the low part is empty
+		for (;;) {
+
+			if (highIndex - lowIndex < SMALL_ARRAY_SIZE) {
+
+				// Sort small array of data by insertion sort
+
+				for (i=lowIndex+1; i<=highIndex; i++) {
+					// Sink record i towards lowIndex until its predecessor is not larger
+					for (j=i; j>lowIndex && QSortComp(data, j - 1, j) > 0; j--) {
+						QSortSwap(data, dataWidth, j - 1, j);
+					}
+				}
+				break;
+
+			} else {
+
+				// Choose pivot as median of the lowest, middle, and highest data; sort the three data
+
+				midIndex = average(lowIndex, highIndex);
+				if (QSortComp(data, lowIndex, midIndex) > 0) {
+					QSortSwap(data, dataWidth, lowIndex, midIndex);
+				}
+				if (QSortComp(data, lowIndex, highIndex) > 0) {
+					QSortSwap(data, dataWidth, lowIndex, highIndex);
+				}
+				if (QSortComp(data, midIndex, highIndex) > 0) {
+					QSortSwap(data, dataWidth, midIndex, highIndex);
+				}
+
+				// Move partition key to the 2nd entry
+				QSortSwap(data, dataWidth, midIndex, lowIndex + 1);
+				midIndex = lowIndex + 1;
+			
+				// Partition data
+
+				// Counts the comparisons against the partition key that returned 0
+				numberOfEqualKey = 0;
+
+				// The first two entries and the last entry were placed by the median-of-three step,
+				// so scanning starts just inside them
+				lowPartitionIndex = lowIndex + 2;
+				highPartitionIndex = highIndex - 1;
+
+				for (;;) {
+					// Keys that are equal to the partition key are sorted into the low partition
+					// Advance from the left until a record larger than the partition key is found
+					while (lowPartitionIndex <= highPartitionIndex) {
+						c = QSortComp(data, lowPartitionIndex, midIndex);
+						numberOfEqualKey += (c == 0);
+						if (c > 0) {
+							break;
+						}
+						lowPartitionIndex++;
+					}
+					// Retreat from the right until a record not larger than the partition key is found
+					while (lowPartitionIndex < highPartitionIndex) {
+						c = QSortComp(data, midIndex, highPartitionIndex);
+						numberOfEqualKey += (c == 0);
+						if (c >= 0) {
+							break;
+						}
+						highPartitionIndex--;
+					}
+					if (lowPartitionIndex < highPartitionIndex) {
+						// The two records are on the wrong sides of the partition key: exchange them
+						QSortSwap(data, dataWidth, lowPartitionIndex, highPartitionIndex);
+						//if (highPartitionIndex == midIndex) {
+						//	// partition key has been moved
+						//	midIndex = lowPartitionIndex;
+						//}
+						lowPartitionIndex++;
+						highPartitionIndex--;
+					} else {
+						// The two scans have met: partitioning is complete
+						break;
+					}
+				}
+
+				// Adjust the partition index
+				// From here on the high partition is [highPartitionIndex, highIndex] and
+				// lowPartitionIndex is the slot that receives the partition key
+				highPartitionIndex = lowPartitionIndex;
+				lowPartitionIndex--;
+
+				// move the partition key to end of low partition
+				QSortSwap(data, dataWidth, midIndex, lowPartitionIndex);
+
+				if (highIndex - lowIndex + SMALL_ARRAY_SIZE > EQUAL_KEY_THRESHOLD * numberOfEqualKey) {
+					// Few keys equal to the partition key: no extra pass needed
+				} else {
+
+					// Many keys equals to the partition key; separate the equal key data from the lower partition
+			
+					midIndex = lowIndex;
+
+					for (;;) {
+						// Skip records at the front that are smaller than the partition key
+						while (midIndex < lowPartitionIndex && QSortComp(data, midIndex, lowPartitionIndex) < 0) {
+							midIndex++;
+						}
+						// Extend the run of equal keys downwards from the partition key
+						while (midIndex < lowPartitionIndex && QSortComp(data, lowPartitionIndex, lowPartitionIndex - 1) == 0) {
+							lowPartitionIndex--;
+						}
+						if (midIndex >= lowPartitionIndex) {
+							break;
+						}
+						QSortSwap(data, dataWidth, midIndex, lowPartitionIndex - 1);
+						midIndex++;
+						lowPartitionIndex--;
+					}
+
+				}
+
+				// Low partition is [lowIndex, lowPartitionIndex - 1]; high partition is [highPartitionIndex, highIndex]
+				if (lowPartitionIndex - lowIndex > highIndex - highPartitionIndex) {
+					// put the larger partition to stack
+					lowStack[stackDepth] = lowIndex;
+					highStack[stackDepth] = lowPartitionIndex - 1;
+					stackDepth++;
+					// sort the smaller partition first
+					lowIndex = highPartitionIndex;
+				} else {
+					// put the larger partition to stack
+					lowStack[stackDepth] = highPartitionIndex;
+					highStack[stackDepth] = highIndex;
+					stackDepth++;
+					if (lowPartitionIndex > lowIndex) {
+						// sort the smaller partition first
+						highIndex = lowPartitionIndex - 1;
+					} else {
+						// Low partition is empty: go straight to the stacked range
+						break;
+					}
+				}
+
+			}
+
+		}
+
+		// Pop a range from stack
+		if (stackDepth > 0) {
+			stackDepth--;
+			lowIndex = lowStack[stackDepth];
+			highIndex = highStack[stackDepth];
+			continue;
+		} else {
+			// Nothing left to sort
+			break;
+		}
+
+	}
+
+
+}
+
+// Check whether any value inside [minValue, maxValue] occurs more than once in
+// input[0 .. numItem-1]. Values outside that range are ignored.
+// text is a label printed in front of the diagnostic; NULL selects "checkDuplicate()".
+// Returns TRUE when no in-range value is repeated. Otherwise the first repeated value
+// and the indices of its two occurrences are reported on stderr and FALSE is returned.
+// The process is terminated with exit(1) if the bookkeeping table cannot be allocated.
+unsigned int checkDuplicate(int *input, const unsigned int numItem, const int minValue, const int maxValue, char* text) {
+
+	unsigned int *present;
+	unsigned int i;
+	unsigned long long rangeSize;
+	size_t slot;
+	char defaultText[17] = "checkDuplicate()";
+
+	if (text == NULL) {
+		text = defaultText;
+	}
+
+	if (maxValue < minValue) {
+		// Empty range: no item can fall inside it, so nothing can be duplicated
+		return TRUE;
+	}
+
+	// Widen before subtracting so that extreme bounds cannot overflow int
+	rangeSize = (unsigned long long)((long long)maxValue - (long long)minValue) + 1;
+
+	// present[v - minValue] holds (index of the first item with value v) + 1; 0 means "not seen".
+	// Storing the bare index would make item 0 indistinguishable from "not seen".
+	present = NULL;
+	if (rangeSize <= (size_t)-1 / sizeof(unsigned int)) {
+		present = calloc((size_t)rangeSize, sizeof(unsigned int));
+	}
+	if (present == NULL) {
+		fprintf(stderr, "%s : cannot allocate memory\n", text);
+		exit(1);
+	}
+
+	for (i=0; i<numItem; i++) {
+		if (input[i] >= minValue && input[i] <= maxValue) {
+			slot = (size_t)((long long)input[i] - (long long)minValue);
+			if (present[slot] > 0) {
+				fprintf(stderr, "%s : Item %u and %u contains duplicate value of %d\n", 
+							    text, present[slot] - 1, i, input[i]);
+				free(present);
+				return FALSE;
+			}
+			// i < numItem <= UINT_MAX, so i + 1 cannot wrap
+			present[slot] = i + 1;
+		}
+	}
+
+	free(present);
+	return TRUE;
+
+}
+
+
+// Count the zero bits above the most significant set bit of a 32-bit word.
+// An input of 0 yields 32.
+unsigned int leadingZero(const unsigned int input) {
+
+	// leadingZero8bit[b] is the number of leading zero bits in the byte value b
+	static const unsigned int leadingZero8bit[256] = {
+		8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,
+		3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+		2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+		1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+		0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+	};
+
+	// Find the most significant non-zero byte, then add the zeros inside that byte
+	if (input & 0xFF000000) {
+		return leadingZero8bit[input >> 24];
+	}
+	if (input & 0x00FF0000) {
+		return 8 + leadingZero8bit[input >> 16];
+	}
+	if (input & 0x0000FF00) {
+		return 16 + leadingZero8bit[input >> 8];
+	}
+	return 24 + leadingZero8bit[input];
+
+}
+
+// Base-2 logarithm of input rounded up, obtained as the bit length of (input - 1).
+// Inputs of 0 and 1 both yield 0.
+// NOTE(review): relies on BITS_IN_WORD matching the word width leadingZero() counts in.
+unsigned int ceilLog2(const unsigned int input) {
+
+	return (input > 1) ? BITS_IN_WORD - leadingZero(input - 1) : 0;
+
+}
+
+// Base-2 logarithm of input rounded down, i.e. the position of its most
+// significant set bit. Inputs of 0 and 1 both yield 0.
+// NOTE(review): relies on BITS_IN_WORD matching the word width leadingZero() counts in.
+unsigned int floorLog2(const unsigned int input) {
+
+	if (input > 1) {
+		return BITS_IN_WORD - leadingZero(input) - 1;
+	}
+
+	return 0;
+}
+
+// Raise base to the given power by repeated multiplication.
+// A power of 0 yields 1 for every base; the product wraps around on unsigned overflow.
+unsigned int power(const unsigned int base, const unsigned int power) {
+
+	unsigned int product = 1;
+	unsigned int remaining = power;
+
+	while (remaining > 0) {
+		product *= base;
+		remaining--;
+	}
+
+	return product;
+
+}
+
+// Write the bits of input into output as '0' / '1' characters, highest bit first,
+// and terminate the string with '\0'.
+// When 0 < bitGroup < BITS_IN_WORD a space is written after every bitGroup bits
+// (also after the final bit when bitGroup divides BITS_IN_WORD); any other
+// bitGroup value produces no separators.
+// The caller must provide an output buffer large enough for all digits, the
+// separators and the terminator.
+void formatVALAsBinary(const unsigned int input, char* output, unsigned int bitGroup) {
+
+	const int grouping = (bitGroup > 0 && bitGroup < BITS_IN_WORD);
+	char *cursor = output;
+	int bit;
+
+	for (bit=0; bit<BITS_IN_WORD; bit++) {
+		// Drop the bits above the current one, then shift the current one down to bit 0
+		*cursor++ = ((input & (input << bit >> bit)) >> (BITS_IN_WORD - bit - 1)) ? '1' : '0';
+		if (grouping && (bit+1) % bitGroup == 0) {
+			*cursor++ = ' ';
+		}
+	}
+	*cursor = '\0';
+
+}
+
+// Produce a seed value from the current calendar time as reported by time().
+// When time_t is wider than unsigned int the time is first reduced modulo
+// 0xFFFFFFFF; otherwise it is converted to unsigned int directly.
+unsigned int getRandomSeed() {
+
+	time_t now;
+
+	time(&now);
+
+	return (sizeof(time_t) > sizeof(unsigned int))
+			? (unsigned int)(now % 0xFFFFFFFF)
+			: (unsigned int)(now);
+
+}
+
+// Placeholder: the whole implementation below is commented out, so this function
+// currently returns immediately without reading input or writing output.
+// The disabled code is kept for reference only. It refers to names that are not
+// parameters of this function (e.g. alphabetSize) and passes input[wordProcessed]
+// - a byte value, not an address - to memcpy, so it would need rework before it
+// could be enabled again.
+void ConvertBytePackedDNAToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int textLength) {
+/*
+	unsigned int i, j, k;
+	unsigned int c;
+	unsigned int bitPerBytePackedChar;
+	unsigned int bitPerWordPackedChar;
+	unsigned int charPerWord;
+	unsigned int charPerByte;
+	unsigned int bytePerIteration;
+	unsigned int byteProcessed = 0;
+	unsigned int wordProcessed = 0;
+	unsigned int mask, shift;
+	
+	unsigned int buffer[BITS_IN_WORD];
+
+	unsigned char tempChar[4];
+
+	bitPerBytePackedChar = BitPerBytePackedChar(alphabetSize);
+	bitPerWordPackedChar = BitPerWordPackedChar(alphabetSize);
+	charPerByte = BITS_IN_BYTE / bitPerBytePackedChar;
+	charPerWord = BITS_IN_WORD / bitPerWordPackedChar;
+
+	bytePerIteration = charPerWord / charPerByte;
+	mask = truncateRight(ALL_ONE_MASK, BITS_IN_WORD - bitPerWordPackedChar);
+	shift = BITS_IN_WORD - BITS_IN_BYTE + bitPerBytePackedChar - bitPerWordPackedChar;
+
+	while ((wordProcessed + 1) * CHAR_PER_WORD < textLength) {
+
+		memcpy(tempChar, input[wordProcessed], 4);
+		output[wordProcessed] = tempChar[0] << 24 | tempChar[1] << 16 | tempChar[2] << 8 | tempChar[3];
+		wordProcessed++;
+
+	}
+
+	k = 0;
+	for (i=0; i < (textLength - wordProcessed * CHAR_PER_WORD - 1) / CHAR_PER_BYTE + 1; i++) {
+		c = (unsigned int)input[byteProcessed] << shift;
+		for (j=0; j<charPerByte; j++) {
+			buffer[k] = c & mask;
+			c <<= bitPerBytePackedChar;
+			k++;
+		}
+		byteProcessed++;
+	}
+
+	c = 0;
+	for (i=0; i<textLength - wordProcessed * charPerWord; i++) {
+		c |= buffer[i] >> bitPerWordPackedChar * i;
+	}
+	output[wordProcessed] = c;
+
+*/
+
+}
+
+
+
+// Reverse the bit order of a 32-bit word (bit 0 trades places with bit 31, bit 1
+// with bit 30, and so on) by exchanging ever larger groups: neighbouring bits,
+// bit pairs, nibbles, bytes and finally the two 16-bit halves.
+unsigned int reverseBit(unsigned int x)
+{
+	x = ((x >> 1) & 0x55555555) | ((x << 1) & 0xaaaaaaaa);
+	x = ((x >> 2) & 0x33333333) | ((x << 2) & 0xcccccccc);
+	x = ((x >> 4) & 0x0f0f0f0f) | ((x << 4) & 0xf0f0f0f0);
+	x = ((x >> 8) & 0x00ff00ff) | ((x << 8) & 0xff00ff00);
+	return (x << 16) | (x >> 16);
+
+}
+
+// Store initValue into each of the length words starting at startAddr.
+void initializeVAL(unsigned int *startAddr, const unsigned int length, const unsigned int initValue) {
+
+	unsigned int *cursor = startAddr;
+	unsigned int remaining = length;
+
+	while (remaining > 0) {
+		*cursor++ = initValue;
+		remaining--;
+	}
+
+}
+
+// Store initValue into each of the length bytes starting at startAddr.
+void initializeCHAR(unsigned char *startAddr, const unsigned int length, const unsigned char initValue) {
+
+	unsigned char *cursor = startAddr;
+	unsigned int remaining = length;
+
+	while (remaining > 0) {
+		*cursor++ = initValue;
+		remaining--;
+	}
+
+}
+
+// Count how many of the length words starting at startAddr are equal to searchValue.
+unsigned int numberOfMatchInVAL(unsigned int *startAddr, const unsigned int length, const unsigned int searchValue) {
+
+	unsigned int matches = 0;
+	unsigned int pos = 0;
+
+	while (pos < length) {
+		matches += (startAddr[pos] == searchValue);
+		pos++;
+	}
+
+	return matches;
+
+}
+
+// Count how many of the length bytes starting at startAddr are equal to searchValue.
+unsigned int numberOfMatchInCHAR(unsigned char *startAddr, const unsigned int length, const unsigned char searchValue) {
+
+	unsigned int matches = 0;
+	unsigned int pos = 0;
+
+	while (pos < length) {
+		matches += (startAddr[pos] == searchValue);
+		pos++;
+	}
+
+	return matches;
+
+}
+
+
+// Copy copyLengthInBit bits, starting sourceBitOffset bits into sourceAddress[0]
+// (offsets count from the most significant bit, since the source words are shifted
+// left by the offset), to the start of destinationAddress[0].
+//
+// Contract for the caller:
+// - destinationAddress up to the next 4-word boundary (depending on the copy length)
+//   will be overwritten.
+// - sourceAddress will be read in multiples of 4 words (depending on the copy length);
+//   when sourceBitOffset > 0 the word following each group of 4 is read as well.
+// - The calling program must ensure that those addresses, although they may not look
+//   directly useful/used, can safely be overwritten/accessed.
+// - The remaining bits in the resulting ending word, if the resulting bit offset > 0,
+//   are guaranteed to be cleared to 0.
+// - The bits in the resulting ending word are undefined if the resulting bit offset = 0.
+// - The remaining words (to make up the last 4-word multiple) are undefined.
+// NOTE(review): truncateRight(), BITS_IN_WORD, BITS_IN_WORD_MINUS_1 and FOUR_MULTIPLE_MASK
+// are defined elsewhere; truncateRight(v, n) presumably clears the n low-order bits of v.
+
+void bitCopyNoDestOffset(unsigned int *destinationAddress, const unsigned int *sourceAddress,
+						int sourceBitOffset, int copyLengthInBit) {
+
+	unsigned int i;
+	unsigned int rightShift;
+	unsigned int copyLeftBuffer[4], copyRightBuffer[4];
+	unsigned int copyWordLength, copyWordLengthRoundTo4;
+
+	#ifdef DEBUG
+	if (copyLengthInBit == 0) {
+		fprintf(stderr, "bitCopyNoDestOffset() : copyLengthInBit = 0!\n");
+		exit(1);
+	}
+	#endif
+
+	// Number of destination words touched by the copy, rounded up
+	copyWordLength = (copyLengthInBit + BITS_IN_WORD_MINUS_1) / BITS_IN_WORD;
+
+    if (sourceBitOffset == 0) {
+		// Source is word aligned: a plain copy is enough (4 = bytes per word)
+		memcpy(destinationAddress, sourceAddress, copyWordLength * 4); 
+	} else {
+		rightShift = BITS_IN_WORD - sourceBitOffset;
+		copyWordLengthRoundTo4 = (copyWordLength + 3) & FOUR_MULTIPLE_MASK;
+		for (i=0; i<copyWordLengthRoundTo4; i+=4) {
+			// This is supposed to generate SSE2 codes
+			// Need to check rewrite using intrinics is necessary
+			// Each destination word combines the low part of one source word (shifted up)
+			// with the high part of the following source word (shifted down).
+			// All five source words are read into the buffers before any destination
+			// word of the group is written.
+			copyLeftBuffer[0] = sourceAddress[i + 0] << sourceBitOffset;
+			copyLeftBuffer[1] = sourceAddress[i + 1] << sourceBitOffset;
+			copyLeftBuffer[2] = sourceAddress[i + 2] << sourceBitOffset;
+			copyLeftBuffer[3] = sourceAddress[i + 3] << sourceBitOffset;
+			copyRightBuffer[0] = sourceAddress[i + 1] >> rightShift;
+			copyRightBuffer[1] = sourceAddress[i + 2] >> rightShift;
+			copyRightBuffer[2] = sourceAddress[i + 3] >> rightShift;
+			copyRightBuffer[3] = sourceAddress[i + 4] >> rightShift;
+			destinationAddress[i + 0] = copyLeftBuffer[0] | copyRightBuffer[0];
+			destinationAddress[i + 1] = copyLeftBuffer[1] | copyRightBuffer[1];
+			destinationAddress[i + 2] = copyLeftBuffer[2] | copyRightBuffer[2];
+			destinationAddress[i + 3] = copyLeftBuffer[3] | copyRightBuffer[3];
+		}
+	}
+
+	// The copy ends inside a word: trim the bits of that word that lie beyond the copy
+	if (copyLengthInBit % BITS_IN_WORD > 0) {
+		destinationAddress[copyWordLength - 1] = truncateRight(destinationAddress[copyWordLength - 1], 
+													BITS_IN_WORD - (copyLengthInBit % BITS_IN_WORD));
+	}
+
+}
+
+// Copy copyLengthInBit bits, starting sourceBitOffset bits into sourceAddress[0]
+// (offsets count from the most significant bit), to the word-aligned position
+// destinationAddress[destinationWordOffset].
+// When sourceBitOffset > 0 the destination is first filled word by word up to the next
+// 4-word boundary of the destination index and then in groups of 4 words, so words
+// beyond the end of the copy may be written and source words beyond the end of the copy
+// may be read; the caller must make sure this is safe.
+// If the copy ends inside a word, the bits of that last destination word that lie
+// beyond the copy are cleared.
+// NOTE(review): truncateRight(), BITS_IN_WORD, BITS_IN_WORD_MINUS_1 and FOUR_MULTIPLE_MASK
+// are defined elsewhere; truncateRight(v, n) presumably clears the n low-order bits of v.
+void bitCopyDestWordOffsetOnly(unsigned int *destinationAddress, unsigned int destinationWordOffset,
+							const unsigned int *sourceAddress, unsigned int sourceBitOffset, unsigned int copyLengthInBit) {
+
+	unsigned int i;
+	unsigned int rightShift;
+	unsigned int copyLeftBuffer[4], copyRightBuffer[4];
+	unsigned int copyWordLength, copyWordLengthRoundTo4, wordToNext4WordBoundary;
+	unsigned int lastWordIndex;
+	unsigned int *destAddr;
+	const unsigned int *srcAddr;
+
+	#ifdef DEBUG
+	if (copyLengthInBit == 0) {
+		fprintf(stderr, "bitCopyDestWordOffsetOnly() : copyLengthInBit = 0!\n");
+		exit(1);
+	}
+	#endif
+
+	// Number of destination words touched by the copy, rounded up
+	copyWordLength = (copyLengthInBit + BITS_IN_WORD_MINUS_1) / BITS_IN_WORD;
+	destAddr = destinationAddress + destinationWordOffset;
+	srcAddr = sourceAddress;
+
+	// Words needed to bring the destination index up to a multiple of 4
+	wordToNext4WordBoundary = (FOUR_MULTIPLE_MASK - destinationWordOffset) % 4;
+
+    if (sourceBitOffset == 0) {
+		// Source is word aligned: a plain copy is enough (4 = bytes per word)
+		memcpy(destAddr, srcAddr, copyWordLength * 4); 
+	} else {
+		rightShift = BITS_IN_WORD - sourceBitOffset;
+		// Lead-in: single words up to the 4-word boundary
+		for (i=0; i<wordToNext4WordBoundary; i++) {
+			destAddr[i] = (srcAddr[i] << sourceBitOffset) | 
+						  (srcAddr[i+1] >> rightShift);
+		}
+		destAddr += wordToNext4WordBoundary;
+		srcAddr += wordToNext4WordBoundary;
+		copyWordLengthRoundTo4 = (copyWordLength - wordToNext4WordBoundary + 3) & FOUR_MULTIPLE_MASK;
+		for (i=0; i<copyWordLengthRoundTo4; i+=4) {
+			// This is supposed to generate SSE2 codes
+			// Need to check rewrite using intrinics is necessary
+			// All five source words are read into the buffers before any destination
+			// word of the group is written.
+			copyLeftBuffer[0] = srcAddr[i + 0] << sourceBitOffset;
+			copyLeftBuffer[1] = srcAddr[i + 1] << sourceBitOffset;
+			copyLeftBuffer[2] = srcAddr[i + 2] << sourceBitOffset;
+			copyLeftBuffer[3] = srcAddr[i + 3] << sourceBitOffset;
+			copyRightBuffer[0] = srcAddr[i + 1] >> rightShift;
+			copyRightBuffer[1] = srcAddr[i + 2] >> rightShift;
+			copyRightBuffer[2] = srcAddr[i + 3] >> rightShift;
+			copyRightBuffer[3] = srcAddr[i + 4] >> rightShift;
+			destAddr[i + 0] = copyLeftBuffer[0] | copyRightBuffer[0];
+			destAddr[i + 1] = copyLeftBuffer[1] | copyRightBuffer[1];
+			destAddr[i + 2] = copyLeftBuffer[2] | copyRightBuffer[2];
+			destAddr[i + 3] = copyLeftBuffer[3] | copyRightBuffer[3];
+		}
+	}
+
+	// The copy ends inside a word: trim the bits of that word that lie beyond the copy.
+	// The last copied word sits destinationWordOffset words into the destination; indexing
+	// with copyWordLength - 1 alone would trim an unrelated word whenever the offset is not 0.
+	if (copyLengthInBit % BITS_IN_WORD > 0) {
+		lastWordIndex = destinationWordOffset + copyWordLength - 1;
+		destinationAddress[lastWordIndex] = truncateRight(destinationAddress[lastWordIndex], 
+													BITS_IN_WORD - (copyLengthInBit % BITS_IN_WORD));
+	}
+
+}
+
+// Copy copyLengthInBit bits, starting sourceBitOffset bits into sourceAddress[0], to the
+// position destinationBitOffset bits into destinationAddress[destinationWordOffset].
+// Bit offsets count from the most significant bit of a word.
+// The first destination word is combined with the source bits using OR, therefore the
+// remaining bits in destinationAddress, if destinationBitOffset > 0, must already be
+// cleared to 0 by the caller.
+// As in the other bitCopy routines, words beyond the end of the copy may be read from
+// the source and written to the destination when sourceBitOffset > 0.
+// Always returns 0.
+// NOTE(review): truncateRight(), BITS_IN_WORD, BITS_IN_WORD_MINUS_1 and FOUR_MULTIPLE_MASK
+// are defined elsewhere; truncateRight(v, n) presumably clears the n low-order bits of v.
+
+unsigned int bitCopy(unsigned int *destinationAddress, int destinationWordOffset, int destinationBitOffset,
+			 const unsigned int *sourceAddress, int sourceBitOffset, int copyLengthInBit) {
+
+	unsigned int i;
+	unsigned int rightShift;
+	unsigned int copyLeftBuffer[4], copyRightBuffer[4];
+	unsigned int copyWordLength, copyWordLengthRoundTo4, wordToNext4WordBoundary;
+	unsigned int *destAddr;
+	const unsigned int *srcAddr;
+
+	#ifdef DEBUG
+	if (copyLengthInBit == 0) {
+		fprintf(stderr, "bitCopy() : copyLengthInBit = 0!\n");
+		exit(1);
+	}
+	#endif
+
+	destAddr = destinationAddress + destinationWordOffset;
+	srcAddr = sourceAddress;
+
+	if (destinationBitOffset > 0) {
+		// Fill the free tail of the first destination word with the leading source bits
+		destAddr[0] = destAddr[0] | (srcAddr[0] << sourceBitOffset >> destinationBitOffset);
+		if (destinationBitOffset < sourceBitOffset) {
+			// The first source word holds fewer bits than the tail needs: top up from the next one
+			destAddr[0] = destAddr[0] |
+						(srcAddr[1] >> destinationBitOffset >> (BITS_IN_WORD - sourceBitOffset));
+		}
+		if (copyLengthInBit > BITS_IN_WORD - destinationBitOffset) {
+			// More to copy: continue at the next destination word, which is now word aligned,
+			// and advance the source position by the number of bits just consumed
+			destAddr++;
+			srcAddr += (sourceBitOffset + BITS_IN_WORD - destinationBitOffset) / BITS_IN_WORD;
+			sourceBitOffset = (sourceBitOffset + BITS_IN_WORD - destinationBitOffset) % BITS_IN_WORD;
+			copyLengthInBit -= BITS_IN_WORD - destinationBitOffset;
+			destinationWordOffset++;
+		} else {
+			// The whole copy fits into the first destination word: trim the bits beyond it and finish
+			if ((destinationBitOffset + copyLengthInBit) % BITS_IN_WORD > 0) {
+				destAddr[0] = truncateRight(destAddr[0], BITS_IN_WORD - destinationBitOffset - copyLengthInBit);
+			}
+			return 0;
+		}
+	}
+
+	// From here on the destination is word aligned
+	copyWordLength = (copyLengthInBit + BITS_IN_WORD_MINUS_1) / BITS_IN_WORD;
+
+	if (sourceBitOffset == 0) {
+		// Source is word aligned as well: a plain copy is enough (4 = bytes per word)
+		memcpy(destAddr, srcAddr, copyWordLength * 4); 
+	} else {
+		// Words needed to bring the destination index up to a multiple of 4
+		wordToNext4WordBoundary = (FOUR_MULTIPLE_MASK - destinationWordOffset) % 4;
+		rightShift = BITS_IN_WORD - sourceBitOffset;
+		// Lead-in: single words up to the 4-word boundary
+		for (i=0; i<wordToNext4WordBoundary; i++) {
+			destAddr[i] = (srcAddr[i] << sourceBitOffset) | 
+						(srcAddr[i+1] >> rightShift);
+		}
+		if (wordToNext4WordBoundary >= copyWordLength) {
+			// The lead-in already covered the whole copy: trim the last word if needed and finish
+			if (copyLengthInBit % BITS_IN_WORD > 0) {
+				destAddr[copyWordLength - 1] = truncateRight(destAddr[copyWordLength - 1], 
+								BITS_IN_WORD - (copyLengthInBit % BITS_IN_WORD));
+			}
+			return 0;
+		}
+		destAddr += wordToNext4WordBoundary;
+		srcAddr += wordToNext4WordBoundary;
+		copyWordLength -= wordToNext4WordBoundary;
+		copyWordLengthRoundTo4 = (copyWordLength + 3) & FOUR_MULTIPLE_MASK;
+		for (i=0; i<copyWordLengthRoundTo4; i+=4) {
+			// This is supposed to generate SSE2 codes
+			// Need to check rewrite using intrinics is necessary
+			// All five source words are read into the buffers before any destination
+			// word of the group is written.
+			copyLeftBuffer[0] = srcAddr[i + 0] << sourceBitOffset;
+			copyLeftBuffer[1] = srcAddr[i + 1] << sourceBitOffset;
+			copyLeftBuffer[2] = srcAddr[i + 2] << sourceBitOffset;
+			copyLeftBuffer[3] = srcAddr[i + 3] << sourceBitOffset;
+			copyRightBuffer[0] = srcAddr[i + 1] >> rightShift;
+			copyRightBuffer[1] = srcAddr[i + 2] >> rightShift;
+			copyRightBuffer[2] = srcAddr[i + 3] >> rightShift;
+			copyRightBuffer[3] = srcAddr[i + 4] >> rightShift;
+			destAddr[i + 0] = copyLeftBuffer[0] | copyRightBuffer[0];
+			destAddr[i + 1] = copyLeftBuffer[1] | copyRightBuffer[1];
+			destAddr[i + 2] = copyLeftBuffer[2] | copyRightBuffer[2];
+			destAddr[i + 3] = copyLeftBuffer[3] | copyRightBuffer[3];
+		}
+	}
+
+	// The copy ends inside a word: trim the bits of that word that lie beyond the copy.
+	// destAddr and copyWordLength are both relative to the same starting word here.
+	if (copyLengthInBit % BITS_IN_WORD > 0) {
+		destAddr[copyWordLength - 1] = truncateRight(destAddr[copyWordLength - 1], 
+							BITS_IN_WORD - (copyLengthInBit % BITS_IN_WORD));
+	}
+
+	return 0;
+
+}
+
+// return a prime number >= number
+unsigned int nextPrime(const unsigned int number) {
+
+	// the smallest prime larger than 2^16 is 65537, which is the 6543th prime number
+
+	static const unsigned int prime[6543] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
+									59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131,
+									137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223,
+									227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, 293, 307, 311,
+									313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
+									419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503,
+									509, 521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599, 601, 607, 613,
+									617, 619, 631, 641, 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, 709, 719,
+									727, 733, 739, 743, 751, 757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827,
+									829, 839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919, 929, 937, 941,
+									947, 953, 967, 971, 977, 983, 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,
+									1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
+									1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
+									1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,
+									1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
+									1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,
+									1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,
+									1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,
+									1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,
+									2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
+									2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,
+									2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,
+									2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,
+									2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,
+									2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
+									2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,
+									2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,
+									3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,
+									3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,
+									3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
+									3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,
+									3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,
+									3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,
+									3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,
+									3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
+									4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,
+									4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259, 4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,
+									4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,
+									4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,
+									4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
+									4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,
+									4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967, 4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,
+									5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087, 5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,
+									5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233, 5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,
+									5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399, 5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
+									5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507, 5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,
+									5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653, 5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,
+									5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791, 5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,
+									5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897, 5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,
+									6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073, 6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
+									6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211, 6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,
+									6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329, 6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,
+									6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473, 6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,
+									6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637, 6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,
+									6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779, 6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
+									6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907, 6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,
+									6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027, 7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,
+									7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207, 7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,
+									7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349, 7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,
+									7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517, 7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
+									7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621, 7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,
+									7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757, 7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,
+									7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919, 7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,
+									8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087, 8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161,
+									8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231, 8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291,
+									8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369, 8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443,
+									8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537, 8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609,
+									8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677, 8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731,
+									8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803, 8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861,
+									8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941, 8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011,
+									9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091, 9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161,
+									9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227, 9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311,
+									9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377, 9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433,
+									9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491, 9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587,
+									9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649, 9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733,
+									9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791, 9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857,
+									9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929, 9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037,
+									10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099, 10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
+									10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247, 10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
+									10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369, 10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
+									10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531, 10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
+									10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691, 10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
+									10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859, 10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
+									10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003, 11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
+									11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161, 11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
+									11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317, 11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
+									11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483, 11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
+									11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657, 11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
+									11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813, 11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
+									11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941, 11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
+									12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101, 12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
+									12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251, 12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
+									12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401, 12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
+									12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527, 12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
+									12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653, 12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
+									12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821, 12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
+									12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967, 12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
+									13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109, 13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
+									13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259, 13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
+									13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421, 13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
+									13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597, 13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
+									13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723, 13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
+									13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879, 13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
+									13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033, 14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
+									14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221, 14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
+									14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407, 14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
+									14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549, 14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
+									14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699, 14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
+									14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821, 14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
+									14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957, 14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
+									15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137, 15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
+									15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277, 15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
+									15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401, 15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
+									15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569, 15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
+									15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727, 15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
+									15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859, 15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
+									15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007, 16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
+									16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183, 16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
+									16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
+									16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493, 16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
+									16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661, 16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
+									16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843, 16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
+									16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993, 17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
+									17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159, 17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
+									17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327, 17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
+									17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467, 17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
+									17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599, 17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
+									17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783, 17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
+									17881, 17891, 17903, 17909, 17911, 17921, 17923, 17929, 17939, 17957, 17959, 17971, 17977, 17981, 17987, 17989,
+									18013, 18041, 18043, 18047, 18049, 18059, 18061, 18077, 18089, 18097, 18119, 18121, 18127, 18131, 18133, 18143,
+									18149, 18169, 18181, 18191, 18199, 18211, 18217, 18223, 18229, 18233, 18251, 18253, 18257, 18269, 18287, 18289,
+									18301, 18307, 18311, 18313, 18329, 18341, 18353, 18367, 18371, 18379, 18397, 18401, 18413, 18427, 18433, 18439,
+									18443, 18451, 18457, 18461, 18481, 18493, 18503, 18517, 18521, 18523, 18539, 18541, 18553, 18583, 18587, 18593,
+									18617, 18637, 18661, 18671, 18679, 18691, 18701, 18713, 18719, 18731, 18743, 18749, 18757, 18773, 18787, 18793,
+									18797, 18803, 18839, 18859, 18869, 18899, 18911, 18913, 18917, 18919, 18947, 18959, 18973, 18979, 19001, 19009,
+									19013, 19031, 19037, 19051, 19069, 19073, 19079, 19081, 19087, 19121, 19139, 19141, 19157, 19163, 19181, 19183,
+									19207, 19211, 19213, 19219, 19231, 19237, 19249, 19259, 19267, 19273, 19289, 19301, 19309, 19319, 19333, 19373,
+									19379, 19381, 19387, 19391, 19403, 19417, 19421, 19423, 19427, 19429, 19433, 19441, 19447, 19457, 19463, 19469,
+									19471, 19477, 19483, 19489, 19501, 19507, 19531, 19541, 19543, 19553, 19559, 19571, 19577, 19583, 19597, 19603,
+									19609, 19661, 19681, 19687, 19697, 19699, 19709, 19717, 19727, 19739, 19751, 19753, 19759, 19763, 19777, 19793,
+									19801, 19813, 19819, 19841, 19843, 19853, 19861, 19867, 19889, 19891, 19913, 19919, 19927, 19937, 19949, 19961,
+									19963, 19973, 19979, 19991, 19993, 19997, 20011, 20021, 20023, 20029, 20047, 20051, 20063, 20071, 20089, 20101,
+									20107, 20113, 20117, 20123, 20129, 20143, 20147, 20149, 20161, 20173, 20177, 20183, 20201, 20219, 20231, 20233,
+									20249, 20261, 20269, 20287, 20297, 20323, 20327, 20333, 20341, 20347, 20353, 20357, 20359, 20369, 20389, 20393,
+									20399, 20407, 20411, 20431, 20441, 20443, 20477, 20479, 20483, 20507, 20509, 20521, 20533, 20543, 20549, 20551,
+									20563, 20593, 20599, 20611, 20627, 20639, 20641, 20663, 20681, 20693, 20707, 20717, 20719, 20731, 20743, 20747,
+									20749, 20753, 20759, 20771, 20773, 20789, 20807, 20809, 20849, 20857, 20873, 20879, 20887, 20897, 20899, 20903,
+									20921, 20929, 20939, 20947, 20959, 20963, 20981, 20983, 21001, 21011, 21013, 21017, 21019, 21023, 21031, 21059,
+									21061, 21067, 21089, 21101, 21107, 21121, 21139, 21143, 21149, 21157, 21163, 21169, 21179, 21187, 21191, 21193,
+									21211, 21221, 21227, 21247, 21269, 21277, 21283, 21313, 21317, 21319, 21323, 21341, 21347, 21377, 21379, 21383,
+									21391, 21397, 21401, 21407, 21419, 21433, 21467, 21481, 21487, 21491, 21493, 21499, 21503, 21517, 21521, 21523,
+									21529, 21557, 21559, 21563, 21569, 21577, 21587, 21589, 21599, 21601, 21611, 21613, 21617, 21647, 21649, 21661,
+									21673, 21683, 21701, 21713, 21727, 21737, 21739, 21751, 21757, 21767, 21773, 21787, 21799, 21803, 21817, 21821,
+									21839, 21841, 21851, 21859, 21863, 21871, 21881, 21893, 21911, 21929, 21937, 21943, 21961, 21977, 21991, 21997,
+									22003, 22013, 22027, 22031, 22037, 22039, 22051, 22063, 22067, 22073, 22079, 22091, 22093, 22109, 22111, 22123,
+									22129, 22133, 22147, 22153, 22157, 22159, 22171, 22189, 22193, 22229, 22247, 22259, 22271, 22273, 22277, 22279,
+									22283, 22291, 22303, 22307, 22343, 22349, 22367, 22369, 22381, 22391, 22397, 22409, 22433, 22441, 22447, 22453,
+									22469, 22481, 22483, 22501, 22511, 22531, 22541, 22543, 22549, 22567, 22571, 22573, 22613, 22619, 22621, 22637,
+									22639, 22643, 22651, 22669, 22679, 22691, 22697, 22699, 22709, 22717, 22721, 22727, 22739, 22741, 22751, 22769,
+									22777, 22783, 22787, 22807, 22811, 22817, 22853, 22859, 22861, 22871, 22877, 22901, 22907, 22921, 22937, 22943,
+									22961, 22963, 22973, 22993, 23003, 23011, 23017, 23021, 23027, 23029, 23039, 23041, 23053, 23057, 23059, 23063,
+									23071, 23081, 23087, 23099, 23117, 23131, 23143, 23159, 23167, 23173, 23189, 23197, 23201, 23203, 23209, 23227,
+									23251, 23269, 23279, 23291, 23293, 23297, 23311, 23321, 23327, 23333, 23339, 23357, 23369, 23371, 23399, 23417,
+									23431, 23447, 23459, 23473, 23497, 23509, 23531, 23537, 23539, 23549, 23557, 23561, 23563, 23567, 23581, 23593,
+									23599, 23603, 23609, 23623, 23627, 23629, 23633, 23663, 23669, 23671, 23677, 23687, 23689, 23719, 23741, 23743,
+									23747, 23753, 23761, 23767, 23773, 23789, 23801, 23813, 23819, 23827, 23831, 23833, 23857, 23869, 23873, 23879,
+									23887, 23893, 23899, 23909, 23911, 23917, 23929, 23957, 23971, 23977, 23981, 23993, 24001, 24007, 24019, 24023,
+									24029, 24043, 24049, 24061, 24071, 24077, 24083, 24091, 24097, 24103, 24107, 24109, 24113, 24121, 24133, 24137,
+									24151, 24169, 24179, 24181, 24197, 24203, 24223, 24229, 24239, 24247, 24251, 24281, 24317, 24329, 24337, 24359,
+									24371, 24373, 24379, 24391, 24407, 24413, 24419, 24421, 24439, 24443, 24469, 24473, 24481, 24499, 24509, 24517,
+									24527, 24533, 24547, 24551, 24571, 24593, 24611, 24623, 24631, 24659, 24671, 24677, 24683, 24691, 24697, 24709,
+									24733, 24749, 24763, 24767, 24781, 24793, 24799, 24809, 24821, 24841, 24847, 24851, 24859, 24877, 24889, 24907,
+									24917, 24919, 24923, 24943, 24953, 24967, 24971, 24977, 24979, 24989, 25013, 25031, 25033, 25037, 25057, 25073,
+									25087, 25097, 25111, 25117, 25121, 25127, 25147, 25153, 25163, 25169, 25171, 25183, 25189, 25219, 25229, 25237,
+									25243, 25247, 25253, 25261, 25301, 25303, 25307, 25309, 25321, 25339, 25343, 25349, 25357, 25367, 25373, 25391,
+									25409, 25411, 25423, 25439, 25447, 25453, 25457, 25463, 25469, 25471, 25523, 25537, 25541, 25561, 25577, 25579,
+									25583, 25589, 25601, 25603, 25609, 25621, 25633, 25639, 25643, 25657, 25667, 25673, 25679, 25693, 25703, 25717,
+									25733, 25741, 25747, 25759, 25763, 25771, 25793, 25799, 25801, 25819, 25841, 25847, 25849, 25867, 25873, 25889,
+									25903, 25913, 25919, 25931, 25933, 25939, 25943, 25951, 25969, 25981, 25997, 25999, 26003, 26017, 26021, 26029,
+									26041, 26053, 26083, 26099, 26107, 26111, 26113, 26119, 26141, 26153, 26161, 26171, 26177, 26183, 26189, 26203,
+									26209, 26227, 26237, 26249, 26251, 26261, 26263, 26267, 26293, 26297, 26309, 26317, 26321, 26339, 26347, 26357,
+									26371, 26387, 26393, 26399, 26407, 26417, 26423, 26431, 26437, 26449, 26459, 26479, 26489, 26497, 26501, 26513,
+									26539, 26557, 26561, 26573, 26591, 26597, 26627, 26633, 26641, 26647, 26669, 26681, 26683, 26687, 26693, 26699,
+									26701, 26711, 26713, 26717, 26723, 26729, 26731, 26737, 26759, 26777, 26783, 26801, 26813, 26821, 26833, 26839,
+									26849, 26861, 26863, 26879, 26881, 26891, 26893, 26903, 26921, 26927, 26947, 26951, 26953, 26959, 26981, 26987,
+									26993, 27011, 27017, 27031, 27043, 27059, 27061, 27067, 27073, 27077, 27091, 27103, 27107, 27109, 27127, 27143,
+									27179, 27191, 27197, 27211, 27239, 27241, 27253, 27259, 27271, 27277, 27281, 27283, 27299, 27329, 27337, 27361,
+									27367, 27397, 27407, 27409, 27427, 27431, 27437, 27449, 27457, 27479, 27481, 27487, 27509, 27527, 27529, 27539,
+									27541, 27551, 27581, 27583, 27611, 27617, 27631, 27647, 27653, 27673, 27689, 27691, 27697, 27701, 27733, 27737,
+									27739, 27743, 27749, 27751, 27763, 27767, 27773, 27779, 27791, 27793, 27799, 27803, 27809, 27817, 27823, 27827,
+									27847, 27851, 27883, 27893, 27901, 27917, 27919, 27941, 27943, 27947, 27953, 27961, 27967, 27983, 27997, 28001,
+									28019, 28027, 28031, 28051, 28057, 28069, 28081, 28087, 28097, 28099, 28109, 28111, 28123, 28151, 28163, 28181,
+									28183, 28201, 28211, 28219, 28229, 28277, 28279, 28283, 28289, 28297, 28307, 28309, 28319, 28349, 28351, 28387,
+									28393, 28403, 28409, 28411, 28429, 28433, 28439, 28447, 28463, 28477, 28493, 28499, 28513, 28517, 28537, 28541,
+									28547, 28549, 28559, 28571, 28573, 28579, 28591, 28597, 28603, 28607, 28619, 28621, 28627, 28631, 28643, 28649,
+									28657, 28661, 28663, 28669, 28687, 28697, 28703, 28711, 28723, 28729, 28751, 28753, 28759, 28771, 28789, 28793,
+									28807, 28813, 28817, 28837, 28843, 28859, 28867, 28871, 28879, 28901, 28909, 28921, 28927, 28933, 28949, 28961,
+									28979, 29009, 29017, 29021, 29023, 29027, 29033, 29059, 29063, 29077, 29101, 29123, 29129, 29131, 29137, 29147,
+									29153, 29167, 29173, 29179, 29191, 29201, 29207, 29209, 29221, 29231, 29243, 29251, 29269, 29287, 29297, 29303,
+									29311, 29327, 29333, 29339, 29347, 29363, 29383, 29387, 29389, 29399, 29401, 29411, 29423, 29429, 29437, 29443,
+									29453, 29473, 29483, 29501, 29527, 29531, 29537, 29567, 29569, 29573, 29581, 29587, 29599, 29611, 29629, 29633,
+									29641, 29663, 29669, 29671, 29683, 29717, 29723, 29741, 29753, 29759, 29761, 29789, 29803, 29819, 29833, 29837,
+									29851, 29863, 29867, 29873, 29879, 29881, 29917, 29921, 29927, 29947, 29959, 29983, 29989, 30011, 30013, 30029,
+									30047, 30059, 30071, 30089, 30091, 30097, 30103, 30109, 30113, 30119, 30133, 30137, 30139, 30161, 30169, 30181,
+									30187, 30197, 30203, 30211, 30223, 30241, 30253, 30259, 30269, 30271, 30293, 30307, 30313, 30319, 30323, 30341,
+									30347, 30367, 30389, 30391, 30403, 30427, 30431, 30449, 30467, 30469, 30491, 30493, 30497, 30509, 30517, 30529,
+									30539, 30553, 30557, 30559, 30577, 30593, 30631, 30637, 30643, 30649, 30661, 30671, 30677, 30689, 30697, 30703,
+									30707, 30713, 30727, 30757, 30763, 30773, 30781, 30803, 30809, 30817, 30829, 30839, 30841, 30851, 30853, 30859,
+									30869, 30871, 30881, 30893, 30911, 30931, 30937, 30941, 30949, 30971, 30977, 30983, 31013, 31019, 31033, 31039,
+									31051, 31063, 31069, 31079, 31081, 31091, 31121, 31123, 31139, 31147, 31151, 31153, 31159, 31177, 31181, 31183,
+									31189, 31193, 31219, 31223, 31231, 31237, 31247, 31249, 31253, 31259, 31267, 31271, 31277, 31307, 31319, 31321,
+									31327, 31333, 31337, 31357, 31379, 31387, 31391, 31393, 31397, 31469, 31477, 31481, 31489, 31511, 31513, 31517,
+									31531, 31541, 31543, 31547, 31567, 31573, 31583, 31601, 31607, 31627, 31643, 31649, 31657, 31663, 31667, 31687,
+									31699, 31721, 31723, 31727, 31729, 31741, 31751, 31769, 31771, 31793, 31799, 31817, 31847, 31849, 31859, 31873,
+									31883, 31891, 31907, 31957, 31963, 31973, 31981, 31991, 32003, 32009, 32027, 32029, 32051, 32057, 32059, 32063,
+									32069, 32077, 32083, 32089, 32099, 32117, 32119, 32141, 32143, 32159, 32173, 32183, 32189, 32191, 32203, 32213,
+									32233, 32237, 32251, 32257, 32261, 32297, 32299, 32303, 32309, 32321, 32323, 32327, 32341, 32353, 32359, 32363,
+									32369, 32371, 32377, 32381, 32401, 32411, 32413, 32423, 32429, 32441, 32443, 32467, 32479, 32491, 32497, 32503,
+									32507, 32531, 32533, 32537, 32561, 32563, 32569, 32573, 32579, 32587, 32603, 32609, 32611, 32621, 32633, 32647,
+									32653, 32687, 32693, 32707, 32713, 32717, 32719, 32749, 32771, 32779, 32783, 32789, 32797, 32801, 32803, 32831,
+									32833, 32839, 32843, 32869, 32887, 32909, 32911, 32917, 32933, 32939, 32941, 32957, 32969, 32971, 32983, 32987,
+									32993, 32999, 33013, 33023, 33029, 33037, 33049, 33053, 33071, 33073, 33083, 33091, 33107, 33113, 33119, 33149,
+									33151, 33161, 33179, 33181, 33191, 33199, 33203, 33211, 33223, 33247, 33287, 33289, 33301, 33311, 33317, 33329,
+									33331, 33343, 33347, 33349, 33353, 33359, 33377, 33391, 33403, 33409, 33413, 33427, 33457, 33461, 33469, 33479,
+									33487, 33493, 33503, 33521, 33529, 33533, 33547, 33563, 33569, 33577, 33581, 33587, 33589, 33599, 33601, 33613,
+									33617, 33619, 33623, 33629, 33637, 33641, 33647, 33679, 33703, 33713, 33721, 33739, 33749, 33751, 33757, 33767,
+									33769, 33773, 33791, 33797, 33809, 33811, 33827, 33829, 33851, 33857, 33863, 33871, 33889, 33893, 33911, 33923,
+									33931, 33937, 33941, 33961, 33967, 33997, 34019, 34031, 34033, 34039, 34057, 34061, 34123, 34127, 34129, 34141,
+									34147, 34157, 34159, 34171, 34183, 34211, 34213, 34217, 34231, 34253, 34259, 34261, 34267, 34273, 34283, 34297,
+									34301, 34303, 34313, 34319, 34327, 34337, 34351, 34361, 34367, 34369, 34381, 34403, 34421, 34429, 34439, 34457,
+									34469, 34471, 34483, 34487, 34499, 34501, 34511, 34513, 34519, 34537, 34543, 34549, 34583, 34589, 34591, 34603,
+									34607, 34613, 34631, 34649, 34651, 34667, 34673, 34679, 34687, 34693, 34703, 34721, 34729, 34739, 34747, 34757,
+									34759, 34763, 34781, 34807, 34819, 34841, 34843, 34847, 34849, 34871, 34877, 34883, 34897, 34913, 34919, 34939,
+									34949, 34961, 34963, 34981, 35023, 35027, 35051, 35053, 35059, 35069, 35081, 35083, 35089, 35099, 35107, 35111,
+									35117, 35129, 35141, 35149, 35153, 35159, 35171, 35201, 35221, 35227, 35251, 35257, 35267, 35279, 35281, 35291,
+									35311, 35317, 35323, 35327, 35339, 35353, 35363, 35381, 35393, 35401, 35407, 35419, 35423, 35437, 35447, 35449,
+									35461, 35491, 35507, 35509, 35521, 35527, 35531, 35533, 35537, 35543, 35569, 35573, 35591, 35593, 35597, 35603,
+									35617, 35671, 35677, 35729, 35731, 35747, 35753, 35759, 35771, 35797, 35801, 35803, 35809, 35831, 35837, 35839,
+									35851, 35863, 35869, 35879, 35897, 35899, 35911, 35923, 35933, 35951, 35963, 35969, 35977, 35983, 35993, 35999,
+									36007, 36011, 36013, 36017, 36037, 36061, 36067, 36073, 36083, 36097, 36107, 36109, 36131, 36137, 36151, 36161,
+									36187, 36191, 36209, 36217, 36229, 36241, 36251, 36263, 36269, 36277, 36293, 36299, 36307, 36313, 36319, 36341,
+									36343, 36353, 36373, 36383, 36389, 36433, 36451, 36457, 36467, 36469, 36473, 36479, 36493, 36497, 36523, 36527,
+									36529, 36541, 36551, 36559, 36563, 36571, 36583, 36587, 36599, 36607, 36629, 36637, 36643, 36653, 36671, 36677,
+									36683, 36691, 36697, 36709, 36713, 36721, 36739, 36749, 36761, 36767, 36779, 36781, 36787, 36791, 36793, 36809,
+									36821, 36833, 36847, 36857, 36871, 36877, 36887, 36899, 36901, 36913, 36919, 36923, 36929, 36931, 36943, 36947,
+									36973, 36979, 36997, 37003, 37013, 37019, 37021, 37039, 37049, 37057, 37061, 37087, 37097, 37117, 37123, 37139,
+									37159, 37171, 37181, 37189, 37199, 37201, 37217, 37223, 37243, 37253, 37273, 37277, 37307, 37309, 37313, 37321,
+									37337, 37339, 37357, 37361, 37363, 37369, 37379, 37397, 37409, 37423, 37441, 37447, 37463, 37483, 37489, 37493,
+									37501, 37507, 37511, 37517, 37529, 37537, 37547, 37549, 37561, 37567, 37571, 37573, 37579, 37589, 37591, 37607,
+									37619, 37633, 37643, 37649, 37657, 37663, 37691, 37693, 37699, 37717, 37747, 37781, 37783, 37799, 37811, 37813,
+									37831, 37847, 37853, 37861, 37871, 37879, 37889, 37897, 37907, 37951, 37957, 37963, 37967, 37987, 37991, 37993,
+									37997, 38011, 38039, 38047, 38053, 38069, 38083, 38113, 38119, 38149, 38153, 38167, 38177, 38183, 38189, 38197,
+									38201, 38219, 38231, 38237, 38239, 38261, 38273, 38281, 38287, 38299, 38303, 38317, 38321, 38327, 38329, 38333,
+									38351, 38371, 38377, 38393, 38431, 38447, 38449, 38453, 38459, 38461, 38501, 38543, 38557, 38561, 38567, 38569,
+									38593, 38603, 38609, 38611, 38629, 38639, 38651, 38653, 38669, 38671, 38677, 38693, 38699, 38707, 38711, 38713,
+									38723, 38729, 38737, 38747, 38749, 38767, 38783, 38791, 38803, 38821, 38833, 38839, 38851, 38861, 38867, 38873,
+									38891, 38903, 38917, 38921, 38923, 38933, 38953, 38959, 38971, 38977, 38993, 39019, 39023, 39041, 39043, 39047,
+									39079, 39089, 39097, 39103, 39107, 39113, 39119, 39133, 39139, 39157, 39161, 39163, 39181, 39191, 39199, 39209,
+									39217, 39227, 39229, 39233, 39239, 39241, 39251, 39293, 39301, 39313, 39317, 39323, 39341, 39343, 39359, 39367,
+									39371, 39373, 39383, 39397, 39409, 39419, 39439, 39443, 39451, 39461, 39499, 39503, 39509, 39511, 39521, 39541,
+									39551, 39563, 39569, 39581, 39607, 39619, 39623, 39631, 39659, 39667, 39671, 39679, 39703, 39709, 39719, 39727,
+									39733, 39749, 39761, 39769, 39779, 39791, 39799, 39821, 39827, 39829, 39839, 39841, 39847, 39857, 39863, 39869,
+									39877, 39883, 39887, 39901, 39929, 39937, 39953, 39971, 39979, 39983, 39989, 40009, 40013, 40031, 40037, 40039,
+									40063, 40087, 40093, 40099, 40111, 40123, 40127, 40129, 40151, 40153, 40163, 40169, 40177, 40189, 40193, 40213,
+									40231, 40237, 40241, 40253, 40277, 40283, 40289, 40343, 40351, 40357, 40361, 40387, 40423, 40427, 40429, 40433,
+									40459, 40471, 40483, 40487, 40493, 40499, 40507, 40519, 40529, 40531, 40543, 40559, 40577, 40583, 40591, 40597,
+									40609, 40627, 40637, 40639, 40693, 40697, 40699, 40709, 40739, 40751, 40759, 40763, 40771, 40787, 40801, 40813,
+									40819, 40823, 40829, 40841, 40847, 40849, 40853, 40867, 40879, 40883, 40897, 40903, 40927, 40933, 40939, 40949,
+									40961, 40973, 40993, 41011, 41017, 41023, 41039, 41047, 41051, 41057, 41077, 41081, 41113, 41117, 41131, 41141,
+									41143, 41149, 41161, 41177, 41179, 41183, 41189, 41201, 41203, 41213, 41221, 41227, 41231, 41233, 41243, 41257,
+									41263, 41269, 41281, 41299, 41333, 41341, 41351, 41357, 41381, 41387, 41389, 41399, 41411, 41413, 41443, 41453,
+									41467, 41479, 41491, 41507, 41513, 41519, 41521, 41539, 41543, 41549, 41579, 41593, 41597, 41603, 41609, 41611,
+									41617, 41621, 41627, 41641, 41647, 41651, 41659, 41669, 41681, 41687, 41719, 41729, 41737, 41759, 41761, 41771,
+									41777, 41801, 41809, 41813, 41843, 41849, 41851, 41863, 41879, 41887, 41893, 41897, 41903, 41911, 41927, 41941,
+									41947, 41953, 41957, 41959, 41969, 41981, 41983, 41999, 42013, 42017, 42019, 42023, 42043, 42061, 42071, 42073,
+									42083, 42089, 42101, 42131, 42139, 42157, 42169, 42179, 42181, 42187, 42193, 42197, 42209, 42221, 42223, 42227,
+									42239, 42257, 42281, 42283, 42293, 42299, 42307, 42323, 42331, 42337, 42349, 42359, 42373, 42379, 42391, 42397,
+									42403, 42407, 42409, 42433, 42437, 42443, 42451, 42457, 42461, 42463, 42467, 42473, 42487, 42491, 42499, 42509,
+									42533, 42557, 42569, 42571, 42577, 42589, 42611, 42641, 42643, 42649, 42667, 42677, 42683, 42689, 42697, 42701,
+									42703, 42709, 42719, 42727, 42737, 42743, 42751, 42767, 42773, 42787, 42793, 42797, 42821, 42829, 42839, 42841,
+									42853, 42859, 42863, 42899, 42901, 42923, 42929, 42937, 42943, 42953, 42961, 42967, 42979, 42989, 43003, 43013,
+									43019, 43037, 43049, 43051, 43063, 43067, 43093, 43103, 43117, 43133, 43151, 43159, 43177, 43189, 43201, 43207,
+									43223, 43237, 43261, 43271, 43283, 43291, 43313, 43319, 43321, 43331, 43391, 43397, 43399, 43403, 43411, 43427,
+									43441, 43451, 43457, 43481, 43487, 43499, 43517, 43541, 43543, 43573, 43577, 43579, 43591, 43597, 43607, 43609,
+									43613, 43627, 43633, 43649, 43651, 43661, 43669, 43691, 43711, 43717, 43721, 43753, 43759, 43777, 43781, 43783,
+									43787, 43789, 43793, 43801, 43853, 43867, 43889, 43891, 43913, 43933, 43943, 43951, 43961, 43963, 43969, 43973,
+									43987, 43991, 43997, 44017, 44021, 44027, 44029, 44041, 44053, 44059, 44071, 44087, 44089, 44101, 44111, 44119,
+									44123, 44129, 44131, 44159, 44171, 44179, 44189, 44201, 44203, 44207, 44221, 44249, 44257, 44263, 44267, 44269,
+									44273, 44279, 44281, 44293, 44351, 44357, 44371, 44381, 44383, 44389, 44417, 44449, 44453, 44483, 44491, 44497,
+									44501, 44507, 44519, 44531, 44533, 44537, 44543, 44549, 44563, 44579, 44587, 44617, 44621, 44623, 44633, 44641,
+									44647, 44651, 44657, 44683, 44687, 44699, 44701, 44711, 44729, 44741, 44753, 44771, 44773, 44777, 44789, 44797,
+									44809, 44819, 44839, 44843, 44851, 44867, 44879, 44887, 44893, 44909, 44917, 44927, 44939, 44953, 44959, 44963,
+									44971, 44983, 44987, 45007, 45013, 45053, 45061, 45077, 45083, 45119, 45121, 45127, 45131, 45137, 45139, 45161,
+									45179, 45181, 45191, 45197, 45233, 45247, 45259, 45263, 45281, 45289, 45293, 45307, 45317, 45319, 45329, 45337,
+									45341, 45343, 45361, 45377, 45389, 45403, 45413, 45427, 45433, 45439, 45481, 45491, 45497, 45503, 45523, 45533,
+									45541, 45553, 45557, 45569, 45587, 45589, 45599, 45613, 45631, 45641, 45659, 45667, 45673, 45677, 45691, 45697,
+									45707, 45737, 45751, 45757, 45763, 45767, 45779, 45817, 45821, 45823, 45827, 45833, 45841, 45853, 45863, 45869,
+									45887, 45893, 45943, 45949, 45953, 45959, 45971, 45979, 45989, 46021, 46027, 46049, 46051, 46061, 46073, 46091,
+									46093, 46099, 46103, 46133, 46141, 46147, 46153, 46171, 46181, 46183, 46187, 46199, 46219, 46229, 46237, 46261,
+									46271, 46273, 46279, 46301, 46307, 46309, 46327, 46337, 46349, 46351, 46381, 46399, 46411, 46439, 46441, 46447,
+									46451, 46457, 46471, 46477, 46489, 46499, 46507, 46511, 46523, 46549, 46559, 46567, 46573, 46589, 46591, 46601,
+									46619, 46633, 46639, 46643, 46649, 46663, 46679, 46681, 46687, 46691, 46703, 46723, 46727, 46747, 46751, 46757,
+									46769, 46771, 46807, 46811, 46817, 46819, 46829, 46831, 46853, 46861, 46867, 46877, 46889, 46901, 46919, 46933,
+									46957, 46993, 46997, 47017, 47041, 47051, 47057, 47059, 47087, 47093, 47111, 47119, 47123, 47129, 47137, 47143,
+									47147, 47149, 47161, 47189, 47207, 47221, 47237, 47251, 47269, 47279, 47287, 47293, 47297, 47303, 47309, 47317,
+									47339, 47351, 47353, 47363, 47381, 47387, 47389, 47407, 47417, 47419, 47431, 47441, 47459, 47491, 47497, 47501,
+									47507, 47513, 47521, 47527, 47533, 47543, 47563, 47569, 47581, 47591, 47599, 47609, 47623, 47629, 47639, 47653,
+									47657, 47659, 47681, 47699, 47701, 47711, 47713, 47717, 47737, 47741, 47743, 47777, 47779, 47791, 47797, 47807,
+									47809, 47819, 47837, 47843, 47857, 47869, 47881, 47903, 47911, 47917, 47933, 47939, 47947, 47951, 47963, 47969,
+									47977, 47981, 48017, 48023, 48029, 48049, 48073, 48079, 48091, 48109, 48119, 48121, 48131, 48157, 48163, 48179,
+									48187, 48193, 48197, 48221, 48239, 48247, 48259, 48271, 48281, 48299, 48311, 48313, 48337, 48341, 48353, 48371,
+									48383, 48397, 48407, 48409, 48413, 48437, 48449, 48463, 48473, 48479, 48481, 48487, 48491, 48497, 48523, 48527,
+									48533, 48539, 48541, 48563, 48571, 48589, 48593, 48611, 48619, 48623, 48647, 48649, 48661, 48673, 48677, 48679,
+									48731, 48733, 48751, 48757, 48761, 48767, 48779, 48781, 48787, 48799, 48809, 48817, 48821, 48823, 48847, 48857,
+									48859, 48869, 48871, 48883, 48889, 48907, 48947, 48953, 48973, 48989, 48991, 49003, 49009, 49019, 49031, 49033,
+									49037, 49043, 49057, 49069, 49081, 49103, 49109, 49117, 49121, 49123, 49139, 49157, 49169, 49171, 49177, 49193,
+									49199, 49201, 49207, 49211, 49223, 49253, 49261, 49277, 49279, 49297, 49307, 49331, 49333, 49339, 49363, 49367,
+									49369, 49391, 49393, 49409, 49411, 49417, 49429, 49433, 49451, 49459, 49463, 49477, 49481, 49499, 49523, 49529,
+									49531, 49537, 49547, 49549, 49559, 49597, 49603, 49613, 49627, 49633, 49639, 49663, 49667, 49669, 49681, 49697,
+									49711, 49727, 49739, 49741, 49747, 49757, 49783, 49787, 49789, 49801, 49807, 49811, 49823, 49831, 49843, 49853,
+									49871, 49877, 49891, 49919, 49921, 49927, 49937, 49939, 49943, 49957, 49991, 49993, 49999, 50021, 50023, 50033,
+									50047, 50051, 50053, 50069, 50077, 50087, 50093, 50101, 50111, 50119, 50123, 50129, 50131, 50147, 50153, 50159,
+									50177, 50207, 50221, 50227, 50231, 50261, 50263, 50273, 50287, 50291, 50311, 50321, 50329, 50333, 50341, 50359,
+									50363, 50377, 50383, 50387, 50411, 50417, 50423, 50441, 50459, 50461, 50497, 50503, 50513, 50527, 50539, 50543,
+									50549, 50551, 50581, 50587, 50591, 50593, 50599, 50627, 50647, 50651, 50671, 50683, 50707, 50723, 50741, 50753,
+									50767, 50773, 50777, 50789, 50821, 50833, 50839, 50849, 50857, 50867, 50873, 50891, 50893, 50909, 50923, 50929,
+									50951, 50957, 50969, 50971, 50989, 50993, 51001, 51031, 51043, 51047, 51059, 51061, 51071, 51109, 51131, 51133,
+									51137, 51151, 51157, 51169, 51193, 51197, 51199, 51203, 51217, 51229, 51239, 51241, 51257, 51263, 51283, 51287,
+									51307, 51329, 51341, 51343, 51347, 51349, 51361, 51383, 51407, 51413, 51419, 51421, 51427, 51431, 51437, 51439,
+									51449, 51461, 51473, 51479, 51481, 51487, 51503, 51511, 51517, 51521, 51539, 51551, 51563, 51577, 51581, 51593,
+									51599, 51607, 51613, 51631, 51637, 51647, 51659, 51673, 51679, 51683, 51691, 51713, 51719, 51721, 51749, 51767,
+									51769, 51787, 51797, 51803, 51817, 51827, 51829, 51839, 51853, 51859, 51869, 51871, 51893, 51899, 51907, 51913,
+									51929, 51941, 51949, 51971, 51973, 51977, 51991, 52009, 52021, 52027, 52051, 52057, 52067, 52069, 52081, 52103,
+									52121, 52127, 52147, 52153, 52163, 52177, 52181, 52183, 52189, 52201, 52223, 52237, 52249, 52253, 52259, 52267,
+									52289, 52291, 52301, 52313, 52321, 52361, 52363, 52369, 52379, 52387, 52391, 52433, 52453, 52457, 52489, 52501,
+									52511, 52517, 52529, 52541, 52543, 52553, 52561, 52567, 52571, 52579, 52583, 52609, 52627, 52631, 52639, 52667,
+									52673, 52691, 52697, 52709, 52711, 52721, 52727, 52733, 52747, 52757, 52769, 52783, 52807, 52813, 52817, 52837,
+									52859, 52861, 52879, 52883, 52889, 52901, 52903, 52919, 52937, 52951, 52957, 52963, 52967, 52973, 52981, 52999,
+									53003, 53017, 53047, 53051, 53069, 53077, 53087, 53089, 53093, 53101, 53113, 53117, 53129, 53147, 53149, 53161,
+									53171, 53173, 53189, 53197, 53201, 53231, 53233, 53239, 53267, 53269, 53279, 53281, 53299, 53309, 53323, 53327,
+									53353, 53359, 53377, 53381, 53401, 53407, 53411, 53419, 53437, 53441, 53453, 53479, 53503, 53507, 53527, 53549,
+									53551, 53569, 53591, 53593, 53597, 53609, 53611, 53617, 53623, 53629, 53633, 53639, 53653, 53657, 53681, 53693,
+									53699, 53717, 53719, 53731, 53759, 53773, 53777, 53783, 53791, 53813, 53819, 53831, 53849, 53857, 53861, 53881,
+									53887, 53891, 53897, 53899, 53917, 53923, 53927, 53939, 53951, 53959, 53987, 53993, 54001, 54011, 54013, 54037,
+									54049, 54059, 54083, 54091, 54101, 54121, 54133, 54139, 54151, 54163, 54167, 54181, 54193, 54217, 54251, 54269,
+									54277, 54287, 54293, 54311, 54319, 54323, 54331, 54347, 54361, 54367, 54371, 54377, 54401, 54403, 54409, 54413,
+									54419, 54421, 54437, 54443, 54449, 54469, 54493, 54497, 54499, 54503, 54517, 54521, 54539, 54541, 54547, 54559,
+									54563, 54577, 54581, 54583, 54601, 54617, 54623, 54629, 54631, 54647, 54667, 54673, 54679, 54709, 54713, 54721,
+									54727, 54751, 54767, 54773, 54779, 54787, 54799, 54829, 54833, 54851, 54869, 54877, 54881, 54907, 54917, 54919,
+									54941, 54949, 54959, 54973, 54979, 54983, 55001, 55009, 55021, 55049, 55051, 55057, 55061, 55073, 55079, 55103,
+									55109, 55117, 55127, 55147, 55163, 55171, 55201, 55207, 55213, 55217, 55219, 55229, 55243, 55249, 55259, 55291,
+									55313, 55331, 55333, 55337, 55339, 55343, 55351, 55373, 55381, 55399, 55411, 55439, 55441, 55457, 55469, 55487,
+									55501, 55511, 55529, 55541, 55547, 55579, 55589, 55603, 55609, 55619, 55621, 55631, 55633, 55639, 55661, 55663,
+									55667, 55673, 55681, 55691, 55697, 55711, 55717, 55721, 55733, 55763, 55787, 55793, 55799, 55807, 55813, 55817,
+									55819, 55823, 55829, 55837, 55843, 55849, 55871, 55889, 55897, 55901, 55903, 55921, 55927, 55931, 55933, 55949,
+									55967, 55987, 55997, 56003, 56009, 56039, 56041, 56053, 56081, 56087, 56093, 56099, 56101, 56113, 56123, 56131,
+									56149, 56167, 56171, 56179, 56197, 56207, 56209, 56237, 56239, 56249, 56263, 56267, 56269, 56299, 56311, 56333,
+									56359, 56369, 56377, 56383, 56393, 56401, 56417, 56431, 56437, 56443, 56453, 56467, 56473, 56477, 56479, 56489,
+									56501, 56503, 56509, 56519, 56527, 56531, 56533, 56543, 56569, 56591, 56597, 56599, 56611, 56629, 56633, 56659,
+									56663, 56671, 56681, 56687, 56701, 56711, 56713, 56731, 56737, 56747, 56767, 56773, 56779, 56783, 56807, 56809,
+									56813, 56821, 56827, 56843, 56857, 56873, 56891, 56893, 56897, 56909, 56911, 56921, 56923, 56929, 56941, 56951,
+									56957, 56963, 56983, 56989, 56993, 56999, 57037, 57041, 57047, 57059, 57073, 57077, 57089, 57097, 57107, 57119,
+									57131, 57139, 57143, 57149, 57163, 57173, 57179, 57191, 57193, 57203, 57221, 57223, 57241, 57251, 57259, 57269,
+									57271, 57283, 57287, 57301, 57329, 57331, 57347, 57349, 57367, 57373, 57383, 57389, 57397, 57413, 57427, 57457,
+									57467, 57487, 57493, 57503, 57527, 57529, 57557, 57559, 57571, 57587, 57593, 57601, 57637, 57641, 57649, 57653,
+									57667, 57679, 57689, 57697, 57709, 57713, 57719, 57727, 57731, 57737, 57751, 57773, 57781, 57787, 57791, 57793,
+									57803, 57809, 57829, 57839, 57847, 57853, 57859, 57881, 57899, 57901, 57917, 57923, 57943, 57947, 57973, 57977,
+									57991, 58013, 58027, 58031, 58043, 58049, 58057, 58061, 58067, 58073, 58099, 58109, 58111, 58129, 58147, 58151,
+									58153, 58169, 58171, 58189, 58193, 58199, 58207, 58211, 58217, 58229, 58231, 58237, 58243, 58271, 58309, 58313,
+									58321, 58337, 58363, 58367, 58369, 58379, 58391, 58393, 58403, 58411, 58417, 58427, 58439, 58441, 58451, 58453,
+									58477, 58481, 58511, 58537, 58543, 58549, 58567, 58573, 58579, 58601, 58603, 58613, 58631, 58657, 58661, 58679,
+									58687, 58693, 58699, 58711, 58727, 58733, 58741, 58757, 58763, 58771, 58787, 58789, 58831, 58889, 58897, 58901,
+									58907, 58909, 58913, 58921, 58937, 58943, 58963, 58967, 58979, 58991, 58997, 59009, 59011, 59021, 59023, 59029,
+									59051, 59053, 59063, 59069, 59077, 59083, 59093, 59107, 59113, 59119, 59123, 59141, 59149, 59159, 59167, 59183,
+									59197, 59207, 59209, 59219, 59221, 59233, 59239, 59243, 59263, 59273, 59281, 59333, 59341, 59351, 59357, 59359,
+									59369, 59377, 59387, 59393, 59399, 59407, 59417, 59419, 59441, 59443, 59447, 59453, 59467, 59471, 59473, 59497,
+									59509, 59513, 59539, 59557, 59561, 59567, 59581, 59611, 59617, 59621, 59627, 59629, 59651, 59659, 59663, 59669,
+									59671, 59693, 59699, 59707, 59723, 59729, 59743, 59747, 59753, 59771, 59779, 59791, 59797, 59809, 59833, 59863,
+									59879, 59887, 59921, 59929, 59951, 59957, 59971, 59981, 59999, 60013, 60017, 60029, 60037, 60041, 60077, 60083,
+									60089, 60091, 60101, 60103, 60107, 60127, 60133, 60139, 60149, 60161, 60167, 60169, 60209, 60217, 60223, 60251,
+									60257, 60259, 60271, 60289, 60293, 60317, 60331, 60337, 60343, 60353, 60373, 60383, 60397, 60413, 60427, 60443,
+									60449, 60457, 60493, 60497, 60509, 60521, 60527, 60539, 60589, 60601, 60607, 60611, 60617, 60623, 60631, 60637,
+									60647, 60649, 60659, 60661, 60679, 60689, 60703, 60719, 60727, 60733, 60737, 60757, 60761, 60763, 60773, 60779,
+									60793, 60811, 60821, 60859, 60869, 60887, 60889, 60899, 60901, 60913, 60917, 60919, 60923, 60937, 60943, 60953,
+									60961, 61001, 61007, 61027, 61031, 61043, 61051, 61057, 61091, 61099, 61121, 61129, 61141, 61151, 61153, 61169,
+									61211, 61223, 61231, 61253, 61261, 61283, 61291, 61297, 61331, 61333, 61339, 61343, 61357, 61363, 61379, 61381,
+									61403, 61409, 61417, 61441, 61463, 61469, 61471, 61483, 61487, 61493, 61507, 61511, 61519, 61543, 61547, 61553,
+									61559, 61561, 61583, 61603, 61609, 61613, 61627, 61631, 61637, 61643, 61651, 61657, 61667, 61673, 61681, 61687,
+									61703, 61717, 61723, 61729, 61751, 61757, 61781, 61813, 61819, 61837, 61843, 61861, 61871, 61879, 61909, 61927,
+									61933, 61949, 61961, 61967, 61979, 61981, 61987, 61991, 62003, 62011, 62017, 62039, 62047, 62053, 62057, 62071,
+									62081, 62099, 62119, 62129, 62131, 62137, 62141, 62143, 62171, 62189, 62191, 62201, 62207, 62213, 62219, 62233,
+									62273, 62297, 62299, 62303, 62311, 62323, 62327, 62347, 62351, 62383, 62401, 62417, 62423, 62459, 62467, 62473,
+									62477, 62483, 62497, 62501, 62507, 62533, 62539, 62549, 62563, 62581, 62591, 62597, 62603, 62617, 62627, 62633,
+									62639, 62653, 62659, 62683, 62687, 62701, 62723, 62731, 62743, 62753, 62761, 62773, 62791, 62801, 62819, 62827,
+									62851, 62861, 62869, 62873, 62897, 62903, 62921, 62927, 62929, 62939, 62969, 62971, 62981, 62983, 62987, 62989,
+									63029, 63031, 63059, 63067, 63073, 63079, 63097, 63103, 63113, 63127, 63131, 63149, 63179, 63197, 63199, 63211,
+									63241, 63247, 63277, 63281, 63299, 63311, 63313, 63317, 63331, 63337, 63347, 63353, 63361, 63367, 63377, 63389,
+									63391, 63397, 63409, 63419, 63421, 63439, 63443, 63463, 63467, 63473, 63487, 63493, 63499, 63521, 63527, 63533,
+									63541, 63559, 63577, 63587, 63589, 63599, 63601, 63607, 63611, 63617, 63629, 63647, 63649, 63659, 63667, 63671,
+									63689, 63691, 63697, 63703, 63709, 63719, 63727, 63737, 63743, 63761, 63773, 63781, 63793, 63799, 63803, 63809,
+									63823, 63839, 63841, 63853, 63857, 63863, 63901, 63907, 63913, 63929, 63949, 63977, 63997, 64007, 64013, 64019,
+									64033, 64037, 64063, 64067, 64081, 64091, 64109, 64123, 64151, 64153, 64157, 64171, 64187, 64189, 64217, 64223,
+									64231, 64237, 64271, 64279, 64283, 64301, 64303, 64319, 64327, 64333, 64373, 64381, 64399, 64403, 64433, 64439,
+									64451, 64453, 64483, 64489, 64499, 64513, 64553, 64567, 64577, 64579, 64591, 64601, 64609, 64613, 64621, 64627,
+									64633, 64661, 64663, 64667, 64679, 64693, 64709, 64717, 64747, 64763, 64781, 64783, 64793, 64811, 64817, 64849,
+									64853, 64871, 64877, 64879, 64891, 64901, 64919, 64921, 64927, 64937, 64951, 64969, 64997, 65003, 65011, 65027,
+									65029, 65033, 65053, 65063, 65071, 65089, 65099, 65101, 65111, 65119, 65123, 65129, 65141, 65147, 65167, 65171,
+									65173, 65179, 65183, 65203, 65213, 65239, 65257, 65267, 65269, 65287, 65293, 65309, 65323, 65327, 65353, 65357,
+									65371, 65381, 65393, 65407, 65413, 65419, 65423, 65437, 65447, 65449, 65479, 65497, 65519, 65521, 65537};
+
+	unsigned int i;
+	unsigned int nextPrime;
+	unsigned int nextSqrootIndex;
+
+	if (number <= 65537) {
+		for (i=0; i<6543; i++) {
+			if (prime[i] >= number) {
+				return prime[i];
+			}
+		}
+	} else {
+		if (number > 4294967291UL) {	// this is the largest 32 bit prime
+			if (number > 4294967293UL) {
+				return 4294967295UL;	// 4294967295 = 3*5*17*257*65537
+			} else {
+				return 4294967293UL;	// 4294967293 = 9241*464773
+			}
+		}
+		if (number % 2 == 0) {
+			nextPrime = number + 1;
+		} else {
+			nextPrime = number;
+		}
+		for (nextSqrootIndex=54; nextSqrootIndex<6542; nextSqrootIndex++) {	// the 54th prime is 251; 251*251 = 63001 < 65538
+																			// the 55th prime is 257; 257*257 = 66049 > 65538
+			if (prime[nextSqrootIndex] * prime[nextSqrootIndex] > nextPrime) {
+				break;
+			}
+
+		}
+		i = 1;
+		while (TRUE) {
+			while (i<nextSqrootIndex) {
+				if (nextPrime % prime[i] == 0) {
+					nextPrime += 2;
+					i = 0;
+				}
+				i++;
+			}
+			if (i < 6542 && prime[i] * prime[i] == nextPrime) {
+				nextSqrootIndex++;
+				nextPrime += 2;
+				i = 1;
+			} else {
+				return nextPrime;
+			}
+		}
+	}
+
+	fprintf(stderr, "nextPrime(): unexpected error!\n");
+	exit(1);
+}
+
+/*
+ * Count the bits set in bitVector with a branch-free parallel reduction:
+ * 2-bit pair sums, then 4-bit nibble sums, then byte sums, which are
+ * finally folded into the low byte.  The masks cover 32 bits.
+ */
+unsigned int popCount(const unsigned int bitVector) {
+
+	/* every 2-bit field now holds the number of set bits of that pair */
+	const unsigned int pairSum = bitVector - ((bitVector >> 1) & 0x55555555);
+
+	/* every 4-bit field now holds the number of set bits of that nibble */
+	const unsigned int nibbleSum = (pairSum & 0x33333333) + ((pairSum >> 2) & 0x33333333);
+
+	/* every byte now holds the number of set bits of that byte */
+	unsigned int total = (nibbleSum + (nibbleSum >> 4)) & 0x0f0f0f0f;
+
+	/* accumulate the four byte counts into the lowest byte */
+	total += total >> 8;
+	total += total >> 16;
+
+	return total & 0x0000003f;
+
+}
+
diff --git a/MiscUtilities.h b/MiscUtilities.h
new file mode 100644
index 0000000..5ff4d0d
--- /dev/null
+++ b/MiscUtilities.h
@@ -0,0 +1,83 @@
+/*
+
+   MiscUtilities.h		Miscellaneous Utilities
+
+   This module contains miscellaneous utility functions.
+
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __MISC_UTILITIES_H__
+#define __MISC_UTILITIES_H__
+
+#include "TypeNLimit.h"
+#include "stdio.h"
+
+// init(variable) assigns 0 to variable.  NOTE: the expansion already ends
+// with ';', so it is a complete statement on its own.
+#define init(variable)							variable = 0;	// this is for avoiding compiler warning
+																// disable it if compiler becomes smarter!
+
+// Clear the lowest / highest 'offset' bits of value by shifting out and back.
+#define truncateRight(value, offset)			( (value) >> (offset) << (offset) )
+#define truncateLeft(value, offset)				( (value) << (offset) >> (offset) )
+// alignBoundary must be power of 2
+#define nextAlignedBoundary(offset, alignBoundary)	( ((offset) + (alignBoundary) - 1) & (- (alignBoundary)) )
+#define lastAlignedBoundary(offset, alignBoundary)		( (offset) & (- (alignBoundary)) )
+// Mean of two values computed without forming value1 + value2.
+#define average(value1, value2)					( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 )
+// NOTE: min, max, med3 and med3Index evaluate their arguments more than once;
+// do not pass expressions with side effects.
+#define min(value1, value2)						( ((value1) < (value2)) ? (value1) : (value2) )
+#define max(value1, value2)						( ((value1) > (value2)) ? (value1) : (value2) )
+// Median of three values.  Every argument is parenthesised so that expression
+// arguments (e.g. "x & 3" or "c ? a : b") keep their own precedence.
+#define med3(a, b, c)							( (a)<(b) ? ((b)<(c) ? (b) : (a)<(c) ? (c) : (a)) : ((b)>(c) ? (b) : (a)>(c) ? (c) : (a)))
+// Index (one of ia, ib, ic) whose key[] entry is the median of the three.
+#define med3Index(key, ia, ib, ic)				( (key)[ia]<(key)[ib] ? ((key)[ib]<(key)[ic] ? (ib) : (key)[ia]<(key)[ic] ? (ic) : (ia)) : ((key)[ib]>(key)[ic] ? (ib) : (key)[ia]>(key)[ic] ? (ic) : (ia)))
+// Exchange a and b through the temporary t.  NOTE(review): this expands to
+// several statements (starting with an empty one), so it must not be used as
+// the body of an unbraced if/else/loop.
+#define swap(a, b, t);							t = a; a = b; b = t;
+
+void Dust(const unsigned int len, unsigned char *pattern, const unsigned int level, const unsigned int window, const unsigned int word);
+
+void LimitCodeGenerateCodeTable(const unsigned int limit, unsigned int** codeValue, unsigned int** codeLength);
+
+int QSortUnsignedIntOrder(const void *data, const int index1, const int index2);
+void QSort(void* __restrict data, const int numData, const int dataWidth, int (*QSortComp)(const void*, const int, const int) );
+
+unsigned int checkDuplicate(int *input, const unsigned int numItem, const int minValue, const int maxValue, char* text);
+unsigned int leadingZero(const unsigned int input);
+unsigned int ceilLog2(const unsigned int input);
+unsigned int floorLog2(const unsigned int input);
+unsigned int power(const unsigned int base, const unsigned int power);
+void formatVALAsBinary(const unsigned int input, char* output, unsigned int bitGroup);
+unsigned int getRandomSeed();
+
+void ConvertBytePackedDNAToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int textLength);
+
+
+unsigned int reverseBit(unsigned int x);
+void initializeVAL(unsigned int *startAddr, const unsigned int length, const unsigned int initValue);
+void initializeCHAR(unsigned char *startAddr, const unsigned int length, const unsigned char initValue);
+unsigned int numberOfMatchInVAL(unsigned int *startAddr, const unsigned int length, const unsigned int searchValue);
+unsigned int numberOfMatchInCHAR(unsigned char *startAddr, const unsigned int length, const unsigned char searchValue);
+
+void bitCopyNoDestOffset(unsigned int *destinationAddress, const unsigned int *sourceAddress,
+							int sourceBitOffset, int copyLengthInBit);
+void bitCopyNoDestBitOffset(unsigned int *destinationAddress, int destinationWordOffset,
+							const unsigned int *sourceAddress, int sourceWordOffset,
+							int sourceBitOffset, int copyLengthInBit);
+unsigned int bitCopy(unsigned int *destinationAddress, int destinationWordOffset, int destinationBitOffset,
+			 const unsigned int *sourceAddress, int sourceBitOffset, int copyLengthInBit);
+
+// Returns a prime >= number: values up to 65537 are answered from a table of
+// primes, larger values by trial division against that table.
+// Inputs above 4294967291 (the largest 32-bit prime) return 4294967293 or
+// 4294967295 instead, neither of which is prime.
+unsigned int nextPrime(const unsigned int number);
+// Returns the number of bits set in bitVector (masks cover 32 bits).
+unsigned int popCount(const unsigned int bitVector);
+
+#endif
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..8bb8173
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,6 @@
+Beta Release 2.20 (5 May, 2010)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First source code release version.
+
+(2.20: 5 May, 2010)
diff --git a/PairMatch.c b/PairMatch.c
new file mode 100644
index 0000000..b963c4b
--- /dev/null
+++ b/PairMatch.c
@@ -0,0 +1,483 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  PairMatch.c
+ *
+ *    Description:  
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+
+#include "Match.h"
+
+/*
+ * Decide whether two single-end hits form an acceptable pair.
+ *
+ * p, q : hits of the first and the second read
+ * o    : pairing parameters (FR orientation flag, read length, insert range)
+ *
+ * Returns TRUE when both hits lie on the same chr, have different values in
+ * bit 24 of their info field, and their distance falls inside
+ * [o->min_ins, o->max_ins] for the configured orientation; FALSE otherwise.
+ *
+ * The function is deliberately not declared with a bare "inline": in C99/C11
+ * an inline definition without "static" or "extern" does not provide an
+ * external definition, so a call that the compiler chooses not to inline
+ * fails to link.
+ */
+int CheckIns(HITITEM *p, HITITEM *q, PEAUX *o) {
+	const int strain1 = (p->info >> 24)&1;
+	const int strain2 = (q->info >> 24)&1;
+
+	if (p->chr != q->chr || strain1 == strain2) return FALSE;
+
+	if (o->FR) {
+		/* distance is measured from the hit whose bit 24 is clear */
+		if (!strain1) {
+			return (q->pos-p->pos+o->len >= o->min_ins && q->pos-p->pos+o->len <= o->max_ins) ? TRUE : FALSE;
+		}
+		return (p->pos-q->pos+o->len >= o->min_ins && p->pos-q->pos+o->len <= o->max_ins) ? TRUE : FALSE;
+	}
+
+	/*
+	 * Other orientation (FR == 0).
+	 * NOTE(review): the lower bound is tested without adding o->len while the
+	 * upper bound includes it; kept exactly as in the original.
+	 */
+	if (strain1) {
+		return (q->pos-p->pos >= o->min_ins && q->pos-p->pos+o->len <= o->max_ins) ? TRUE : FALSE;
+	}
+	return (p->pos-q->pos >= o->min_ins && p->pos-q->pos+o->len <= o->max_ins) ? TRUE : FALSE;
+}
+
+/*
+ * qsort() comparator for HITITEM: orders by chr first, then by pos.
+ *
+ * The result is derived from comparisons instead of a subtraction, because a
+ * difference converted to int can wrap or be truncated for large values and
+ * then reports the wrong order.
+ */
+int HITCMP(const void *a, const void *b){
+	const HITITEM *lhs = (const HITITEM *)a;
+	const HITITEM *rhs = (const HITITEM *)b;
+
+	if (lhs->chr != rhs->chr) {
+		return (lhs->chr > rhs->chr) - (lhs->chr < rhs->chr);
+	}
+	return (lhs->pos > rhs->pos) - (lhs->pos < rhs->pos);
+}
+
+/*
+ * Combine the single-end hits of both reads into paired hits.
+ *
+ * hitse : hitse[0] / hitse[1] hold the single-end hits of read 1 / read 2
+ * po    : pairing parameters; po->cutoff caps the number of stored pairs
+ * hitpe : hitpe[0] / hitpe[1] receive the accepted pairs (appended)
+ *
+ * Returns 0 when either read has no hit or the only candidate pair is
+ * rejected, 1 when the single candidate pair is accepted, otherwise the
+ * number of pairs held in hitpe[0] after the scan.
+ *
+ * When more than one candidate exists both hit lists are sorted in place
+ * with HITCMP.  Every end-of-list test is made before the corresponding
+ * element is dereferenced, so no element past either list is read.
+ */
+int GenPair(HITTABLE **hitse, PEAUX *po,  HITTABLE **hitpe) {
+	if(!hitse[0]->n || !hitse[1]->n) return 0;
+
+	HITITEM *p = hitse[0]->itemList;
+	HITITEM *q = hitse[1]->itemList;
+	const int cutoff = po->cutoff;
+
+	/* exactly one candidate pair: no sorting needed */
+	if (hitse[0]->n == 1 && hitse[1]->n == 1) {
+		if (!CheckIns(p, q, po)) {
+			return 0;
+		}
+		HITCPY(hitpe[0]->itemList+hitpe[0]->n, p);
+		HITCPY(hitpe[1]->itemList+hitpe[1]->n, q);
+		hitpe[0]->n++; hitpe[1]->n++;
+		return 1;
+	}
+
+	if (hitse[0]->n > 1) qsort(hitse[0]->itemList, hitse[0]->n, sizeof(HITITEM), HITCMP);
+	if (hitse[1]->n > 1) qsort(hitse[1]->itemList, hitse[1]->n, sizeof(HITITEM), HITCMP);
+
+	HITITEM * const pEnd   = hitse[0]->itemList + hitse[0]->n;
+	HITITEM * const qBegin = hitse[1]->itemList;
+	HITITEM * const qEnd   = qBegin + hitse[1]->n;
+	int n = hitpe[0]->n;
+
+	p = hitse[0]->itemList; q = qBegin;
+	while (p != pEnd) {
+		/* advance p, then q, until both sit on the same chr (lists are sorted) */
+		while (p != pEnd && p->chr < q->chr) p++;
+		if (p == pEnd) return n;
+		while (q != qEnd && p->chr > q->chr) q++;
+		if (q == qEnd) return n;
+
+		/* test p against every hit of read 2 on that chr */
+		while (q != qEnd && p->chr == q->chr) {
+			if (CheckIns(p, q, po)) {
+				HITCPY(hitpe[0]->itemList+n, p);
+				HITCPY(hitpe[1]->itemList+n, q);
+				n++; hitpe[0]->n++; hitpe[1]->n++;
+				if (n >= cutoff) {
+					hitpe[0]->n = hitpe[1]->n = n;
+					return n;
+				}
+			}
+			q++;
+		}
+		p++;
+		q = qBegin;
+	}
+	hitpe[0]->n = hitpe[1]->n = n;
+	return n;
+}
+
+#if 1	
+/*
+ * Try to place an unaligned read next to the hits of its mate by local
+ * alignment against the reference window implied by the insert-size range.
+ *
+ * alnSeq   : the read to be rescued
+ * bo       : alignment options (packed reference, block list, limits)
+ * po       : pairing parameters (insert range, FR flag)
+ * rescue   : index (0 or 1) of the read being rescued; the mate's hits are
+ *            taken from hitse[rescue^1]
+ * hitse    : single-end hit tables
+ * hitpe    : paired hit tables; on success both receive exactly one item
+ * nc       : set to the number of cigar entries when a rescue succeeded
+ * n_rescue : incremented once for every mate hit that yields an acceptable
+ *            alignment
+ *
+ * Returns a heap buffer of cigar entries (operation in the top 2 bits,
+ * length in the low 14 bits) owned by the caller, or NULL when no alignment
+ * was attempted.  Compared with the previous version, buffers returned by
+ * aln_path2cigar() are always released before being replaced.
+ */
+unsigned short *SWRescue(const ALNSEQ *alnSeq, const BWTOPT *bo, const PEAUX *po, const int rescue, HITTABLE **hitse, HITTABLE **hitpe, int *nc, int *n_rescue){
+
+	HITITEM *hitf = hitse[rescue^1]->itemList;
+	int nfound = hitse[rescue^1]->n;
+
+	const unsigned int *pacRef = bo->pacRef;
+	const unsigned int dnaLen = bo->dnaLen;
+	char *seq;
+	ChrBlock *blockList = bo->blockList;
+
+	int minIns, maxIns, len, keyLength, n;
+	unsigned int occPos, beg;
+
+	keyLength = alnSeq->len;
+	minIns = po->min_ins; maxIns = po->max_ins;
+	/* length of the reference window searched for the mate */
+	len = maxIns-minIns+3*keyLength;
+	occPos = beg = 0;
+	AlnParam ap = aln_param_bwa;
+
+	path_t *path, *p;
+	int i, path_len, n_cigar;
+	path_len = n_cigar = 0;
+
+	cigar_t * cigar = NULL;
+	path = (path_t *)calloc((len+keyLength), sizeof(path_t));
+
+
+	unsigned char *refSeq = (unsigned char *)calloc(len, sizeof(unsigned char));
+
+	/* minimum aligned span (reference / read) required to accept an alignment */
+	int SWCutoffX, SWCutoffY;
+	SWCutoffX = SWCutoffY = bo->min_len < keyLength ? bo->min_len : (keyLength < 17 ? keyLength : 17);
+
+	HITITEM *peItem1, *peItem2;
+	peItem1 = hitpe[rescue^1]->itemList;
+	peItem2 = hitpe[rescue]->itemList;
+
+	int mm = 10;
+
+	int n_mm, n_gapo, n_gape, gap_beg, ed_dist;
+	n_mm = n_gapo = n_gape = gap_beg = 0;
+	ed_dist = keyLength;
+
+	/* copy of the best cigar found so far */
+	unsigned short tmp_cigar[16];
+
+	for (i = 0; i < nfound; ++i) {
+
+		occPos = (hitf+i)->occ_pos;
+
+		/* skip mate hits whose 3-bit field at info bits 24..26 exceeds the last accepted value */
+		if((((hitf+i)->info>>24)&0x7) > mm) continue;
+		mm = (hitf+i)->info>>24&0x7;
+		n = (hitf+i)->blockid;
+		if(po->FR ^ (hitf+i)->strain) {
+			/* window lies after the mate hit */
+			beg = occPos + minIns - keyLength;
+			if(beg + len >= (blockList + n)->blockEnd) continue;
+			seq = (hitf+i)->strain ? alnSeq->seq : alnSeq->rc;
+		} else {
+			/* window lies before the mate hit */
+			beg = occPos - maxIns - keyLength;
+			if(beg < (blockList + n)->blockStart) continue;
+			seq = (hitf + i)->strain ? alnSeq->seq : alnSeq->rc;
+		}
+
+		{
+			/* unpack the 2-bit reference bases of the window, 16 per word */
+			unsigned char *dst = refSeq;
+			unsigned int j, l;
+			for(j=beg, l=0; l<len && j<dnaLen; ++l, ++j)
+				*dst++ = (((*(pacRef+(j>>4)))>>(((~j)&0xf)<<1))&0x3);
+		}
+
+		/* release the cigar of the previous attempt, even if it held no entries */
+		free(cigar);
+		cigar = NULL;
+		n_cigar = 0;
+
+		aln_local_core(refSeq, len, (unsigned char *)seq, keyLength, &ap, path, &path_len, 1);
+		cigar = aln_path2cigar(path, path_len, &n_cigar);
+		int k, x, y;
+		x = y = k = 0;
+		/* x / y: number of reference / read bases covered by the alignment */
+		for (k = 0, x = y = 0; k < n_cigar; ++k) {
+			unsigned short c = cigar[k];
+			if (c>>14 == FROM_M) x += c&0x3fff, y += c&0x3fff;
+			else if (c>>14 == FROM_D) x += c&0x3fff;
+			else y += c&0x3fff;
+		}
+
+		if (x < SWCutoffX  && y < SWCutoffY) continue;
+
+		{ // update cigar and coordinate;
+			SWCutoffX = x; SWCutoffY = y;
+			int start, end;
+			p = path + path_len - 1;
+			beg += (p->i? p->i : 1) - 1;
+			start = (p->j? p->j : 1) - 1;
+			end = path->j;
+			/* room for one extra entry at each end (operation code 3) */
+			cigar = (unsigned short*)realloc(cigar, 2 * (n_cigar + 2));
+			if (start) {
+				memmove(cigar + 1, cigar, 2 * (n_cigar));
+				cigar[0] = 3<<14 | start;
+				++(n_cigar);
+			}
+			if (end < keyLength) {
+				cigar[n_cigar] = 3<<14 | (keyLength - end);
+				++(n_cigar);
+			}
+		}
+
+		n_mm = n_gapo = n_gape = gap_beg = 0;
+		int indel = 3;
+		{
+			/* walk the cigar to count mismatches, gap opens and gap extensions */
+			p = path + path_len - 1;
+			x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
+			int l=0;
+			for (k = 0; k < n_cigar; ++k) {
+				unsigned short c = cigar[k];
+				if (c>>14 == FROM_M) {
+					for (l = 0; l < (c&0x3fff); ++l)
+						if (refSeq[x+l] < 4 && seq[y+l] < 4 && refSeq[x+l] != seq[y+l]) ++n_mm;
+					x += c&0x3fff, y += c&0x3fff;
+				} else if (c>>14 == FROM_D) {
+					indel = 3; gap_beg = y; x += c&0x3fff; ++n_gapo; n_gape += (c&0x3fff) - 1;
+				} else if (c>>14 == FROM_I){
+					indel = 4; gap_beg = y; y += c&0x3fff; ++n_gapo; n_gape += (c&0x3fff) - 1;
+				}
+			}
+			if(n_mm >= bo->max_mm || n_gapo > 1 || n_gape + n_gapo > bo->gap_len) continue;
+			if (!n_gapo) indel=0;
+		}
+
+		*n_rescue += 1;
+
+		if (n_gape + n_gapo + n_mm < ed_dist)
+		{// update pe hit
+			hitpe[rescue^1]->n = hitpe[rescue]->n = 1;
+			HITCPY(peItem1, hitf+i);
+			peItem2->chr = peItem1->chr;
+			peItem2->pos = beg-occPos+(hitf+i)->pos;
+			peItem2->occ_pos = beg;
+			peItem2->strain = 1 ^ peItem1->strain;
+			peItem2->n_mm = n_mm;
+			peItem2->n_gapo = n_gapo;
+			peItem2->n_gape = n_gape;
+			peItem2->info = 0;
+			peItem2->info |= ((indel<<25) | ((gap_beg&0xff)<<12) | ((n_gape+1)&0xff));
+			peItem2->gap_beg = gap_beg;
+			peItem2->n_cigar = n_cigar;
+			ed_dist = n_gape + n_gapo + n_mm;
+			for(k=0; k < n_cigar; ++k)tmp_cigar[k] = cigar[k];
+		}
+
+	}
+
+	if (*n_rescue) {
+		/* hand back the best cigar; replace the buffer only when it is too small */
+		if (n_cigar < peItem2->n_cigar) {
+			free(cigar);	/* was leaked before */
+			cigar = (unsigned short *) calloc (peItem2->n_cigar, sizeof(unsigned short));
+		}
+		for(i = 0; i < peItem2->n_cigar; ++i) cigar[i] = tmp_cigar[i];
+		*nc = peItem2->n_cigar;
+	}
+
+	free(path);
+	free(refSeq);
+	return cigar;
+
+}
+
+#endif
+
+/*
+ * Paired-end alignment driver for one worker.
+ *
+ * Reads are taken pairwise from mseqs->seqList (i, i+1).  For every pair the
+ * single-end hits of both reads are collected with the exact / 1-error /
+ * 2-error BWT matchers selected by opt->mode, combined with GenPair(), and,
+ * if only one read could be placed, the other is searched with SWRescue().
+ * The chosen hit(s) are stored through PickupHit(); pairs with too many
+ * ambiguous bases or without any hit get flag 0x12 and report = 0.
+ *
+ * tid is the id of the calling worker; with PTHREADS and more than one
+ * thread, batches of up to NSEQ_PER_THREAD reads are claimed under "lock".
+ *
+ * "cigar" is declared and initialised before the ALIGN label: several
+ * "goto OUTPUT" statements used to jump over its initialisation, leaving it
+ * indeterminate when passed to PickupHit().
+ *
+ * NOTE(review): seqList[i+1] is read unconditionally, so mseqs->n is assumed
+ * to be even - confirm against the caller.
+ */
+void PEAlnCore(int tid, MULTISEQ *mseqs, BWT *bwt, BWT *rev_bwt, LOOKUPTABLE *lookup, LOOKUPTABLE *rev_lookup, HSP *hsp,const SOAPOPT *opt) {
+	int i;
+	ALNSEQ *alnSeq[2];
+	HITTABLE *hitse[2], *hitpe[2];
+	hitse[0] = (HITTABLE *)malloc(sizeof(HITTABLE));
+	hitse[1] = (HITTABLE *)malloc(sizeof(HITTABLE));
+	hitpe[0] = (HITTABLE *)malloc(sizeof(HITTABLE));
+	hitpe[1] = (HITTABLE *)malloc(sizeof(HITTABLE));
+	hitse[0]->itemList = (HITITEM *) malloc (sizeof(HITITEM) * MAX_ALN);
+	hitse[1]->itemList = (HITITEM *) malloc (sizeof(HITITEM) * MAX_ALN);
+	hitpe[0]->itemList = (HITITEM *) malloc (sizeof(HITITEM) * (MAX_ALN+1));
+	hitpe[1]->itemList = (HITITEM *) malloc (sizeof(HITITEM) * (MAX_ALN+1));
+	const int multiTotal = mseqs->n;
+	PEAUX pe_aux;
+	BWTOPT boA, boB;
+	int mode, cutoff, ns, seedLen, rr;
+	mode = opt->mode; cutoff = opt->cutoff; ns = opt->ns; seedLen = opt->aln_len; rr = opt->rr;
+	/* both reads share the same reference and limits */
+	boB.nblock = boA.nblock = hsp->numOfBlock;boB.blockList = boA.blockList = hsp->blockList;
+	boB.cutoff=boA.cutoff = MAX_ALN; boB.gap_len = boA.gap_len = opt->gap_len; boB.gap_fb = boA.gap_fb = opt->gap_fb;
+	boB.max_mm = boA.max_mm = opt->max_mm; boB.pacRef = boA.pacRef = hsp->packedDNA;
+	boB.dnaLen = boA.dnaLen = hsp->dnaLength;
+	boA.min_len = boB.min_len = opt->min_len;
+	boA.h = boA.x = boA.y = boB.h = boB.x = boB.y = 0;
+	pe_aux.min_ins = opt->min_ins; pe_aux.max_ins = opt->max_ins;
+	pe_aux.FR = opt->FR; pe_aux.len = 0; pe_aux.cutoff = MAX_ALN;
+	int x = 0;
+	int se, pe, non;
+	se=pe=non=0;
+	double swBeg, swTime;
+	swBeg = swTime = 0;
+	int nRescue = 0;
+	for(i=0; i < multiTotal; i += 2){
+#ifdef  PTHREADS
+		if (opt->nthreads > 1) {
+			/* claim an unowned batch for this worker, or skip pairs owned by another one */
+			pthread_mutex_lock(&lock);
+			ALNSEQ *p = mseqs->seqList+i;
+			if (p->tid < 0) {
+				int j;
+				int pend = multiTotal-i;
+				for (j = 0; j < pend && j < NSEQ_PER_THREAD; j+=2){
+					(p+j)->tid = (p+j+1)->tid = tid;
+				}
+			} else if (p->tid != tid) {
+				pthread_mutex_unlock(&lock);
+				continue;
+			}
+			pthread_mutex_unlock(&lock);
+		}
+#endif
+
+		alnSeq[0] = mseqs->seqList+i;
+		alnSeq[1] = mseqs->seqList+i+1;
+		hitse[0]->n = hitse[1]->n = hitpe[0]->n = hitpe[1]->n = 0;
+
+		if(alnSeq[0]->ns <= ns || alnSeq[1]->ns <= ns){
+			int nc = 0;
+			/* must be initialised before any "goto OUTPUT" can be taken */
+			unsigned short * cigar = NULL;
+			nRescue = 0;
+			x+=2;
+			int ah0, ah1, ah2, bh0, bh1, bh2, ah3, bh3;
+			boA.seqLen = boA.alnLen = alnSeq[0]->len;
+			boB.seqLen = boB.alnLen = alnSeq[1]->len;
+			/* NOTE(review): stays 0 even after the seed retry sets boA.extLen / boB.extLen - confirm */
+			unsigned int extLen = 0;
+			pe_aux.len = alnSeq[0]->len;
+			boA.fw = alnSeq[0]->seq; boA.rc = alnSeq[0]->rc;
+			boB.fw = alnSeq[1]->seq; boB.rc = alnSeq[1]->rc;
+
+ALIGN:
+			ah0 = ah1 = ah2 = ah3 = bh0 = bh1 = bh2 = bh3 = 0;
+			boA.h = boA.alnLen>>1;
+			boA.x = boA.y = boA.alnLen>=39?boA.alnLen/3:(boA.alnLen>=32 && boA.alnLen<39)?10:7;
+			boB.h = boB.alnLen>>1;
+			boB.x = boB.y = boB.alnLen>=39?boB.alnLen/3:(boB.alnLen>=32 && boB.alnLen<39)?10:7;
+			/* the cases intentionally fall through from exact to 1- and 2-error matching */
+			switch (mode) {
+				case 5:
+				case 4: cutoff = opt->cutoff;
+					/* fallthrough */
+				case 0:
+					ah0  = BWTExactMatching((unsigned char*)alnSeq[0]->seq, &boA, FORWARD, bwt, lookup, hitse[0]);
+					ah0 += BWTExactMatching((unsigned char*)alnSeq[0]->rc+extLen, &boA, REVERSE, bwt, lookup, hitse[0]);
+					bh0  = BWTExactMatching((unsigned char*)alnSeq[1]->seq, &boB, FORWARD, bwt, lookup, hitse[1]);
+					bh0 += BWTExactMatching((unsigned char*)alnSeq[1]->rc+extLen, &boB, REVERSE, bwt, lookup, hitse[1]);
+					if (ah0 && bh0) {
+							 GenPair(hitse, &pe_aux, hitpe);}
+					if (hitpe[0]->n >= cutoff || mode == 0) break;
+					/* fallthrough */
+				case 1:
+					ah1  = BWT1ErrorMatching((unsigned char*)alnSeq[0]->seq, &boA,  FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[0]);
+					ah1 += BWT1ErrorMatching((unsigned char*)alnSeq[0]->rc+extLen, &boA, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hitse[0]);
+					bh1  = BWT1ErrorMatching((unsigned char*)alnSeq[1]->seq, &boB, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					bh1 += BWT1ErrorMatching((unsigned char*)alnSeq[1]->rc+extLen, &boB, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					if (ah1 || bh1) {
+							 GenPair(hitse, &pe_aux, hitpe);}
+					if (hitpe[0]->n >= cutoff || mode == 1) break;
+					/* fallthrough */
+				case 2:
+					ah2  = BWT2ErrorMatching((unsigned char*)alnSeq[0]->seq, &boA, FORWARD, bwt, rev_bwt,  lookup, rev_lookup, hitse[0]);
+					ah2 += BWT2ErrorMatching((unsigned char*)alnSeq[0]->rc+extLen, &boA, REVERSE, bwt, rev_bwt,  lookup, rev_lookup, hitse[0]);
+					bh2  = BWT2ErrorMatching((unsigned char*)alnSeq[1]->seq, &boB, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					bh2 += BWT2ErrorMatching((unsigned char*)alnSeq[1]->rc+extLen, &boB, REVERSE, bwt, rev_bwt,  lookup, rev_lookup, hitse[1]);
+					if (ah2 || bh2){
+							 GenPair(hitse, &pe_aux, hitpe);}
+					if (hitpe[0]->n >= cutoff || mode == 4 || mode == 2) break;
+			}
+
+			/* retry with the first seedLen bases for whichever read has no hit yet */
+			if (seedLen<boB.alnLen) {
+				if (!hitse[0]->n && !hitse[1]->n && (seedLen<boA.alnLen && seedLen< boB.alnLen)) {
+					boB.alnLen = boA.alnLen = seedLen;
+					boA.extLen = alnSeq[0]->len-seedLen;
+					boB.extLen = alnSeq[1]->len-seedLen;
+					if (alnSeq[0]->len < seedLen || alnSeq[1]->len <seedLen){
+						fprintf(stderr, "read_len shorter than seed_len%d. Continue\n", seedLen);
+						goto OUTPUT;
+					}
+					goto ALIGN;
+				} else if (!hitpe[0]->n &&  !hitse[0]->n && seedLen<boA.alnLen) {
+					if (alnSeq[0]->len < seedLen){
+						fprintf(stderr, "read_len shorter than seed_len%d. Continue\n", seedLen);
+						goto OUTPUT;
+					}
+					boA.alnLen = seedLen; boA.extLen = alnSeq[0]->len - seedLen; boA.h = boA.alnLen>>1;
+					boA.x = boA.y = boA.alnLen>=39?boA.alnLen/3:(boA.alnLen>=32 && boA.alnLen<39)?10:7;
+					ah0  = BWTExactMatching((unsigned char*)alnSeq[0]->seq, &boA, FORWARD, bwt, lookup, hitse[0]);
+					ah0 += BWTExactMatching((unsigned char*)alnSeq[0]->rc+boA.extLen, &boA, REVERSE, bwt, lookup, hitse[0]);
+					if (ah0 && GenPair(hitse, &pe_aux, hitpe)) goto OUTPUT;
+					ah1  = BWT1ErrorMatching((unsigned char*)alnSeq[0]->seq, &boA,  FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[0]);
+					ah1 += BWT1ErrorMatching((unsigned char*)alnSeq[0]->rc+boA.extLen, &boA, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hitse[0]);
+					if (ah1 && GenPair(hitse, &pe_aux,  hitpe))goto OUTPUT;
+					ah2  = BWT2ErrorMatching((unsigned char*)alnSeq[0]->seq, &boA, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[0]);
+					ah2 += BWT2ErrorMatching((unsigned char*)alnSeq[0]->rc+boA.extLen, &boA, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hitse[0]);
+					if (ah2 && GenPair(hitse, &pe_aux, hitpe))goto OUTPUT;
+				} else if (!hitpe[1]->n &&  !hitse[1]->n &&  seedLen <boB.alnLen) {
+					if (alnSeq[1]->len < seedLen){
+						fprintf(stderr, "read_len shorter than seed_len%d. Continue\n", seedLen);
+						goto OUTPUT;
+					}
+					boB.alnLen = seedLen; boB.extLen = alnSeq[1]->len - seedLen; boB.h = boB.alnLen>>1;
+					boB.x = boB.y = boB.alnLen>=39?boB.alnLen/3:(boB.alnLen>=32 && boB.alnLen<39)?10:7;
+					bh0  = BWTExactMatching((unsigned char*)alnSeq[1]->seq, &boB, FORWARD, bwt, lookup, hitse[1]);
+					bh0 += BWTExactMatching((unsigned char*)alnSeq[1]->rc+boB.extLen, &boB, REVERSE, bwt, lookup, hitse[1]);
+					if(bh0 && GenPair(hitse, &pe_aux,  hitpe)) goto OUTPUT;
+					bh1  = BWT1ErrorMatching((unsigned char*)alnSeq[1]->seq, &boB, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					bh1 += BWT1ErrorMatching((unsigned char*)alnSeq[1]->rc+boB.extLen, &boB, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					if(bh1 && GenPair(hitse, &pe_aux, hitpe)) goto OUTPUT;
+					bh2  = BWT2ErrorMatching((unsigned char*)alnSeq[1]->seq, &boB, FORWARD, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					bh2 += BWT2ErrorMatching((unsigned char*)alnSeq[1]->rc+boB.extLen, &boB, REVERSE, bwt, rev_bwt, lookup, rev_lookup, hitse[1]);
+					if(bh2 && GenPair(hitse, &pe_aux,  hitpe)) goto OUTPUT;
+				}
+			}
+
+			/* one read placed, the other not: rescue the missing one by local alignment */
+			if (hitse[1]->n && !hitse[0]->n && (boA.min_len < alnSeq[0]->len || boA.gap_len)){
+				cigar = SWRescue(alnSeq[0], &boA, &pe_aux, 0, hitse, hitpe, &nc, &nRescue);
+				goto OUTPUT;
+			} else if (!hitse[1]->n && hitse[0]->n && (boB.min_len < alnSeq[1]->len || boA.gap_len)) {
+				cigar = SWRescue(alnSeq[1], &boB, &pe_aux, 1, hitse, hitpe, &nc, &nRescue);
+				goto OUTPUT;
+			}
+
+OUTPUT:
+			if (hitpe[0]->n && hitpe[1]->n){
+				/* paired hit: the same randomly chosen index is used for both reads */
+				pe+=2;
+				int site = (hitpe[0]->n == 1)?0:rand()%hitpe[0]->n;
+				alnSeq[0]->flag = alnSeq[1]->flag = 0x3;
+				PickupHit(alnSeq[0], rr, &site, hitpe[0], hsp->packedDNA, hsp->dnaLength, cigar);
+				PickupHit(alnSeq[1], rr, &site, hitpe[1], hsp->packedDNA, hsp->dnaLength, cigar);
+				if (nRescue) alnSeq[0]->nhits = alnSeq[1]->nhits = nRescue;
+			} else {
+				int site = 0;
+				if (hitse[0]->n && hitse[1]->n) {
+					/* both reads hit, but not as a pair */
+					se+=2;
+					site = hitse[0]->n == 1?0:rand()%hitse[0]->n;
+					PickupHit(alnSeq[0], rr, &site, hitse[0], hsp->packedDNA, hsp->dnaLength, cigar);
+					site = hitse[1]->n == 1?0:rand()%hitse[1]->n;
+					PickupHit(alnSeq[1], rr, &site, hitse[1], hsp->packedDNA, hsp->dnaLength, cigar);
+					alnSeq[0]->flag = alnSeq[1]->flag = 1;
+				} else if(!hitse[1]->n && hitse[0]->n) {
+					se++;
+					site = hitse[0]->n == 1?0:rand()%hitse[0]->n;
+					PickupHit(alnSeq[0], rr, &site, hitse[0], hsp->packedDNA, hsp->dnaLength, cigar);
+					alnSeq[1]->flag |= 0x8;
+					non++;
+				} else if(!hitse[0]->n && hitse[1]->n) {
+					se++;
+					site = hitse[1]->n == 1?0:rand()%hitse[1]->n;
+					PickupHit(alnSeq[1], rr, &site, hitse[1], hsp->packedDNA, hsp->dnaLength, cigar);
+					alnSeq[0]->flag |= 0x8;
+					non++;
+				} else {
+					non+=2;
+					alnSeq[0]->flag |= 0x12;
+					alnSeq[1]->flag |= 0x12;
+					alnSeq[0]->report = 0;
+					alnSeq[1]->report = 0;
+				}
+			}
+			if(nc) {
+				free(cigar);
+				nc = 0;
+			}
+		} else {
+			/* both reads carry more than ns ambiguous bases: not aligned */
+			non+=2;
+			alnSeq[0]->flag |= 0x12;
+			alnSeq[1]->flag |= 0x12;
+			alnSeq[0]->report = 0;
+			alnSeq[1]->report = 0;
+		}
+	}
+	free(hitse[0]->itemList);
+	free(hitse[1]->itemList);
+	free(hitpe[0]->itemList);
+	free(hitpe[1]->itemList);
+	free(hitse[0]);free(hitse[1]);free(hitpe[0]);free(hitpe[1]);
+}
diff --git a/SeqIO.c b/SeqIO.c
new file mode 100644
index 0000000..b1e196f
--- /dev/null
+++ b/SeqIO.c
@@ -0,0 +1,201 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  SeqIO.c
+ *
+ *    Description:  
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+#include "SeqIO.h"
+extern unsigned char charMap[256];
+extern unsigned char complementMap[256];
+extern const char ambiguityCount[16];
+
+/*
+ * Peek at the first byte of fd to tell the input format apart.
+ * Returns FASTA for '>' (and when nothing could be read), FASTQ for '@';
+ * any other first byte is reported on stderr and terminates the program.
+ * The byte that was read is stepped back over with lseek().
+ */
+int CheckFast (int fd) {
+	char first;
+
+	/* nothing readable: default to FASTA */
+	if (read(fd, &first, 1) <= 0) return FASTA;
+
+	lseek(fd, -1, SEEK_CUR);
+
+	switch (first) {
+		case '>': return FASTA;
+		case '@': return FASTQ;
+		default:  break;
+	}
+
+	fprintf(stderr, "File Error: unrecognized file\n");
+	close(fd);
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Read the next FASTA record from fp into seq.
+ *
+ * The record name is the header text up to the first blank, tab or line end
+ * (at most MAX_NAME_LEN-1 characters).  Bases are stored through charMap in
+ * seq->seq and through complementMap in seq->rc; characters that are not an
+ * unambiguous base are replaced by 'G' and counted in seq->ns.  Every
+ * position gets the quality character 'h'.  Buffers grow in steps of
+ * QUERY_LEN and always keep room for the terminating NUL.
+ *
+ * Returns the sequence length, or -1 when no further record exists.
+ * A header line cut off by end of file, or an allocation failure,
+ * terminates the program.  CONV is unused.
+ */
+int fasta(FILE *fp, seq_t *seq, const int CONV){
+
+	int l, max, ns;
+	int c;
+	char *p, *q;
+
+	(void)CONV;
+	l = ns = 0;
+	max=seq->max;
+
+	/* skip everything up to the next record marker */
+	while ((c = getc(fp)) != EOF && c != '>');
+	if (c == EOF) return -1;
+
+	p = seq->name;
+	while ((c = getc(fp)) != EOF && c != ' ' && c != '\r' && c != '\t' && c != '\n' && ++l < MAX_NAME_LEN) *p++ = c;
+	*p = '\0';
+
+	/* discard the rest of the header line; stop at EOF instead of spinning */
+	while (c != '\n' && c != EOF) c = getc(fp);
+
+	if (c == EOF) {
+		fprintf(stderr, "\nFile Error: unexpected feof\n");
+		exit(EXIT_FAILURE);
+	}
+
+	l = 0;
+	p=seq->seq; q=seq->rc;
+	for (;;) {
+		/* keep space for one more base plus the terminating NUL */
+		if (l + 1 >= max) {
+			max += QUERY_LEN;
+			seq->seq  = (char *)realloc(seq->seq, sizeof(char)* max);
+			seq->rc   = (char *)realloc(seq->rc, sizeof(char)* max);
+			seq->qual = (char *)realloc(seq->qual, sizeof(char) * max);
+			if (seq->seq == NULL || seq->rc == NULL || seq->qual == NULL) {
+				fprintf(stderr, "\nMemory Error: cannot grow read buffer\n");
+				exit(EXIT_FAILURE);
+			}
+			p = seq->seq + l;
+			q = seq->rc + l;
+		}
+
+		c = getc(fp);
+		if (c == EOF || c == '>') break;
+		if (c == '\n' || c == '\r') continue;
+
+		if (ambiguityCount[charMap[c]] != 1) {
+			*p++ = charMap['G'];
+			*q++ = complementMap['G'];
+			ns++;
+		} else {
+			*p++ = charMap[c];
+			*q++ = complementMap[c];
+		}
+		seq->qual[l] = 'h';
+		l++;
+	}
+
+	seq->qual[l] = *p   = *q = '\0';
+	seq->l       = l;
+	seq->max     = max;
+	seq->ns      = ns;
+	/* leave the marker of the following record for the next call */
+	if (c == '>') ungetc(c,fp);
+	return l;
+}		/* -----  end of function fasta  ----- */
+
+/*
+ * Read the next FASTQ record from fp into seq.
+ *
+ * The record name is the header text up to the first blank, tab or line end
+ * (at most MAX_NAME_LEN-1 characters, so the terminating NUL always fits).
+ * Bases up to the '+' line are stored through charMap in seq->seq and
+ * through complementMap in seq->rc (neither is NUL-terminated here);
+ * characters that are not an unambiguous base are replaced by 'G' and
+ * counted in seq->ns.  The quality line is copied to seq->qual and
+ * NUL-terminated.  All three buffers grow together in steps of QUERY_LEN,
+ * and seq->max is updated whenever they do.
+ *
+ * Returns the read length, -1 when no further record exists, and 0 when the
+ * record is truncated after the bases or when sequence and quality lengths
+ * differ.  A header line cut off by end of file, or an allocation failure,
+ * terminates the program.  CONV is unused.
+ */
+int fastq (FILE *fp, seq_t *seq, const int CONV) {
+
+	int l, max, ns;
+	int c;
+	char *p;
+
+	(void)CONV;
+	ns = 0;
+	max = seq->max;
+
+	/* skip everything up to the next record marker */
+	while ((c = getc(fp)) != EOF && c != '@');
+	if (c == EOF) return -1;
+
+	l = 0;
+	p=seq->name;
+	while ((c = getc(fp)) != EOF && c != '\t' && c != ' ' && c != '\n' && c != '\r' && l < MAX_NAME_LEN - 1) {
+		*p++ = c;
+		l++;
+	}
+	*p = '\0';
+
+	/* discard the rest of the header line; stop at EOF instead of spinning */
+	while (c != '\n' && c != EOF) c = getc(fp);
+
+	if (c == EOF) {
+		fprintf(stderr, "\nFile Error: unexpected feof\n");
+		exit(EXIT_FAILURE);
+	}
+
+	l = 0;
+	while ((c = getc(fp)) != EOF && c != '+') {
+		if (c == '\n' || c == '\r') continue;
+		if (l >= max) {
+			max += QUERY_LEN;
+			seq->seq  = (char *)realloc(seq->seq, sizeof(char)*max);
+			seq->rc   = (char *)realloc(seq->rc, sizeof(char)* max);
+			seq->qual = (char *)realloc(seq->qual, sizeof(char)*max);
+			if (seq->seq == NULL || seq->rc == NULL || seq->qual == NULL) {
+				fprintf(stderr, "\nMemory Error: cannot grow read buffer\n");
+				exit(EXIT_FAILURE);
+			}
+			seq->max = max;
+		}
+		if(ambiguityCount[charMap[c]] == 1){
+			seq->seq[l] = charMap[c];
+			seq->rc[l++] = complementMap[c];
+		}else{
+			seq->seq[l] = charMap['G'];
+			seq->rc[l++] = complementMap['G'];
+			ns++;
+		}
+	}
+	seq->l = l;
+
+	/* skip the remainder of the '+' separator line */
+	while (c != '\n' && c != EOF) c = getc(fp);
+	if (c == EOF) {
+		fprintf(stderr, "\nFile Error: unexpected feof\n");
+		return 0;
+	}
+
+	l = 0;
+	for (;;) {
+		/*
+		 * Keep space for one more quality value plus the terminating NUL.
+		 * seq and rc grow too, because seq->max describes all three buffers.
+		 */
+		if (l + 1 >= max) {
+			max += QUERY_LEN;
+			seq->seq  = (char *)realloc(seq->seq, sizeof(char)*max);
+			seq->rc   = (char *)realloc(seq->rc, sizeof(char)* max);
+			seq->qual = (char *)realloc(seq->qual, sizeof(char)*max);
+			if (seq->seq == NULL || seq->rc == NULL || seq->qual == NULL) {
+				fprintf(stderr, "\nMemory Error: cannot grow read buffer\n");
+				exit(EXIT_FAILURE);
+			}
+			seq->max = max;
+		}
+		c = getc(fp);
+		if (c == EOF || c == '\n' || c == '\r') break;
+		seq->qual[l++] = c;
+	}
+	seq->qual[l] = '\0';
+
+	if (l != seq->l) {
+		fprintf(stderr, "Length Error: incompitable seq and qual length\n");
+		fprintf(stderr, "       %s\n", seq->name);
+		return 0;
+	}
+	seq->max = max;
+	seq->ns  = ns;
+	return seq->l;
+}
+
+/* test
+int main(int argc, char *argv[]){
+
+	int fd = open(argv[1])
+
+	return EXIT_SUCCESS;
+}				
+//*/
diff --git a/SeqIO.h b/SeqIO.h
new file mode 100644
index 0000000..b4c11e5
--- /dev/null
+++ b/SeqIO.h
@@ -0,0 +1,42 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  SeqIO.h
+ *
+ *    Description:  Read record type (seq_t) and prototypes of the FASTA/FASTQ readers
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+#ifndef  __SEQIO_H__
+#define  __SEQIO_H__			/*  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "HSP.h"
+#define MAX_NAME_LEN 256
+#define QUERY_LEN    256
+#define FASTA        0
+#define FASTQ        1
+
+typedef struct _SEQ_T_{	/* one read, as filled in by fastq() */
+	int max, l, ns;			/* max: size the seq/rc/qual buffers have been grown to; l: number of bases stored; ns: bases that were not unambiguous and were stored as 'G' */
+	char name[MAX_NAME_LEN];	/* read name; printed in the parser's error messages */
+	char *seq, *rc, *qual;		/* charMap-coded bases, the complementMap code of each base at the same index, and the quality string */
+}seq_t;
+
+int CheckFast(int fd);
+int fasta(FILE *fp, seq_t *seq, const int CONV);
+int fastq(FILE *fp, seq_t *seq, const int CONV);
+
+#endif     /* -----  __SEQIO_H__  ----- */
diff --git a/TextConverter.c b/TextConverter.c
new file mode 100644
index 0000000..cb4a370
--- /dev/null
+++ b/TextConverter.c
@@ -0,0 +1,917 @@
+/*
+
+   TextConverter.c		Text Converter
+
+   This module contains miscellaneous text conversion functions.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "TextConverter.h"
+#include "MiscUtilities.h"
+#include "r250.h"
+
+
+// Returns the word of packed text that starts `shift` bits into packedText[index]; when numberOfBit < BITS_IN_WORD only the numberOfBit most significant bits are kept. vacantBit is used only when DNA_ONLY is not defined.
+unsigned int GetWordPackedText(const unsigned int *packedText, const unsigned int index, const unsigned int shift, const unsigned int numberOfBit, const unsigned int vacantBit) {
+	unsigned int text;
+	const static unsigned int mask[32] = { 0x00000000, 0x80000000, 0xC0000000, 0xE0000000,
+								  0xF0000000, 0xF8000000, 0xFC000000, 0xFE000000,
+								  0xFF000000, 0xFF800000, 0xFFC00000, 0xFFE00000,
+								  0xFFF00000, 0xFFF80000, 0xFFFC0000, 0xFFFE0000,
+								  0xFFFF0000, 0xFFFF8000, 0xFFFFC000, 0xFFFFE000,
+								  0xFFFFF000, 0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00,
+								  0xFFFFFF00, 0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0,
+								  0xFFFFFFF0, 0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE };
+	// mask[n] has its n most significant bits set and all other bits clear
+	if (shift > 0) {
+		// packedText should be allocated with at least 1 Word buffer initialized to zero
+#ifdef DNA_ONLY
+		text = (packedText[index] << shift) | (packedText[index + 1] >> (BITS_IN_WORD - shift));
+#else
+		text = (packedText[index] << shift) | (packedText[index + 1] >> (BITS_IN_WORD - shift) << vacantBit);
+#endif
+	} else {
+		text = packedText[index];
+	}
+
+	if (numberOfBit < BITS_IN_WORD) {
+		// Fill unused bit with zero
+		text &= mask[numberOfBit];
+	}
+
+	return text;
+}
+
+
+// Loads a character map from a text file of "<char> <value>" records.  All CHAR_MAP_SIZE
+// entries start as defaultMapping.  Returns the largest value read plus one, or 0 when a
+// value does not fit in a charMap entry.  Exits if the file cannot be opened.
+unsigned int ReadCharMap(unsigned char *charMap, const char *inputFileName, const unsigned char defaultMapping) {
+	FILE *inputFile;
+	char c;
+	unsigned int v, alphabetSize;
+
+	inputFile = (FILE*)fopen64(inputFileName, "r");
+	if (inputFile == NULL) {
+		fprintf(stderr, "ReadCharMap() : Cannot open character map!\n");
+		exit(1);
+	}
+
+	for (v=0; v<CHAR_MAP_SIZE; v++) {
+		charMap[v] = defaultMapping;
+	}
+
+	alphabetSize = 0;
+	// Loop on fscanf()'s result rather than feof(): a record that fails to parse
+	// must end the loop instead of being applied with stale or unset c and v.
+	while (fscanf(inputFile, " %c %u \n", &c, &v) == 2) {
+		if (v >= CHAR_MAP_SIZE) {	// would be truncated by the unsigned char store below
+			fprintf(stderr, "ReadCharMap() : Invalid charMap!\n");
+			fclose(inputFile);	// do not leak the stream on the error path
+			return 0;
+		}
+		// plain char may be negative; index with its unsigned value
+		charMap[(unsigned char)c] = (unsigned char)v;
+		if (v > alphabetSize) {
+			alphabetSize = v;
+		}
+	}
+
+	fclose(inputFile);
+	alphabetSize++;
+	return alphabetSize;
+}
+
+// Builds the inverse of charMap: reverseCharMap[code] becomes the lowest character whose
+// charMap entry equals code, or INVALID_CHAR when no character maps to that code.
+void GenerateReverseCharMap(const unsigned char *charMap, unsigned char *reverseCharMap) {
+	unsigned int code, next;
+
+	for (code = 0; code < CHAR_MAP_SIZE; code++) {
+		reverseCharMap[code] = INVALID_CHAR;
+	}
+
+	// Visit characters from the highest down, so the lowest character mapping to a
+	// code is stored last and is the one that remains.
+	for (next = CHAR_MAP_SIZE; next > 0; next--) {
+		reverseCharMap[charMap[next - 1]] = (unsigned char)(next - 1);
+	}
+}
+
+// Number of bits used for one character in word-packed text: ceilLog2(alphabetSize).  DEBUG builds exit when alphabetSize < 2.
+unsigned int BitPerWordPackedChar(const unsigned int alphabetSize) {
+	#ifdef DEBUG
+	if (alphabetSize < 2) {
+		fprintf(stderr, "BitPerWordPackedChar() : alphabetSize < 2!\n");
+		exit(1);
+	}
+	#endif
+
+	return ceilLog2(alphabetSize);
+
+}
+
+// Text length of wordPackedLength packed words: every word but the last counts as full, the last one contributes lastWordLength characters.
+unsigned int TextLengthFromWordPacked(unsigned int wordPackedLength, unsigned int bitPerChar, unsigned int lastWordLength) {
+	return (wordPackedLength - 1) * (BITS_IN_WORD / bitPerChar) + lastWordLength;
+
+}
+
+// Number of words needed to hold textLength characters at bitPerChar bits each (rounded up).
+unsigned int WordPackedLengthFromText(unsigned int textLength, unsigned int bitPerChar) {
+	return (textLength + (BITS_IN_WORD / bitPerChar) - 1) / (BITS_IN_WORD / bitPerChar);
+
+}
+
+// Characters left over after filling whole words; 0 when textLength is a multiple of the characters per word.
+unsigned int LastWordLength(unsigned int textLength, unsigned int bitPerChar) {
+	return textLength % (BITS_IN_WORD / bitPerChar);
+
+}
+
+// Number of bits used for one character in byte-packed text: ceilLog2(alphabetSize), widened while that keeps the same number of characters per byte.
+unsigned int BitPerBytePackedChar(const unsigned int alphabetSize) {
+	unsigned int bitPerChar;
+
+	#ifdef DEBUG
+	if (alphabetSize < 2) {
+		fprintf(stderr, "BitPerBytePackedChar() : alphabetSize < 2!\n");
+		exit(1);
+	}
+	#endif
+
+	bitPerChar = ceilLog2(alphabetSize);
+
+	#ifdef DEBUG
+	if (bitPerChar > BITS_IN_BYTE) {
+		fprintf(stderr, "BitPerBytePackedChar() : bitPerChar > BITS_IN_BYTE!\n");
+		exit(1);
+	}
+	#endif
+
+	// Return the largest number of bit that does not affect packing efficiency
+	if (BITS_IN_BYTE / (BITS_IN_BYTE / bitPerChar) > bitPerChar) {
+		bitPerChar = BITS_IN_BYTE / (BITS_IN_BYTE / bitPerChar);
+	}
+	return bitPerChar;
+}
+
+// Text length of bytePackedLength packed bytes whose last byte holds lastByteLength characters; exits when the byte count is too large for the length to fit in an unsigned int.
+unsigned int TextLengthFromBytePacked(unsigned int bytePackedLength, unsigned int bitPerChar, unsigned int lastByteLength) {
+	if (bytePackedLength > ALL_ONE_MASK / (BITS_IN_BYTE / bitPerChar)) {
+		fprintf(stderr, "TextLengthFromBytePacked(): text length > 2^32!\n");
+		exit(1);
+	}
+	return (bytePackedLength - 1) * (BITS_IN_BYTE / bitPerChar) + lastByteLength;
+
+}
+
+// Number of bytes needed to hold textLength characters at bitPerChar bits each (rounded up).
+unsigned int BytePackedLengthFromText(unsigned int textLength, unsigned int bitPerChar) {
+	return (textLength + (BITS_IN_BYTE / bitPerChar) - 1) / (BITS_IN_BYTE / bitPerChar);
+
+}
+
+// Characters left over after filling whole bytes; 0 when textLength is a multiple of the characters per byte.
+unsigned char LastByteLength(unsigned int textLength, unsigned int bitPerChar) {
+	return (unsigned char)(textLength % (BITS_IN_BYTE / bitPerChar));
+
+}
+
+// Packs textLength characters of input into output words, first character in the most significant bits; a charMap value >= alphabetSize is stored as 0.
+void ConvertTextToWordPacked(const unsigned char *input, unsigned int *output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int bitPerChar, charPerWord;
+	unsigned int i, j, k;
+	unsigned int c;
+	unsigned int charValue;
+
+	bitPerChar = BitPerWordPackedChar(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+	// words that are completely filled
+	for (i=0; i<textLength/charPerWord; i++) {
+		c = 0;
+		j = i * charPerWord;
+		for (k=0; k<charPerWord; k++) {
+			charValue = charMap[input[j+k]];
+			if (charValue >= alphabetSize) {
+				charValue = 0;
+			}
+			c = c | (charValue << (BITS_IN_WORD - (k+1) * bitPerChar));
+		}
+		output[i] = c;
+	}
+	if (i * charPerWord < textLength) {	// characters left over: one final, partly filled word (unused low bits stay 0)
+		c = 0;
+		j = i * charPerWord;
+		for (k=0; j+k < textLength; k++) {
+			charValue = charMap[input[j+k]];
+			if (charValue >= alphabetSize) {
+				charValue = 0;
+			}
+			c = c | (charValue << (BITS_IN_WORD - (k+1) * bitPerChar));
+		}
+		output[i] = c;
+	}
+
+}
+
+// Packs textLength characters of input into output bytes, first character in the most significant bits; charMap values are used as they are (no range check against alphabetSize).
+void ConvertTextToBytePacked(const unsigned char *input, unsigned char *output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int bitPerChar, charPerByte;
+	unsigned int i, j, k;
+	unsigned char c;
+
+	bitPerChar = BitPerBytePackedChar(alphabetSize); 		//2
+	charPerByte = BITS_IN_BYTE / bitPerChar;			//4
+	// bytes that are completely filled
+	for (i=0; i<textLength/charPerByte; i++) {
+		c = 0;
+		j = i * charPerByte;
+		for (k=0; k<charPerByte; k++) {
+			c = c | (unsigned char)(charMap[input[j+k]] << (BITS_IN_BYTE - (k+1) * bitPerChar));
+		}
+		output[i] = c;
+	}
+	if (i * charPerByte < textLength) {	// characters left over: one final, partly filled byte
+		c = 0;
+		j = i * charPerByte;
+		for (k=0; j+k < textLength; k++) {
+			c = c | (unsigned char)(charMap[input[j+k]] << (BITS_IN_BYTE - (k+1) * bitPerChar));
+		}
+		output[i] = c;
+	}
+
+}
+
+// Unpacks textLength characters from word-packed input, translating each code through reverseCharMap into output.
+void ConvertWordPackedToText(const unsigned int *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int bitPerChar, charPerWord;
+	unsigned int i, j, k;
+	unsigned int c;
+
+	bitPerChar = BitPerWordPackedChar(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+	// full words: take the top bitPerChar bits, then shift the next character up
+	for (i=0; i<textLength/charPerWord; i++) {
+		c = input[i];
+		j = i * charPerWord;
+		for (k=0; k<charPerWord; k++) {
+			output[j+k] = reverseCharMap[c >> (BITS_IN_WORD - bitPerChar)];
+			c <<= bitPerChar;
+		}
+	}
+	if (i * charPerWord < textLength) {	// final, partly filled word
+		c = input[i];
+		j = i * charPerWord;
+		for (k=0; j+k<textLength; k++) {
+			output[j+k] = reverseCharMap[c >> (BITS_IN_WORD - bitPerChar)];
+			c <<= bitPerChar;
+		}
+	}
+
+}
+
+// Unpacks textLength characters from byte-packed input, translating each code through reverseCharMap into output.
+void ConvertBytePackedToText(const unsigned char *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int bitPerChar, charPerByte;
+	unsigned int i, j, k;
+	unsigned char c;
+
+	bitPerChar = BitPerBytePackedChar(alphabetSize);
+	charPerByte = BITS_IN_BYTE / bitPerChar;
+	// full bytes: take the top bitPerChar bits, then shift the next character up
+	for (i=0; i<textLength/charPerByte; i++) {
+		c = input[i];
+		j = i * charPerByte;
+		for (k=0; k<charPerByte; k++) {
+			output[j+k] = reverseCharMap[c >> (BITS_IN_BYTE - bitPerChar)];
+			c <<= bitPerChar;
+		}
+	}
+	if (i * charPerByte < textLength) {	// final, partly filled byte
+		c = input[i];
+		j = i * charPerByte;
+		for (k=0; j+k<textLength; k++) {
+			output[j+k] = reverseCharMap[c >> (BITS_IN_BYTE - bitPerChar)];
+			c <<= bitPerChar;
+		}
+	}
+
+}
+
+// Unpacks textLength characters from byte-packed input into output, one character code per output byte (no character map applied).
+void ConvertBytePackedToCode(const unsigned char *input, unsigned char *output, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int bitPerChar, charPerByte;
+	unsigned int i, j, k;
+	unsigned char c;
+
+	bitPerChar = BitPerBytePackedChar(alphabetSize);
+	charPerByte = BITS_IN_BYTE / bitPerChar;
+	// full bytes: take the top bitPerChar bits, then shift the next character up
+	for (i=0; i<textLength/charPerByte; i++) {
+		c = input[i];
+		j = i * charPerByte;
+		for (k=0; k<charPerByte; k++) {
+			output[j+k] = c >> (unsigned char)(BITS_IN_BYTE - bitPerChar);
+			c <<= bitPerChar;
+		}
+	}
+	if (i * charPerByte < textLength) {	// final, partly filled byte
+		c = input[i];
+		j = i * charPerByte;
+		for (k=0; j+k<textLength; k++) {
+			output[j+k] = c >> (unsigned char)(BITS_IN_BYTE - bitPerChar);
+			c <<= bitPerChar;
+		}
+	}
+
+}
+
+// Repacks word-packed text into byte-packed text.  In both layouts the first character
+// occupies the most significant bits.  textLength is the number of characters to convert.
+void ConvertWordPackedToBytePacked(const unsigned int *input, unsigned char *output, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int i, j, k;
+	unsigned int c;
+	unsigned int bitPerBytePackedChar;
+	unsigned int bitPerWordPackedChar;
+	unsigned int charPerWord;
+	unsigned int charPerByte;
+	unsigned int bytePerIteration;
+	unsigned int byteProcessed = 0;
+	unsigned int wordProcessed = 0;
+	unsigned int shift, remaining;
+	unsigned int buffer[BITS_IN_WORD];	// unpacked character codes of one word
+
+	bitPerBytePackedChar = BitPerBytePackedChar(alphabetSize);
+	bitPerWordPackedChar = BitPerWordPackedChar(alphabetSize);
+	// Each count uses the width of its own layout (they used to be crossed, which is only right when both widths are equal)
+	charPerWord = BITS_IN_WORD / bitPerWordPackedChar;
+	charPerByte = BITS_IN_BYTE / bitPerBytePackedChar;
+
+	bytePerIteration = charPerWord / charPerByte;
+	shift = BITS_IN_WORD - bitPerWordPackedChar;
+	// All words except the last one
+	while ((wordProcessed + 1) * charPerWord < textLength) {
+		c = input[wordProcessed];
+		for (i=0; i<charPerWord; i++) {
+			buffer[i] = c >> shift;
+			c <<= bitPerWordPackedChar;
+		}
+		wordProcessed++;
+
+		k = 0;
+		for (i=0; i<bytePerIteration; i++) {
+			c = 0;
+			for (j=0; j<charPerByte; j++) {
+				c |= buffer[k] << (BITS_IN_BYTE - (j+1) * bitPerBytePackedChar);
+				k++;
+			}
+			output[byteProcessed] = (unsigned char)c;
+			byteProcessed++;
+		}
+	}
+
+	// Last word: unpack only the characters that remain (at least one when textLength > 0)
+	remaining = textLength - wordProcessed * charPerWord;
+	c = input[wordProcessed];
+	for (i=0; i<remaining; i++) {
+		buffer[i] = c >> shift;
+		c <<= bitPerWordPackedChar;
+	}
+
+	// Emit them charPerByte at a time; bounding the inner loop keeps the shift inside the byte and the reads inside the filled part of buffer[]
+	k = 0;
+	while (k < remaining) {
+		c = 0;
+		for (j=0; j<charPerByte && k<remaining; j++) {
+			c |= buffer[k] << (BITS_IN_BYTE - (j+1) * bitPerBytePackedChar);
+			k++;
+		}
+		output[byteProcessed] = (unsigned char)c;
+		byteProcessed++;
+	}
+}
+
+// Repacks byte-packed text into word-packed text (first character in the most significant bits in both layouts); writes nothing when textLength is 0.
+void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int i, j, k;
+	unsigned int c;
+	unsigned int bitPerBytePackedChar;
+	unsigned int bitPerWordPackedChar;
+	unsigned int charPerWord;
+	unsigned int charPerByte;
+	unsigned int bytePerIteration;
+	unsigned int byteProcessed = 0;
+	unsigned int wordProcessed = 0;
+	unsigned int mask, shift;
+	unsigned int buffer[BITS_IN_WORD];
+
+	if (textLength == 0) {	// the byte count of the final word below would wrap around and overrun buffer[]
+		return;
+	}
+
+	bitPerBytePackedChar = BitPerBytePackedChar(alphabetSize);
+	bitPerWordPackedChar = BitPerWordPackedChar(alphabetSize);
+	charPerByte = BITS_IN_BYTE / bitPerBytePackedChar;
+	charPerWord = BITS_IN_WORD / bitPerWordPackedChar;
+
+	bytePerIteration = charPerWord / charPerByte;
+	mask = truncateRight(ALL_ONE_MASK, BITS_IN_WORD - bitPerWordPackedChar);
+	shift = BITS_IN_WORD - BITS_IN_BYTE + bitPerBytePackedChar - bitPerWordPackedChar;
+
+	while ((wordProcessed + 1) * charPerWord < textLength) {
+		k = 0;
+		for (i=0; i<bytePerIteration; i++) {
+			c = (unsigned int)input[byteProcessed] << shift;
+			for (j=0; j<charPerByte; j++) {
+				buffer[k] = c & mask;
+				c <<= bitPerBytePackedChar;
+				k++;
+			}
+			byteProcessed++;
+		}
+
+		c = 0;
+		for (i=0; i<charPerWord; i++) {
+			c |= buffer[i] >> bitPerWordPackedChar * i;
+		}
+		output[wordProcessed] = c;
+		wordProcessed++;
+	}
+
+	k = 0;
+	for (i=0; i < (textLength - wordProcessed * charPerWord - 1) / charPerByte + 1; i++) {
+		c = (unsigned int)input[byteProcessed] << shift;
+		for (j=0; j<charPerByte; j++) {
+			buffer[k] = c & mask;
+			c <<= bitPerBytePackedChar;
+			k++;
+		}
+		byteProcessed++;
+	}
+
+	c = 0;
+	for (i=0; i<textLength - wordProcessed * charPerWord; i++) {
+		c |= buffer[i] >> bitPerWordPackedChar * i;
+	}
+	output[wordProcessed] = c;
+}
+
+// Maps each of the textLength bytes of input through charMap and stores the result in output.
+void ConvertTextToCode(const unsigned char *input, unsigned char *output, const unsigned char *charMap, const unsigned int textLength) {
+	const unsigned char *src = input;
+	const unsigned char *const stop = input + textLength;
+	unsigned char *dst = output;
+	while (src != stop) {
+		*dst++ = charMap[*src++];
+	}
+}
+
+// Maps each of the textLength codes of input through reverseCharMap and stores the result in output.
+void ConvertCodeToText(const unsigned char *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int textLength) {
+	const unsigned char *src = input;
+	const unsigned char *const stop = input + textLength;
+	unsigned char *dst = output;
+	while (src != stop) {
+		*dst++ = reverseCharMap[*src++];
+	}
+}
+
+// Packs input into output[0], then fills output[1] .. output[numberOfShift-1] with the same packed text shifted right by 1, 2, ... characters.
+void PackTextWithAllShift(const unsigned char *input, unsigned int **output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned int textLength) {
+	unsigned int bitPerChar;
+	unsigned int numberOfShift;
+	unsigned int numberOfWord;
+	unsigned int shift;
+
+	unsigned int i, j;
+
+	bitPerChar = BitPerWordPackedChar(alphabetSize);
+	numberOfShift = BITS_IN_WORD / bitPerChar;
+	numberOfWord = WordPackedLengthFromText(textLength, bitPerChar);
+	// NOTE(review): the inner loop below runs to j == numberOfWord, one word past what the conversion writes; presumably every row has a spare word and output[0]'s is zeroed by the caller - confirm
+	ConvertTextToWordPacked(input, output[0], charMap, alphabetSize, textLength);
+
+	for (i=1; i<numberOfShift; i++) {
+		shift = i * bitPerChar;
+		output[i][0] = output[0][0] >> shift;
+		for (j=1; j<=numberOfWord; j++) {
+			output[i][j] = (output[0][j] >> shift) | (output[0][j-1] << (BITS_IN_WORD - shift));
+		}
+	}
+
+}
+
+
+// Reads a text file in chunks, converts it through charMap and stores it word-packed at targetAddress. Returns charProcessed; exits if the file cannot be opened.
+unsigned int ReadTextAsWordPacked(const char *inputFileName, const unsigned char *charMap, const unsigned int alphabetSize, unsigned int *targetAddress, const unsigned int maxTextLength) {
+	FILE *inputFile;
+	unsigned char *buffer;
+	unsigned int charPerWord;
+	unsigned int charRead;
+	unsigned int charProcessed = 0, wordProcessed = 0;
+	unsigned int charPerBuffer;
+
+	inputFile = (FILE*)fopen64(inputFileName, "rb");
+
+	if (inputFile == NULL) {
+		fprintf(stderr, "ReadTextAsWordPacked() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	charPerWord = BITS_IN_WORD / BitPerWordPackedChar(alphabetSize);
+	charPerBuffer = PACKED_BUFFER_SIZE / charPerWord * charPerWord;	// rounded down to a whole number of words
+
+	buffer = MMUnitAllocate(charPerBuffer);
+
+	charRead = (unsigned int)fread(buffer, 1, charPerBuffer, inputFile);
+	while (charRead > 0 && charProcessed + charRead < maxTextLength) {
+		ConvertTextToWordPacked(buffer, targetAddress + wordProcessed, charMap, alphabetSize, charRead);
+		wordProcessed += charRead / charPerWord;
+		charProcessed += charRead;
+		charRead = (unsigned int)fread(buffer, 1, charPerBuffer, inputFile);
+	}
+	// Last chunk: convert at most the characters that still fit in maxTextLength
+	if (charRead > 0 && charProcessed < maxTextLength) {
+		ConvertTextToWordPacked(buffer, targetAddress + wordProcessed, charMap, alphabetSize, min(charRead, maxTextLength - charProcessed));
+		charProcessed += charRead;	// NOTE(review): adds all of charRead even when fewer characters were converted, so the result can exceed maxTextLength - confirm intended
+	}
+
+	MMUnitFree(buffer, charPerBuffer);
+
+	fclose(inputFile);
+
+	return charProcessed;
+
+}
+
+// Reads a byte-packed file (packed bytes followed by a one-byte count of characters in the last packed byte) and stores it word-packed at targetAddress. Returns charProcessed; exits if the file cannot be opened.
+unsigned int ReadBytePackedAsWordPacked(const char *inputFileName, const unsigned int alphabetSize, unsigned int *targetAddress, const unsigned int maxTextLength) {
+	FILE *inputFile;
+	unsigned char *buffer1, *buffer2;
+	unsigned int charPerByte, charPerWord;
+	unsigned int charPerBuffer, wordPerBuffer;
+	unsigned int charProcessed = 0, wordProcessed = 0;
+	unsigned int byteRead, tempByteRead;
+	unsigned int charInLastBuffer;
+	unsigned int bufferSize;
+
+	inputFile = (FILE*)fopen64(inputFileName, "rb");
+
+	if (inputFile == NULL) {
+		fprintf(stderr, "ReadBytePackedAsWordPacked() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	charPerByte = BITS_IN_BYTE / BitPerBytePackedChar(alphabetSize);
+	charPerWord = BITS_IN_WORD / BitPerWordPackedChar(alphabetSize);
+	bufferSize = PACKED_BUFFER_SIZE / charPerByte / charPerWord * charPerByte * charPerWord;
+
+	charPerBuffer = bufferSize * charPerByte;
+	wordPerBuffer = charPerBuffer / charPerWord;
+
+	buffer1 = MMUnitAllocate(bufferSize);
+	buffer2 = MMUnitAllocate(bufferSize);
+	// Read one buffer ahead: buffer1 is the data being converted, buffer2 shows whether more than the trailing count byte follows it
+	byteRead = (unsigned int)fread(buffer1, 1, bufferSize, inputFile);
+	tempByteRead = (unsigned int)fread(buffer2, 1, bufferSize, inputFile);
+
+	while (tempByteRead > 1 && charProcessed + charPerBuffer < maxTextLength) {
+		ConvertBytePackedToWordPacked(buffer1, targetAddress + wordProcessed, alphabetSize, charPerBuffer);
+		charProcessed += charPerBuffer;
+		wordProcessed += wordPerBuffer;
+		memcpy(buffer1, buffer2, bufferSize);
+		byteRead = tempByteRead;
+		tempByteRead = (unsigned int)fread(buffer2, 1, bufferSize, inputFile);
+	}
+	// Either maxTextLength stopped the loop (more data still follows buffer1) or buffer1 holds the end of the text
+	if (tempByteRead > 1) {
+		ConvertBytePackedToWordPacked(buffer1, targetAddress + wordProcessed, alphabetSize, maxTextLength - charProcessed);
+		charProcessed += charPerBuffer;	// NOTE(review): counts a whole buffer although only maxTextLength - charProcessed characters were converted - confirm intended
+	} else {
+		if (tempByteRead == 1) {
+			charInLastBuffer = charPerBuffer - charPerByte + buffer2[0];	// buffer1 is full; the single look-ahead byte is the count for its last byte
+		} else {
+			charInLastBuffer = (byteRead - 2) * charPerByte + buffer1[byteRead - 1];	// buffer1 ends with the count byte; NOTE(review): wraps if byteRead < 2
+		}
+		ConvertBytePackedToWordPacked(buffer1, targetAddress + wordProcessed, alphabetSize, min(maxTextLength - charProcessed, charInLastBuffer));
+		charProcessed += charInLastBuffer;
+	}
+
+	MMUnitFree(buffer1, bufferSize);
+	MMUnitFree(buffer2, bufferSize);
+
+	fclose(inputFile);
+
+	return charProcessed;
+
+}
+/*
+void *DNALoadPacked_bit64(const char *inputFileName, unsigned int *textLength){
+	FILE *inputFile;
+	unsigned char tempChar[8];
+	unsigned long long *packedText;
+	unsigned int packedFileLen;
+	unsigned char lastByteLength;
+	unsigned int wordToProcess;
+	unsigned int i;
+
+	inputFile = (FILE*)(FILE*)fopen64(inputFileName, "rb");
+
+	if (inputFile == NULL) {
+		fprintf(stderr, "DNALoadPacked() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	fseek(inputFile, -1, SEEK_END);
+	packedFileLen = ftell(inputFile);
+	if ((int)packedFileLen < 0) {
+		fprintf(stderr, "DNALoadPacked(): Cannot determine file length!\n");
+		exit(1);
+	}
+	fread(&lastByteLength, sizeof(unsigned char), 1, inputFile);
+
+	*textLength = (packedFileLen - 1) * 4 + lastByteLength;
+
+	wordToProcess = (*textLength + 32 - 1) / 32;
+	packedText = malloc((wordToProcess + 1) * sizeof(unsigned long long));	// allocate 1 more word at end
+	packedText[wordToProcess - 1] = 0;
+	packedText[wordToProcess] = 0;
+
+	fseek(inputFile, 0, SEEK_SET);
+	fread(packedText, 1, packedFileLen, inputFile);
+	fclose(inputFile);
+
+	if (convertToWordPacked) {
+
+		for (i=0; i<wordToProcess; i++) {
+	
+			*(unsigned long long*)tempChar = packedText[i];
+			packedText[i] = (tempChar[0] << 56) | (tempChar[1] << 48) | (tempChar[2] << 40) | (tempChar[3] << 32) | (tempChar[4]<<24) | (tempChar[5]<<16) | (tempChar[6]<<8) | tempChar[7];
+		}
+	}
+	return (void*)packedText;
+}
+//*/
+
+// Loads a byte-packed DNA file (alphabet size must be 4: four bases per byte) whose last byte
+// holds the number of bases in the final packed byte.  Stores the base count in *textLength and
+// returns the packed text followed by one zeroed word; release it with DNAFreePacked().  When
+// convertToWordPacked is non-zero every word is rebuilt with its first byte most significant.
+void *DNALoadPacked(const char *inputFileName, unsigned int *textLength, const unsigned int convertToWordPacked) {
+	FILE *inputFile;
+	unsigned char tempChar[4];
+	unsigned int *packedText;
+	unsigned int packedFileLen;
+	unsigned char lastByteLength;
+	unsigned int wordToProcess;
+	unsigned int i;
+
+	inputFile = (FILE*)fopen64(inputFileName, "rb");
+	if (inputFile == NULL) {
+		fprintf(stderr, "DNALoadPacked() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	// The offset of the trailing count byte is the number of packed bytes before it
+	fseek(inputFile, -1, SEEK_END);
+	packedFileLen = ftell(inputFile);
+	if ((int)packedFileLen < 1 || fread(&lastByteLength, sizeof(unsigned char), 1, inputFile) != 1) {
+		fprintf(stderr, "DNALoadPacked(): Cannot determine file length!\n");
+		exit(1);
+	}
+
+	*textLength = (packedFileLen - 1) * 4 + lastByteLength;
+
+	wordToProcess = (*textLength + 16 - 1) / 16;
+	packedText = MMUnitAllocate((wordToProcess + 1) * sizeof(unsigned int));	// allocate 1 more word at end
+	if (wordToProcess > 0) {
+		packedText[wordToProcess - 1] = 0;	// an empty text has no last word to clear
+	}
+	packedText[wordToProcess] = 0;
+
+	fseek(inputFile, 0, SEEK_SET);
+	fread(packedText, 1, packedFileLen, inputFile);
+	fclose(inputFile);
+
+	if (convertToWordPacked) {
+		for (i=0; i<wordToProcess; i++) {
+			// memcpy rather than a pointer cast: no aliasing or alignment assumptions
+			memcpy(tempChar, &packedText[i], sizeof(tempChar));
+			// widen before shifting so a byte >= 0x80 cannot overflow a signed int
+			packedText[i] = ((unsigned int)tempChar[0] << 24) | ((unsigned int)tempChar[1] << 16) | ((unsigned int)tempChar[2] << 8) | tempChar[3];
+		}
+	}
+	return (void*)packedText;
+}
+
+// Releases a packed DNA buffer through MMUnitFree(); the size passed is one word per 16 bases (rounded up) plus one extra word.
+void DNAFreePacked(void* packedDNA, const unsigned int textLength) {
+	MMUnitFree(packedDNA, ((textLength + 16 - 1) / 16 + 1) * sizeof(unsigned int));
+
+}
+
+// Writes textLength bytes of text to outputFileName (created or truncated, binary mode); exits if the file cannot be opened.
+void SaveText(const char *outputFileName, const unsigned char *text, const unsigned int textLength) {
+	FILE *outputFile;
+
+	outputFile = (FILE*)fopen64(outputFileName, "wb");
+
+	if (outputFile == NULL) {
+		fprintf(stderr, "SaveText() : Cannot open output file!\n");
+		exit(1);
+	}
+	// NOTE(review): the results of fwrite() and fclose() are not checked, so a short write goes unnoticed
+	fwrite(text, sizeof(unsigned char), textLength, outputFile);
+	fclose(outputFile);
+
+}
+
+// Writes byte-packed text to outputFileName: the packed bytes, then a one-byte trailer holding
+// the number of characters in the last packed byte.  Exits if the file cannot be opened.
+void SaveBytePacked(const char *outputFileName, const unsigned char *bytePacked, const unsigned int textLength, const unsigned int alphabetSize) {
+	FILE *outputFile;
+	unsigned int bitPerChar, bytePackedLen;
+	unsigned char lastByteLen;
+	unsigned char zero = 0;
+
+	outputFile = (FILE*)fopen64(outputFileName, "wb");
+
+	if (outputFile == NULL) {
+		fprintf(stderr, "SaveBytePacked() : Cannot open output file!\n");
+		exit(1);
+	}
+
+	bitPerChar = BitPerBytePackedChar(alphabetSize);
+	bytePackedLen = BytePackedLengthFromText(textLength, bitPerChar);
+	lastByteLen = LastByteLength(textLength, bitPerChar);
+
+	fwrite(bytePacked, sizeof(unsigned char), bytePackedLen, outputFile);
+	// A text that exactly fills its last byte gets an extra zero byte, so a trailer of 0 refers to that empty byte
+	if (lastByteLen == 0) {
+		fwrite(&zero, sizeof(unsigned char), 1, outputFile);
+	}
+	// trailer: number of characters in the last packed byte
+	fwrite(&lastByteLen, sizeof(unsigned char), 1, outputFile);
+	fclose(outputFile);
+}
+
+// Writes word-packed text to outputFileName: the packed words, then a one-word trailer holding
+// the number of characters in the last packed word.  Exits if the file cannot be opened.
+void SaveWordPacked(const char *outputFileName, const unsigned int *wordPacked, const unsigned int textLength, const unsigned int alphabetSize) {
+	FILE *outputFile;
+	unsigned int bitPerChar, wordPackedLen;
+	unsigned int lastWordLen;
+	unsigned int zero = 0;
+
+	outputFile = (FILE*)fopen64(outputFileName, "wb");
+
+	if (outputFile == NULL) {
+		fprintf(stderr, "SaveWordPacked() : Cannot open output file!\n");
+		exit(1);
+	}
+
+	bitPerChar = BitPerWordPackedChar(alphabetSize);
+	wordPackedLen = WordPackedLengthFromText(textLength, bitPerChar);
+	lastWordLen = LastWordLength(textLength, bitPerChar);
+
+	fwrite(wordPacked, sizeof(unsigned int), wordPackedLen, outputFile);
+	// A text that exactly fills its last word gets an extra zero word, so a trailer of 0 refers to that empty word
+	if (lastWordLen == 0) {
+		fwrite(&zero, sizeof(unsigned int), 1, outputFile);
+	}
+	// trailer: number of characters in the last packed word
+	fwrite(&lastWordLen, sizeof(unsigned int), 1, outputFile);
+	fclose(outputFile);
+}
+
+// Opens a byte-packed file for loading from its end backwards: stores the total text length in *textLength, loads the chunk at the end of the file into packedOutput (its text length goes to *textLengthForThisLoad) and returns the stream positioned for LoadPackedIncFromEnd().
+FILE *InitialLoadPackedIncFromEnd(const char* inputFileName, unsigned char *packedOutput, const unsigned int alphabetSize, 
+								  const unsigned int packedLengthPerLoad, unsigned int *textLength, unsigned int *textLengthForThisLoad) {
+	FILE *packedFile;
+	unsigned int len, packedFileLenForThisLoad, packedFileLen;
+	unsigned char lastByteLength;
+	unsigned int bitPerChar, charPerWord;
+
+	packedFile = (FILE*)fopen64(inputFileName, "rb");
+
+	if (packedFile == NULL) {
+		fprintf(stderr, "InitialLoadPackedIncFromEnd() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	bitPerChar = BitPerBytePackedChar(alphabetSize);
+	charPerWord = BITS_IN_WORD / bitPerChar;
+	// The offset of the trailing count byte is the number of packed bytes before it
+	fseek(packedFile, -1, SEEK_END);
+	packedFileLen = ftell(packedFile);
+	if ((int)packedFileLen < 0) {
+		fprintf(stderr, "InitialLoadPackedIncFromEnd(): Cannot determine file length!\n");
+		exit(1);
+	}
+	fread(&lastByteLength, sizeof(unsigned char), 1, packedFile);
+
+	len = TextLengthFromBytePacked(packedFileLen, bitPerChar, lastByteLength);
+	// Last packed byte is empty and the bytes before it are a whole number of loads: load nothing now, just position the stream
+	if (lastByteLength == 0 && (packedFileLen - 1) % packedLengthPerLoad == 0) {
+		packedFileLenForThisLoad = 0;
+		fseek(packedFile, -((int)(2+packedLengthPerLoad)), SEEK_END);
+		*textLength = len;
+		*textLengthForThisLoad = 0;
+		return packedFile;
+	}
+	// Size of the chunk at the end of the file: the partial load, or one full load when there is no partial one
+	if (packedFileLen % packedLengthPerLoad == 0) {
+		packedFileLenForThisLoad = packedLengthPerLoad;
+	} else {
+		packedFileLenForThisLoad = packedFileLen % packedLengthPerLoad;
+	}
+	fseek(packedFile, -1, SEEK_END);
+	// Read that chunk, seek back to its start and, if data precedes it, one more load back
+	fseek(packedFile, -((int)packedFileLenForThisLoad), SEEK_CUR);
+	fread(packedOutput, sizeof(unsigned char), packedFileLenForThisLoad, packedFile);
+	fseek(packedFile, -((int)packedFileLenForThisLoad), SEEK_CUR);
+	if (packedFileLen > packedFileLenForThisLoad) {
+		fseek(packedFile, -((int)packedLengthPerLoad), SEEK_CUR);
+	}
+
+	*textLength = len;
+	*textLengthForThisLoad = TextLengthFromBytePacked(packedFileLenForThisLoad, bitPerChar, lastByteLength);
+
+	return packedFile;
+
+}
+
+// Reads packedLengthPerLoad bytes at the current position into packedOutput, then moves the stream back two loads so the next call reads the preceding chunk.
+void LoadPackedIncFromEnd(FILE *packedFile, unsigned char *packedOutput, const unsigned int packedLengthPerLoad) {
+	fread(packedOutput, sizeof(unsigned char), packedLengthPerLoad, packedFile);
+	fseek(packedFile, -(2*(int)packedLengthPerLoad), SEEK_CUR);	// back over this chunk and the one before it
+
+}
+
+
+// Opens a text file for loading from its end backwards: stores the file length in *textLength, loads the trailing partial chunk (if any) into textOutput and returns the stream positioned at the start of that chunk.
+FILE *InitialLoadTextIncFromEnd(const char* inputFileName, unsigned char *textOutput, const unsigned int textLengthPerLoad, unsigned int *textLength, unsigned int *textLengthForThisLoad) {
+	FILE *textFile;
+	unsigned int len, textLenForThisLoad;
+
+	textFile = (FILE*)fopen64(inputFileName, "rb");
+
+	if (textFile == NULL) {
+		fprintf(stderr, "InitialLoadTextIncFromEnd() : Cannot open inputFileName!\n");
+		exit(1);
+	}
+
+	fseek(textFile, 0, SEEK_END);
+	len = ftell(textFile);
+	if ((int)len < 0) {
+		fprintf(stderr, "InitialLoadTextIncFromEnd(): Cannot determine file length!\n");
+		exit(1);
+	}
+	// The part that does not fill a whole load is handled first; it is 0 when the length is a multiple of textLengthPerLoad
+	textLenForThisLoad = len % textLengthPerLoad;
+
+	if (textLenForThisLoad > 0) {
+		fseek(textFile, -((int)textLenForThisLoad), SEEK_END);
+		fread(textOutput, sizeof(unsigned char), textLenForThisLoad, textFile);
+		fseek(textFile, -((int)textLenForThisLoad), SEEK_END);
+	}
+
+	*textLength = len;
+	*textLengthForThisLoad = textLenForThisLoad;
+
+	return textFile;
+}
+
+// Loads the textLengthPerLoad bytes that precede the current file position into textOutput and leaves the stream at the start of them; exits when fewer bytes precede the position.
+void LoadTextIncFromEnd(FILE *textFile, unsigned char *textOutput, const unsigned int textLengthPerLoad) {
+	if (ftell(textFile) < (int)textLengthPerLoad) {
+		fprintf(stderr, "LoadTextIncFromEnd(): file pointer is not correctly placed!\n");
+		exit(1);
+	}
+
+	fseek(textFile, -((int)textLengthPerLoad), SEEK_CUR);
+	fread(textOutput, sizeof(unsigned char), textLengthPerLoad, textFile);
+	fseek(textFile, -((int)textLengthPerLoad), SEEK_CUR);
+
+}
+
diff --git a/TextConverter.h b/TextConverter.h
new file mode 100644
index 0000000..8bad945
--- /dev/null
+++ b/TextConverter.h
@@ -0,0 +1,93 @@
+/*
+
+   TextConverter.h		Text Converter
+
+   This module contains miscellaneous text conversion functions.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __TEXTCONVERTOR_H__
+#define __TEXTCONVERTOR_H__
+
+#include "TypeNLimit.h"
+#include "MemManager.h"
+
+#define INVALID_CHAR 0xFF
+#define CHAR_MAP_SIZE 256
+#define PACKED_BUFFER_SIZE			(PACKED_BUFFER_SIZE_IN_WORD * BYTES_IN_WORD)
+#define PACKED_BUFFER_SIZE_IN_WORD	65536
+#define MAX_SEQ_NAME_LENGTH			256
+#define RANDOM_SUBSTITUTE			'R'
+
+// charMap is a char array of size 256. The index of the array is the input text value
+// and the content of the array is the output text value. e.g. A -> 0, C -> 1
+// If the value of an entry = INVALID_CHAR, the indexed text value is an invalid input
+
+// Retrieve word packed text
+unsigned int GetWordPackedText(const unsigned int *packedText, const unsigned int index, const unsigned int shift, const unsigned int numberOfBit, const unsigned int vacantBit);
+
+// Character map functions
+unsigned int ReadCharMap(unsigned char *charMap, const char *inputFileName, const unsigned char defaultMapping);
+void GenerateReverseCharMap(const unsigned char *charMap, unsigned char *reverseCharMap);
+
+// Word packed text functions
+unsigned int BitPerWordPackedChar(const unsigned int alphabetSize);
+unsigned int TextLengthFromWordPacked(unsigned int wordPackedLength, unsigned int bitPerChar, unsigned int lastWordLength);
+unsigned int WordPackedLengthFromText(unsigned int textLength, unsigned int bitPerChar);
+unsigned int LastWordLength(unsigned int textLength, unsigned int bitPerChar);
+
+// Byte packed text functions
+unsigned int BitPerBytePackedChar(const unsigned int alphabetSize);
+unsigned int TextLengthFromBytePacked(unsigned int bytePackedLength, unsigned int bitPerChar, unsigned int lastByteLength);
+unsigned int BytePackedLengthFromText(unsigned int textLength, unsigned int bitPerChar);
+unsigned char LastByteLength(unsigned int textLength, unsigned int bitPerChar);
+
+// Conversion functions
+void ConvertTextToWordPacked(const unsigned char *input, unsigned int *output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertTextToBytePacked(const unsigned char *input, unsigned char *output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertWordPackedToText(const unsigned int *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertBytePackedToText(const unsigned char *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertBytePackedToCode(const unsigned char *input, unsigned char *output, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertWordPackedToBytePacked(const unsigned int *input, unsigned char *output, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int alphabetSize, const unsigned int textLength);
+void ConvertTextToCode(const unsigned char *input, unsigned char *output, const unsigned char *charMap, const unsigned int textLength);
+void ConvertCodeToText(const unsigned char *input, unsigned char *output, const unsigned char *reverseCharMap, const unsigned int textLength);
+
+// Pack text with all shift
+void PackTextWithAllShift(const unsigned char *input, unsigned int **output, const unsigned char *charMap, const unsigned int alphabetSize, const unsigned int textLength);
+
+// Full load function
+unsigned int ReadTextAsWordPacked(const char *inputFileName, const unsigned char *charMap, const unsigned int alphabetSize, unsigned int *targetAddress, const unsigned int maxTextLength);
+unsigned int ReadBytePackedAsWordPacked(const char *inputFileName, const unsigned int alphabetSize, unsigned int *targetAddress, const unsigned int maxTextLength);
+void *DNALoadPacked(const char *inputFileName, unsigned int *textLength, const unsigned int convertToWordPacked);
+void DNAFreePacked(void* packedDna, const unsigned int textLength);
+
+// Save functions
+void SaveText(const char *outputFileName, const unsigned char *text, const unsigned int textLength);
+void SaveBytePacked(const char *outputFileName, const unsigned char *bytePacked, const unsigned int textLength, const unsigned int alphabetSize);	// parameter names match the definition
+void SaveWordPacked(const char *outputFileName, const unsigned int *wordPacked, const unsigned int textLength, const unsigned int alphabetSize);
+
+// Incremental load functions (start from end of text)
+FILE *InitialLoadPackedIncFromEnd(const char* inputFileName, unsigned char *packedOutput, const unsigned int alphabetSize, const unsigned int packedLengthPerLoad, unsigned int *textLength, unsigned int *textLengthForThisLoad);
+void LoadPackedIncFromEnd(FILE *packedFile, unsigned char *packedOutput, const unsigned int packedLengthPerLoad);
+FILE *InitialLoadTextIncFromEnd(const char* inputFileName, unsigned char *textOutput, const unsigned int textLengthPerLoad, unsigned int *textLength, unsigned int *textLengthForThisLoad);
+void LoadTextIncFromEnd(FILE *textFile, unsigned char *textOutput, const unsigned int textLengthPerLoad);
+
+
+#endif
diff --git a/Timing.c b/Timing.c
new file mode 100644
index 0000000..732c15d
--- /dev/null
+++ b/Timing.c
@@ -0,0 +1,169 @@
+/*
+
+   Timing.c		Measuring Program running time
+
+   This module contains functions for measuring program running time.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#ifdef RUSAGE
+#include <sys/resource.h>
+#else
+#ifdef TIME_BY_CLOCK
+#include <time.h>
+#else
+#include <sys/time.h>
+#endif
+#endif
+
+#include "Timing.h"
+
+/* Print the caller-supplied label, then the current local time, to stderr. */
+void asciiTime(const char *c){
+	time_t currentTime;
+	struct tm *localNow;
+
+	currentTime = time(NULL);
+	localNow = localtime(&currentTime);
+
+	/* asctime() ends its text with a newline, so none is appended here */
+	fprintf(stderr, "\n%s\n%s", c, asctime(localNow));
+}
+
+
+/*
+   Return the current reading, in seconds, of the clock selected at build time:
+     RUSAGE         user + system CPU time reported by getrusage()
+     TIME_BY_CLOCK  clock() divided by CLOCKS_PER_SEC
+     otherwise      gettimeofday()
+   The value is meant to be handed to getElapsedTime() later.
+*/
+double setStartTime() {
+
+#if defined(RUSAGE)
+
+	struct rusage usage;
+
+	getrusage(RUSAGE_SELF, &usage);
+
+	const double userSeconds = (double)usage.ru_utime.tv_sec + (double)usage.ru_utime.tv_usec / 1000000.0;
+	const double systemSeconds = (double)usage.ru_stime.tv_sec + (double)usage.ru_stime.tv_usec / 1000000.0;
+
+	return userSeconds + systemSeconds;
+
+#elif defined(TIME_BY_CLOCK)
+
+	const double ticks = (double)clock();
+
+	return ticks / (double)CLOCKS_PER_SEC;
+
+#else
+
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	const double wholeSeconds = (double)now.tv_sec;
+	const double fraction = (double)now.tv_usec / (double)1000000;
+
+	return wholeSeconds + fraction;
+
+#endif
+
+}
+
+/*
+   Return the number of seconds between startTime and now.
+
+   startTime must come from setStartTime() built with the same clock choice
+   (RUSAGE, TIME_BY_CLOCK or the gettimeofday() default).
+*/
+double getElapsedTime(double startTime) {
+
+#if defined(RUSAGE)
+
+	struct rusage usage;
+
+	getrusage(RUSAGE_SELF, &usage);
+
+	const double userSeconds = (double)usage.ru_utime.tv_sec + (double)usage.ru_utime.tv_usec / 1000000.0;
+	const double systemSeconds = (double)usage.ru_stime.tv_sec + (double)usage.ru_stime.tv_usec / 1000000.0;
+
+	return (userSeconds + systemSeconds) - startTime;
+
+#elif defined(TIME_BY_CLOCK)
+
+	const double nowSeconds = (double)clock() / (double)CLOCKS_PER_SEC;
+
+	return nowSeconds - startTime;
+
+#else
+
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	const double nowSeconds = (double)now.tv_sec + (double)now.tv_usec / (double)1000000;
+
+	return nowSeconds - startTime;
+
+#endif
+
+}
+
+/*
+   Print an elapsed time followed by a newline.  Identical to
+   printElapsedTimeNoNewLine() called with a minimum seconds width of 0.
+*/
+void printElapsedTime(FILE *file, const int printHour, const int printMin, const int printSec,
+					  const int secNumberOfDecimal, const double seconds) {
+
+	const int noMinimumWidth = 0;
+
+	printElapsedTimeNoNewLine(file, printHour, printMin, printSec, noMinimumWidth, secNumberOfDecimal, seconds);
+	fputc('\n', file);
+
+}
+
+/*
+   Write an elapsed time to file as "<h> h <m> m <s> s " without a trailing newline.
+
+   printHour, printMin, printSec   non-zero selects the corresponding field
+   secMinPrintLength               minimum field width of the seconds value
+   secNumberOfDecimal              digits after the decimal point of the seconds value
+   seconds                         elapsed time in seconds
+
+   The finest selected unit absorbs the remainder: minutes are rounded to the
+   nearest minute when seconds are not printed, and hours are rounded to the
+   nearest hour (based on the whole minutes) when minutes are not printed.
+   A coarser unit that is not selected is folded into the next finer one, so
+   minutes may exceed 59 when hours are not printed.
+
+   Width and precision are passed through "%*.*f", so they are no longer
+   limited to a single digit.
+*/
+void printElapsedTimeNoNewLine(FILE *file, const int printHour, const int printMin, const int printSec,
+					  const int secMinPrintLength, const int secNumberOfDecimal, const double seconds) {
+
+	int hour, min;
+	double sec;
+
+	#ifdef DEBUG
+	if (printHour && !printMin && printSec) {
+		fprintf(stderr, "printElapsedTime(): Cannot skip minute only!\n");
+		exit(1);
+	}
+	#endif
+
+	sec = seconds;
+	min = (int)(seconds / 60);
+	if (!printSec && printMin) {
+		if (seconds - min * 60 >= 30) {
+			min++;
+		}
+	}
+	if (printMin) {
+		sec -= min * 60;
+	}
+
+	hour = min / 60;
+	if (!printMin) {
+		/* round on the minutes left over after the whole hours; the old code
+		   tested hour * 60 instead and rounded up whenever hour was >= 1 */
+		if (min - hour * 60 >= 30) {
+			hour++;
+		}
+	}
+	if (printHour) {
+		min -= hour * 60;
+	}
+
+	if (printHour) {
+		fprintf(file, "%d h ", hour);
+	}
+	if (printMin) {
+		fprintf(file, "%d m ", min);
+	}
+	if (printSec) {
+		fprintf(file, "%*.*f s", secMinPrintLength, secNumberOfDecimal, sec);
+	}
+
+}
+
diff --git a/Timing.h b/Timing.h
new file mode 100644
index 0000000..aa4fad2
--- /dev/null
+++ b/Timing.h
@@ -0,0 +1,37 @@
+/*
+
+   Timing.h		Measuring Program running time
+
+   This module contains functions for measuring program running time.
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __TIMING_H__
+#define __TIMING_H__
+
+#include <stdio.h>	/* FILE is used in the prototypes below */
+
+/* Print the label c followed by the current local time to stderr. */
+void asciiTime(const char *c);
+
+/* Return the current reading, in seconds, of the clock chosen at build time. */
+double setStartTime();
+
+/* Return the seconds elapsed since startTime, a value obtained from setStartTime(). */
+double getElapsedTime(double startTime);
+
+/* Print an elapsed time (selected hour / minute / second fields) followed by a newline. */
+void printElapsedTime(FILE *file, const int printHour, const int printMin, const int printSec,
+					  const int secNumberOfDecimal, const double seconds);
+
+/* Same as printElapsedTime() but without the newline and with a minimum width for the seconds field. */
+void printElapsedTimeNoNewLine(FILE *file, const int printHour, const int printMin, const int printSec,
+					  const int secMinPrintLength, const int secNumberOfDecimal, const double seconds);
+
+#endif
+
diff --git a/TypeNLimit.h b/TypeNLimit.h
new file mode 100644
index 0000000..8449502
--- /dev/null
+++ b/TypeNLimit.h
@@ -0,0 +1,78 @@
+/*
+
+   TypeNLimit.h		Miscellaneous Constants
+
+   Copyright (C) 2004, Wong Chi Kwong.
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+*/
+
+#ifndef __TYPENLIMIT_H__
+#define __TYPENLIMIT_H__
+
+#include <limits.h>
+
+#define BITS_IN_WORD 32	/* a word is taken to be 32 bits wide */
+#define BITS_IN_WORD_MINUS_1 31
+#define BITS_IN_WORD_MASK 0x0000001F	/* x & mask == x % BITS_IN_WORD */
+#define BITS_IN_WORD_SHIFT 5	/* x >> shift == x / BITS_IN_WORD */
+#define BITS_IN_HALF_WORD 16
+#define BITS_IN_4_WORD 128
+#define BITS_IN_4_WORD_MINUS_1 127
+#define BITS_IN_4_WORD_SHIFT 7	/* x >> shift == x / BITS_IN_4_WORD */
+#define FIRST_BIT_MASK 0x80000000	/* most significant bit of a 32-bit word */
+#define ALL_BUT_FIRST_BIT_MASK 0x7FFFFFFF
+#define ALL_ONE_MASK 0xFFFFFFFF
+#define FOUR_MULTIPLE_MASK 0xFFFFFFFC	/* x & mask rounds x down to a multiple of 4 */
+#define BITS_IN_BYTE 8
+#define BITS_IN_BYTE_SHIFT 3	/* x >> shift == x / BITS_IN_BYTE */
+#define BYTES_IN_WORD 4
+
+#define TRUE    1
+#define FALSE   0
+
+// Compatibilities: compiler-specific spellings (MSVC forms under _WIN32, GCC-style attributes otherwise)
+
+#ifdef _WIN32
+
+#define fopen64		fopen
+#define ftello64	ftell
+#define INLINE		__inline
+#define ALIGN_16	__declspec(align(16))
+#define ALIGN_32	__declspec(align(32))
+#define ALIGN_64	__declspec(align(64))
+#define MEMALIGN(a, b)	_aligned_malloc(a, b)
+#define FREEALIGN(a)	_aligned_free(a)
+
+#else	/* NOTE(review): fopen64/ftello64 are mapped to plain fopen/ftell in this branch too, so neither platform uses a large-file variant */
+
+#define fopen64		fopen
+#define ftello64	ftell
+#define INLINE		__inline
+#define ALIGN_16	__attribute__((aligned(16)))
+#define ALIGN_32	__attribute__((aligned(32)))
+#define ALIGN_64	__attribute__((aligned(64)))
+#define MEMALIGN(a, b)	_mm_malloc(a, b)	/* NOTE(review): _mm_malloc/_mm_free are not declared by this header; presumably an intrinsics header is included by the users - confirm */
+#define FREEALIGN(a)	_mm_free(a)
+
+#endif
+
+// To make sure that LONG means 64 bit integer
+#define LONG		long long		// For 32 & 64 bits compatibility on Windows and Linux
+
+//#define MAX_FILENAME_LEN 256
+
+#endif
diff --git a/debian/README.Debian b/debian/README.Debian
deleted file mode 100644
index 82796e0..0000000
--- a/debian/README.Debian
+++ /dev/null
@@ -1,6 +0,0 @@
-soap2 for Debian
-----------------
-
-The authors of this package demand the amd64 platform.
-
- -- Steffen Moeller <moeller at debian.org>  Sat, 21 Apr 2012 22:45:08 +0200
diff --git a/debian/README.source b/debian/README.source
deleted file mode 100644
index be7a7b5..0000000
--- a/debian/README.source
+++ /dev/null
@@ -1,5 +0,0 @@
-soap2 for Debian
-----------------
-
-Only -O2 compilation is performed to keep maximal compatibility across platforms. The patch changes the Makefile accordingly.
-
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 68a3fd8..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-soapaligner (2.20-1) UNRELEASED; urgency=low
-
-  * Initial release (Closes: #669917)
-
- -- Steffen Moeller <moeller at debian.org>  Sat, 21 Apr 2012 22:45:08 +0200
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f599e28..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-10
diff --git a/debian/control b/debian/control
deleted file mode 100644
index c70765c..0000000
--- a/debian/control
+++ /dev/null
@@ -1,36 +0,0 @@
-Source: soapaligner
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Steffen Moeller <moeller at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 10)
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/soap/soap2/trunk
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/soap/soap2/trunk
-Homepage: http://soap.genomics.org.cn/soapaligner.html
-
-Package: soapaligner
-Architecture: any
-Depends: ${shlibs:Depends},
-         ${misc:Depends}
-Description: aligner of short reads of next generation sequencers
- This package addresses a common problem in bioinformatics that has
- become routine now also in clinical research: the assembly and 
- comparison of the very long genomic DNA sequences from many
- short reads that the machines provide.
- .
- SOAPaligner/soap2 is a member of the Short Oligonucleotide Analysis
- Package (SOAP) and an updated version of SOAP software for short
- oligonucleotide alignment (soap v1). The new program features in super
- fast and accurate alignment for huge amounts of short reads generated by
- Illumina/Solexa Genome Analyzer. Compared to soap v1, it is one order
- of magnitude faster. It require only 2 minutes aligning one million
- single-end reads onto the human reference genome. Another remarkable
- improvement of SOAPaligner is that it now supports a wide range of the
- read length.
- .
- SOAPaligner/soap2 benefitted in time and space efficiency by a revolution
- in the basic data structures and algorithms used. The core algorithms and
- the indexing data structures (2way-BWT) are developed by the algorithms
- research group of the Department of Computer Science, the University
- of Hong Kong (T.W. Lam, Alan Tam, Simon Wong, Edward Wu and S.M. Yiu).
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index df2efd4..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,56 +0,0 @@
-Format: http://dep.debian.net/deps/dep5
-Upstream-Name: soap2
-Source: http://soap.genomics.org.cn/soapaligner.html
-
-Files: *
-Copyright: 2004 Wong Chi Kwong
-	   2009 BGI Shenzhen, Chang Yu <yuchang at genomics.org.cn>
-License: GPL-3.0+
-
-Files: stdaln.c
-Copyright: 2003-2006, 2008 Heng Li <lh3lh3 at gmail.com>
-License: MIT
-
-Files: debian/*
-Copyright: 2012 Steffen Moeller <moeller at debian.org>
-License: GPL-3.0+
-
-License: GPL-3.0+
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- .
- This package is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- .
- On Debian systems, the complete text of the GNU General
- Public License version 3 can be found in "/usr/share/common-licenses/GPL-3".
-
-
-License: MIT
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
- .
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
- .
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
diff --git a/debian/docs b/debian/docs
deleted file mode 100644
index edc0071..0000000
--- a/debian/docs
+++ /dev/null
@@ -1 +0,0 @@
-NEWS
diff --git a/debian/genomics.cn-soap2.install b/debian/genomics.cn-soap2.install
deleted file mode 100644
index ad6f5cc..0000000
--- a/debian/genomics.cn-soap2.install
+++ /dev/null
@@ -1 +0,0 @@
-soap	usr/bin
diff --git a/debian/genomics.cn-soap2.manpages b/debian/genomics.cn-soap2.manpages
deleted file mode 100644
index e6b9541..0000000
--- a/debian/genomics.cn-soap2.manpages
+++ /dev/null
@@ -1 +0,0 @@
-soap.1
diff --git a/debian/install b/debian/install
deleted file mode 100644
index ad6f5cc..0000000
--- a/debian/install
+++ /dev/null
@@ -1 +0,0 @@
-soap	usr/bin
diff --git a/debian/manpages b/debian/manpages
deleted file mode 100644
index e6b9541..0000000
--- a/debian/manpages
+++ /dev/null
@@ -1 +0,0 @@
-soap.1
diff --git a/debian/patches/compiler_options.patch b/debian/patches/compiler_options.patch
deleted file mode 100644
index fc02902..0000000
--- a/debian/patches/compiler_options.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-Index: soap2-2.20/Makefile
-===================================================================
---- soap2-2.20.orig/Makefile	2009-07-23 00:49:31.000000000 +0200
-+++ soap2-2.20/Makefile	2012-04-22 00:01:01.446041368 +0200
-@@ -6,7 +6,7 @@
- CC            = gcc
- DEBUG_FLAGS   = -g3 -Wall -O2
- PROFILE_FLAGS = -fprofile-arcs -ftest-coverage -pg 
--RELEASE_FLAGS = -msse3 -O3 -static -funroll-loops -maccumulate-outgoing-args -fomit-frame-pointer 
-+RELEASE_FLAGS = $(DEBUG_FLAGS)
- STATIC_FLAGS  = -static
- DFLAGS        = -DMAKE_TIME=\""`date`"\"
- LIBS          = -lm
-@@ -18,7 +18,7 @@
-         DFLAGS  += -DDEBUG
- #        PTHREADS = NO
- else
--        CFLAGS   = $(RELEASE_FLAGS) $(STATIC_FLAGS)
-+        CFLAGS   = $(RELEASE_FLAGS)
- endif
- 
- ifeq (YES, $(PTHREADS))
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index a122bcc..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1 +0,0 @@
-compiler_options.patch
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 79fd842..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/usr/bin/make -f
-# -*- makefile -*-
-
-# Uncomment this to turn on verbose mode.
-#export DH_VERBOSE=1
-
-%:
-	dh $@ 
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index ecbbfa2..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,14 +0,0 @@
-Contact: soap at genomics.org.cn
-Homepage: http://soap.genomics.org.cn/soapaligner.html
-Reference:
- DOI: 10.1093/bioinformatics/btn025
- Eprint: http://bioinformatics.oxfordjournals.org/content/24/5/713.full.pdf+html
- Journal: Genome Res.
- PMID: 18227114
- Author: Ruiqiang Li and Yingrui Li and Karsten Kristiansen and Jun Wang
- Title: "SOAP: short oligonucleotide alignment program"
- URL: http://bioinformatics.oxfordjournals.org/content/24/5/713.full
- Volume: 24
- Number: 5
- Pages: 713-714
- Year: 2008
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 66825a9..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-version=4
-
-http://soap.genomics.org.cn/soapaligner.html down/SOAPaligner-v(\d[.\d]+)-src.tar.gz
diff --git a/extratools.c b/extratools.c
new file mode 100644
index 0000000..914b83c
--- /dev/null
+++ b/extratools.c
@@ -0,0 +1,227 @@
+#include "extratools.h"
+
+//This file includes the implementations of all the extra tools adding to all steps
+//e.g. Look Up Table
+//     Hash Table
+//     All things like those
+
+/*
+   Load a lookup table from fileName into lookupTable.
+
+   The table has 1 << (2 * tableSize) unsigned int entries, which are read
+   verbatim (raw words) from the start of the file.  The buffer is allocated
+   here and released by FreeLookupTable().
+
+   Errors: an invalid tableSize, an allocation failure or an unreadable file
+   is reported on stderr and terminates the program.  A file shorter than the
+   table is reported as a warning; the entries that could not be read are zero.
+*/
+void LoadLookupTable(LOOKUPTABLE * lookupTable, const char * fileName, const int tableSize)  {
+	/* amount requested from read() per call */
+	const size_t chunkBytes = 1048576 * sizeof(unsigned int);
+	unsigned long long entryCount;
+	size_t totalBytes;
+	size_t loadedBytes = 0;
+	int fin;
+
+	/* 2 * tableSize is used as a shift count on a 64-bit value */
+	if (tableSize < 0 || tableSize > 31) {
+		fprintf(stderr, "LoadLookupTable(): invalid table size %d\n", tableSize);
+		exit(1);
+	}
+	entryCount = 1ULL << (tableSize * 2);
+	if (entryCount > (size_t)-1 / sizeof(*lookupTable->table)) {
+		fprintf(stderr, "LoadLookupTable(): table size %d does not fit in memory\n", tableSize);
+		exit(1);
+	}
+	totalBytes = (size_t)entryCount * sizeof(*lookupTable->table);
+
+	lookupTable->tableSize = tableSize;
+	lookupTable->table = calloc((size_t)entryCount, sizeof(*lookupTable->table));
+	if (lookupTable->table == NULL) {
+		fprintf(stderr, "LoadLookupTable(): cannot allocate %llu entries\n", entryCount);
+		exit(1);
+	}
+
+	fin = open(fileName, O_RDONLY);
+	if (fin < 0) {
+		fprintf(stderr, "LoadLookupTable(): cannot open %s\n", fileName);
+		exit(1);
+	}
+
+	/* never ask for more than what is left of the table, and cope with short reads */
+	while (loadedBytes < totalBytes) {
+		size_t want = totalBytes - loadedBytes;
+		ssize_t got;
+
+		if (want > chunkBytes) {
+			want = chunkBytes;
+		}
+		got = read(fin, (char *)lookupTable->table + loadedBytes, want);
+		if (got <= 0) {
+			break;
+		}
+		loadedBytes += (size_t)got;
+	}
+	close(fin);
+
+	if (loadedBytes < totalBytes) {
+		fprintf(stderr, "LoadLookupTable(): %s is shorter than the table; missing entries are zero\n", fileName);
+	}
+}
+
+/*
+   Translate a pair of lookup keys into range bounds.
+
+   *l receives the entry preceding lKey plus one (or 1 when lKey is 0) and is
+   moved one further if it equals bwt->inverseSa0; *r receives the entry at
+   rKey.  Returns *r - *l + 1.
+*/
+unsigned int LookupSafe(LOOKUPTABLE lookupTable, BWT * bwt,
+                        unsigned long long lKey, unsigned long long rKey,
+                        unsigned int *l, unsigned int *r) {
+
+	const unsigned int *entries = lookupTable.table;
+
+	if (lKey == 0) {
+		*l = 1;
+	} else {
+		*l = entries[lKey - 1] + 1;
+	}
+	*r = entries[rKey];
+
+	if (*l == bwt->inverseSa0) {
+		*l += 1;
+	}
+
+	return *r - *l + 1;
+}
+
+/*
+   Module-level counters.  retrieveSA, retrieveHASH, textPositionTimeTotal and
+   writeQIndex are zeroed by RegisterDecoder().
+*/
+unsigned int retrieveSA = 0;
+unsigned int retrieveHASH = 0;
+double textPositionTime;
+double textPositionTimeTotal = 0;
+unsigned int writeQIndex;
+
+/* Read-only accessor for textPositionTimeTotal. */
+double getTextPositionTime() {
+	return textPositionTimeTotal;
+}
+
+/* Read-only accessor for retrieveSA. */
+unsigned int getSARetrieved() {
+	return retrieveSA;
+}
+
+/* Read-only accessor for retrieveHASH. */
+unsigned int getHASHRetrieved() {
+	return retrieveHASH;
+}
+
+/*
+   Release the buffer allocated by LoadLookupTable().  The pointer is reset so
+   that a second call, or a later access through the stale pointer, cannot
+   touch freed memory.
+*/
+void FreeLookupTable(LOOKUPTABLE * lookupTable) {
+	free(lookupTable->table);
+	lookupTable->table = NULL;
+}
+
+/* Read count objects of the given size from inFile into dest; report and terminate if the file ends early. */
+static void HashFileReadOrDie(FILE *inFile, void *dest, size_t size, size_t count, const char *fileName) {
+	if (fread(dest, size, count, inFile) != count) {
+		fprintf(stderr, "LoadHashTable(): unexpected end of file or read error in %s\n", fileName);
+		exit(1);
+	}
+}
+
+/*
+   Load a hash table from fileName into hashTable.
+
+   File layout, all values raw unsigned int unless noted:
+     header   tableSize, a, b, prime, number of items, number of occurrences
+     cells    tableSize records: a 1-byte marker; when the marker is non-zero
+              it is followed by the cell's index and count
+     items    l, r, occIndex for each item
+     occs     one value per occurrence
+
+   If the file cannot be opened the function returns without touching
+   *hashTable (unchanged from the previous behaviour).  A truncated file or an
+   allocation failure is reported on stderr and terminates the program.
+   The three arrays are released by FreeHashTable().
+*/
+void LoadHashTable(HASHTABLE * hashTable, const char * fileName) {
+	unsigned int ttlOccurrence = 0;
+	unsigned int ttlItem = 0;
+	unsigned int i;
+	FILE *inFile;
+
+	/* the file holds raw words, so it must be opened in binary mode */
+	if (!(inFile = fopen(fileName, "rb"))) return;
+
+	HashFileReadOrDie(inFile, &hashTable->tableSize, sizeof(unsigned int), 1, fileName);
+	HashFileReadOrDie(inFile, &hashTable->a, sizeof(unsigned int), 1, fileName);
+	HashFileReadOrDie(inFile, &hashTable->b, sizeof(unsigned int), 1, fileName);
+	HashFileReadOrDie(inFile, &hashTable->prime, sizeof(unsigned int), 1, fileName);
+	HashFileReadOrDie(inFile, &ttlItem, sizeof(unsigned int), 1, fileName);
+	HashFileReadOrDie(inFile, &ttlOccurrence, sizeof(unsigned int), 1, fileName);
+
+	hashTable->table = calloc(hashTable->tableSize, sizeof(*hashTable->table));
+	hashTable->itemList = calloc(ttlItem, sizeof(*hashTable->itemList));
+	hashTable->occList = calloc(ttlOccurrence, sizeof(*hashTable->occList));
+	/* calloc() may legitimately return NULL for a zero-sized request */
+	if ((hashTable->tableSize != 0 && hashTable->table == NULL) ||
+	    (ttlItem != 0 && hashTable->itemList == NULL) ||
+	    (ttlOccurrence != 0 && hashTable->occList == NULL)) {
+		fprintf(stderr, "LoadHashTable(): out of memory while loading %s\n", fileName);
+		exit(1);
+	}
+
+	for (i = 0; i < hashTable->tableSize; i++) {
+		char mk;
+		HashFileReadOrDie(inFile, &mk, 1, 1, fileName);
+		if (mk == 0) {
+			/* empty cell */
+			hashTable->table[i].index = 0;
+			hashTable->table[i].count = 0;
+		} else {
+			HashFileReadOrDie(inFile, &hashTable->table[i].index, sizeof(unsigned int), 1, fileName);
+			HashFileReadOrDie(inFile, &hashTable->table[i].count, sizeof(unsigned int), 1, fileName);
+		}
+	}
+	for (i = 0; i < ttlItem; i++) {
+		HashFileReadOrDie(inFile, &hashTable->itemList[i].l, sizeof(unsigned int), 1, fileName);
+		HashFileReadOrDie(inFile, &hashTable->itemList[i].r, sizeof(unsigned int), 1, fileName);
+		HashFileReadOrDie(inFile, &hashTable->itemList[i].occIndex, sizeof(unsigned int), 1, fileName);
+	}
+	/* OCC is a plain unsigned int, so the occurrence list can be read in one call */
+	HashFileReadOrDie(inFile, hashTable->occList, sizeof(unsigned int), ttlOccurrence, fileName);
+
+	fclose(inFile);
+
+}
+
+/*
+   Release the three arrays allocated by LoadHashTable().  The pointers are
+   reset so that a repeated call or a stale access cannot touch freed memory.
+*/
+void FreeHashTable(HASHTABLE * hashTable) {
+	free(hashTable->table);
+	hashTable->table = NULL;
+	free(hashTable->itemList);
+	hashTable->itemList = NULL;
+	free(hashTable->occList);
+	hashTable->occList = NULL;
+}
+
+/*
+   Map key to a cell number below tableSize:
+       g(x) = (a*x + b) mod prime,   f(x) = g(x) mod tableSize.
+
+   NOTE(review): the product key * a is formed in unsigned int and therefore
+   wraps at the width of unsigned int before the first modulo.  That wrap is
+   kept on purpose here: the values must agree with whatever produced the
+   stored table - confirm with the table builder before widening it.
+*/
+unsigned int Hash(HASHTABLE * hashTable, unsigned int key) {
+	const unsigned int prime = hashTable->prime;
+
+	/* unsigned int product (wrapping), reduced modulo prime, then widened */
+	const unsigned int wrappedProduct = key * hashTable->a;
+	const unsigned long long reducedProduct = wrappedProduct % prime;
+
+	/* g(x) */
+	const unsigned int g = (unsigned int)((reducedProduct + hashTable->b) % prime);
+
+	/* f(x) */
+	return g % hashTable->tableSize;
+}
+
+/*
+   Find the stored item whose bounds are exactly (l, r).
+
+   The cell is selected by Hash(l); its `count` items starting at `index` in
+   itemList are compared one by one.  Returns a pointer into itemList, or NULL
+   when no item matches.
+*/
+HASHITEM * HashFind(HASHTABLE * hashTable, unsigned int l,unsigned int r) {
+	const unsigned int hashedIndex = Hash(hashTable, l);
+	const HASHCELL *cell;
+	unsigned int i;
+
+	/* validate the cell number before the cell is read, not after */
+	if (hashedIndex >= hashTable->tableSize) {
+		return NULL;
+	}
+	cell = &hashTable->table[hashedIndex];
+
+	for (i = 0; i < cell->count; i++) {
+		HASHITEM *candidate = &hashTable->itemList[cell->index + i];
+		if (candidate->l == l && candidate->r == r) {
+			return candidate;
+		}
+	}
+	return NULL;
+}
+
+/*
+   Install the BWT and hash table used by the occurrence decoding code and
+   reset the module's counters.
+*/
+void RegisterDecoder(BWT * bwt,HASHTABLE * hashTable) {
+	/* data sources used by OCCProcess() and the decoding macros */
+	occBwt = bwt;
+	occHashtable = hashTable;
+
+	/* start all counters from zero */
+	occCollected = 0;
+	retrieveSA = 0;
+	retrieveHASH = 0;
+	writeQIndex = 0;
+	textPositionTimeTotal = 0;
+}
+
+unsigned int allOne = 0;	/* set to 0xFFFFFFFF by registerTPFile(); written to textPositionFile by registerQSection() when writeQIndex is 0 */
+unsigned int OCCSection=0;	/* zeroed by registerQIndex(); incremented by registerQSection() when writeQIndex is non-zero */
+
+/*
+   Count the positions at which a read differs from the packed reference.
+
+   seq         read, one base per char; only the two low bits of each char are used
+   ref         reference packed 16 bases per word, 2 bits per base, the first
+               base of a word in its two most significant bits
+   occPosCord  reference position aligned with seq[0]
+   seqLen      number of bases in seq
+   dnaLength   number of bases in ref; comparison stops there
+
+   Returns seqLen minus the number of matching bases, so read bases that fall
+   beyond dnaLength count as mismatches.
+
+   The definition deliberately carries no `inline`: with `inline` on every
+   declaration C99/C11 emit no external definition, and any call the compiler
+   does not inline (for example in an unoptimised build) fails to link.
+*/
+int CalMismatch(const char *seq, const unsigned int *ref, const unsigned int occPosCord, const unsigned int seqLen, const unsigned int dnaLength){
+	unsigned int i, l;
+	unsigned int match = 0;
+
+	for (i = 0, l = occPosCord; i < seqLen && l < dnaLength; ++i, ++l) {
+		/* word l / 16, base (l % 16) counted from the most significant end */
+		const unsigned int refBase = (ref[l >> 4] >> (((~l) & 0xf) << 1)) & 0x3;
+		const unsigned int seqBase = (unsigned int)seq[i] & 0x3;
+
+		if (seqBase == refBase) {
+			++match;
+		}
+	}
+	return (int)(seqLen - match);
+}
+
+#include <assert.h>
+
+/*
+   Convert the index range [l, r] into hits appended to hits->itemList.
+
+   Ranges of fewer than 4 indices are resolved one by one with BWTSaValue().
+   Longer ranges are first looked up with HashFind(); when an entry exists its
+   stored occurrence list is used, otherwise BWTSaValue() is used as well.
+   Each position goes through HitInc(), which decides whether it becomes a
+   hit.  Processing stops as soon as bo->cutoff hits are stored.
+
+   Returns the number of hits added by this call (0 when the table was
+   already full).
+
+   HitInc() and OrientPacPos() are macros that expand in this scope and use
+   the locals n, hit, cutoff, blockList, nblock, seqLen, alnLen, extLen,
+   max_mm, pacRef, dnaLength, strain, seq, occ_pos and info by name, so these
+   names must stay as they are.
+*/
+int OCCProcess(const unsigned int l,const unsigned int r, const BWTOPT *bo, const unsigned int info, HITTABLE *hits) {
+	const unsigned int cutoff = bo->cutoff;
+	if(hits->n >= bo->cutoff) return 0;
+
+	unsigned int n = hits->n;
+	HITITEM *hit = hits->itemList+n;
+	ChrBlock *blockList   = bo->blockList;
+	const unsigned int nblock   = bo->nblock;
+	const unsigned int seqLen   = bo->seqLen;
+	const unsigned int alnLen  = bo->alnLen;
+	const unsigned int extLen = seqLen-alnLen;
+	const unsigned int max_mm   = bo->max_mm+(info>>25 & 0x7);
+	const unsigned int *pacRef  = bo->pacRef;
+	const unsigned int dnaLength = bo->dnaLen;
+	const unsigned int strain = (info>>24)&1;
+	char *seq    =  strain?bo->rc:bo->fw;
+	unsigned int occ_pos  = 0;
+	unsigned int k;
+	HASHITEM *item = NULL;
+
+	/* only ranges of at least 4 indices are looked up in the hash table */
+	if (r-l+1 >= 4) {
+		item = HashFind(occHashtable,l,r);
+	}
+
+	if (item != NULL) {
+		/* positions stored with the hash item */
+		for (k=0; k<item->r-item->l+1 && n < cutoff; k++) {
+			occ_pos = occHashtable->occList[item->occIndex+k];
+			HitInc(n);
+		}
+	} else {
+		/* positions obtained index by index */
+		for (k=l; k<=r && n < cutoff; k++) {
+			occ_pos = BWTSaValue(occBwt,k);
+			HitInc(n);
+		}
+	}
+
+	int inc = n - hits->n;
+	hits->n = n;
+	return inc;
+}
+
+/*
+   Remember filePtr as the text-position output file, write searchMode to it
+   as one raw unsigned int, and prepare the all-ones marker word `allOne`.
+*/
+void registerTPFile(FILE * filePtr,unsigned int searchMode) {
+	textPositionFile = filePtr;
+	fwrite(&searchMode, sizeof searchMode, 1, textPositionFile);
+
+	/* 31 low bits set, shifted up by one, lowest bit set again */
+	allOne = (((1U << 31) - 1) << 1) + 1;
+}
+
+/* Record the index of the current query and restart its section counter. */
+void registerQIndex(unsigned int index) {
+	OCCSection = 0;
+	writeQIndex = index;
+}
+/*
+   Mark a section boundary: count it in OCCSection when a query index is
+   registered (writeQIndex non-zero), otherwise write the all-ones marker
+   word to the text-position file.
+*/
+void registerQSection() {
+	if (writeQIndex != 0) {
+		OCCSection++;
+		return;
+	}
+	fwrite(&allOne, sizeof(unsigned int), 1, textPositionFile);
+}
diff --git a/extratools.h b/extratools.h
new file mode 100644
index 0000000..1bf95eb
--- /dev/null
+++ b/extratools.h
@@ -0,0 +1,165 @@
+#ifndef _EXTRATOOLS_H_
+#define _EXTRATOOLS_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "MiscUtilities.h" 
+#include "MemManager.h"
+#include "TextConverter.h"
+#include "Timing.h"
+#include "BWT.h"
+#include "kstring.h"
+#include <fcntl.h>
+#include <unistd.h>
+#define MAX_DIFF 32
+
+typedef struct LOOKUPTABLE_TYPE {
+       unsigned int tableSize;	/* value given to LoadLookupTable(); the table has 1 << (2 * tableSize) entries */
+       unsigned int * table;	/* entries read verbatim from the lookup file */
+}LOOKUPTABLE;
+
+typedef struct HASHCELL_TYPE {
+    unsigned int count;	/* number of consecutive itemList entries belonging to this cell */
+    unsigned int index;	/* position in itemList of the first of them */
+}HASHCELL;
+
+typedef struct HASHITEM_TYPE {
+    unsigned int l;	/* lower bound of the range; compared by HashFind() */
+    unsigned int r;	/* upper bound of the range; compared by HashFind() */
+    unsigned int occIndex;	/* position in occList of the first of the item's r-l+1 occurrences */
+}HASHITEM;
+
+typedef unsigned int OCC;
+
+typedef struct HASHTABLE_TYPE {
+       unsigned int prime;	/* prime, a, b: parameters of Hash(): ((a*x + b) mod prime) mod tableSize */
+       unsigned int a;
+       unsigned int b;
+       unsigned int tableSize;	/* number of cells in table */
+       HASHCELL * table;
+       HASHITEM * itemList;
+       OCC * occList;
+}HASHTABLE;
+
+typedef struct _HITITEM_TYPE_{
+	int info;	/* copy of the info word passed to OCCProcess() */
+	int strain;	/* bit 24 of info; non-zero selects the rc sequence instead of fw in OCCProcess() */
+	int chr;	/* chrID of the block that contains the hit */
+	unsigned int occ_pos, pos, blockid;	/* position before / after translation by OrientPacPos(), and the block's index in blockList */
+	int n_diff;
+	int n_mm;	/* CalMismatch() result plus bits 24-26 of info (see HitInc) */
+	int n_gapo, n_gape, gap_beg;
+	int n_cigar;	/* set to 0 by HitInc */
+	char *md;
+	unsigned short *cigar;
+}HITITEM;
+
+typedef struct _HITTABLE_TYPE_{
+	int n;	/* number of items currently stored in itemList */
+	HITITEM *itemList;
+}HITTABLE;
+/* NOTE(review): the variables below are defined (not declared extern) in a header, so every file that includes it carries its own tentative definition; this only links when the toolchain merges common symbols - confirm, and consider extern declarations plus one definition in a .c file */
+BWT * occBwt;	/* set by RegisterDecoder() */
+HASHTABLE * occHashtable;	/* set by RegisterDecoder() */
+unsigned int * occCollector;
+unsigned int occCollected;	/* zeroed by RegisterDecoder() */
+
+FILE * textPositionFile;	/* set by registerTPFile() */
+void registerTPFile(FILE * filePtr,unsigned int searchMode);
+
+void registerQIndex(unsigned int queryIndex);
+void registerQSection();
+
+void LoadLookupTable(LOOKUPTABLE * lookupTable, const char * fileName, const int tableSize);
+void FreeLookupTable(LOOKUPTABLE * lookupTable);
+unsigned int LookupSafe(LOOKUPTABLE lookupTable, BWT * bwt,unsigned long long lKey, unsigned long long rKey,unsigned int *l, unsigned int *r);
+void LoadHashTable(HASHTABLE * hashTable, const char * fileName);
+HASHITEM * HashFind(HASHTABLE * hashTable, unsigned int l,unsigned int r);
+void FreeHashTable(HASHTABLE * hashTable);
+void RegisterDecoder(BWT * bwt,HASHTABLE * hashTable);
+
+//void OCCClean();
+//void OCCProcess(unsigned int l,unsigned int r);
+inline int altCalMM(unsigned int x);
+inline int CalMismatch(const char *,const unsigned int *,const unsigned int , const unsigned int, const unsigned int);
+int OCCProcess(unsigned int l, unsigned int r, const BWTOPT *bo, const unsigned int info, HITTABLE *hits);
+/* GenOCCArr(arr): store in arr[0..] every position for the range [l, r], where l and r are names taken from the expansion site; uses the hash item's occurrence list when r-l+1 >= 4 and HashFind() has an entry, BWTSaValue() per index otherwise. The counter occ is local to the macro, and the expansion already ends in ';'. */
+#define GenOCCArr(arr) do{ \
+	int occ = 0;			\
+	if (r-l+1 >= 4) {	\
+		HASHITEM *item = HashFind(occHashtable,(l),(r));	\
+		if (item==NULL) {	\
+			unsigned int k;	\
+			for (k=l;k<=r;++k) {	\
+				arr[occ++] = BWTSaValue(occBwt,k);	\
+			}\
+		} else {\
+			unsigned int k;\
+			for (k=0;k<item->r-item->l+1;++k) {\
+				arr[occ++] = occHashtable->occList[item->occIndex+k];\
+			}\
+		}\
+	} else {\
+		unsigned int k;\
+		for (k=l;k<=r;++k) {\
+			arr[occ++] = BWTSaValue(occBwt,k);\
+		}\
+	}\
+}while(0);
+/* OrientPacPos(): binary search of blockList (nblock, occ_pos, strain, extLen, alnLen, seqLen come from the expansion site) for the block containing occ_pos; when the whole alignment lies inside that block it sets chr, pos and blockid of the enclosing scope, otherwise they are left unchanged. NOTE(review): h starts at nblock, so blockList[nblock] is read when occ_pos lies past every block - confirm the array has a spare entry. */
+#define OrientPacPos(){\
+	int start, end;		\
+	start = end = 0;			\
+	int l, m, h;		\
+	l = 0; h = nblock; m = nblock/2;	\
+	/*			\
+	fprintf(stderr, "pacPos %u\n",occ_pos);			\
+	*/			\
+	while(l<=h){		\
+		m = (h+l)>>1;	\
+		if((start=blockList[m].blockStart)>occ_pos){ 	\
+			h = m - 1;				\
+		}		\
+		else if((end = blockList[m].blockEnd)<occ_pos){	\
+			l = m + 1;		\
+		}			\
+		else if ((start <= (occ_pos-(strain?extLen:0))) && (end >= (occ_pos + (strain?alnLen:seqLen)))){	\
+			chr = blockList[m].chrID; 			\
+			pos = occ_pos - start + (blockList+m)->ori + 1;			\
+			blockid = m;			\
+			break;						\
+		}else break;					\
+	}				\
+}
+/* HitInc(n): locate occ_pos with OrientPacPos() and, when a block is found and CalMismatch() does not exceed max_mm, fill *hit and advance both hit and n. Uses hit, occ_pos, strain, extLen, seqLen, max_mm, seq, pacRef, dnaLength and info from the expansion site. */
+#define MAX_MD_LEN 1024
+#define HitInc(n) {	\
+	/*		\
+	if(cutoff == n)		\
+		fprintf(stderr, "max %d->n %d\n", cutoff, n);		\
+		*/		\
+	int chr = -1;int pos = -1;int blockid = 0;int mm = 0;		\
+	OrientPacPos();		\
+/*	if(chr > -1 && (pos-(strain?extLen:0)) > 0 && (!extLen || max_mm >= ((info>>24)&0x7>3?0:((info>>24)&0x7))+(mm=CalMismatch(seq, pacRef, strain?(occ_pos-extLen):(occ_pos+alnLen), extLen, errTmp, strain?0:alnLen, allErr)))) {		*/ \
+	if(chr > -1 && (pos-(strain?extLen:0)) > 0 && (max_mm >= (mm=CalMismatch(seq, pacRef, strain?(occ_pos-extLen):(occ_pos), seqLen, dnaLength)))) {		\
+		hit->strain = strain;			\
+		hit->chr = chr;		\
+		hit->pos = pos-(strain?extLen:0);		\
+		if (hit->pos < 0 )  printf("%d\t%d\n", pos, extLen);				\
+		hit->blockid = blockid;			\
+		hit->occ_pos = occ_pos-(strain?extLen:0);	\
+		hit->info = info;		\
+		hit->n_cigar = 0;			\
+		hit->n_mm = mm + ((info>>24)&0x7);			\
+		n++; hit++;			\
+	}				\
+}
+
+//void CleanDecoder();
+
+double getTextPositionTime();
+unsigned int getSARetrieved();
+unsigned int getHASHRetrieved();
+
+#endif /*_EXTRATOOLS_H_*/
+
diff --git a/kstring.c b/kstring.c
new file mode 100644
index 0000000..de06552
--- /dev/null
+++ b/kstring.c
@@ -0,0 +1,35 @@
+#include <stdarg.h>
+#include <stdio.h>
+#include "kstring.h"
+
+/*
+   Append printf-style formatted text to the string s, growing its buffer
+   when needed.
+
+   Returns the number of characters appended, or a negative value when
+   formatting fails or the buffer cannot be grown; in that case s keeps its
+   previous length and buffer.
+*/
+int ksprintf(kstring_t *s, const char *fmt, ...)
+{
+	va_list ap;
+	int l;
+	/* an empty kstring has no buffer yet: format into a zero-sized destination */
+	char *dest = s->s ? s->s + s->l : NULL;
+	size_t room = s->s ? s->m - s->l : 0;
+
+	va_start(ap, fmt);
+	l = vsnprintf(dest, room, fmt, ap);
+	va_end(ap);
+	if (l < 0)
+		return l;
+
+	if ((size_t)l + 1 > room) {
+		size_t capacity = s->l + (size_t)l + 2;
+		char *grown;
+
+		kroundup32(capacity);
+		grown = (char*)realloc(s->s, capacity);
+		if (grown == NULL)
+			return -1;
+		s->s = grown;
+		s->m = capacity;
+
+		/* each va_start is paired with exactly one va_end */
+		va_start(ap, fmt);
+		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
+		va_end(ap);
+		if (l < 0)
+			return l;
+	}
+	s->l += l;
+	return l;
+}
+
+#ifdef KSTRING_MAIN
+#include <stdio.h>
+/* Self-test: format into an empty kstring and print the result. */
+int main()
+{
+	kstring_t *s;
+	int status = 0;
+	s = (kstring_t*)calloc(1, sizeof(kstring_t));
+	if (s == NULL)
+		return 1;
+	if (ksprintf(s, "abcdefg: %d", 100) < 0)
+		status = 1;
+	else
+		printf("%s\n", s->s);
+	free(s->s);	/* the character buffer belongs to the kstring and was leaked before */
+	free(s);
+	return status;
+}
+#endif
diff --git a/kstring.h b/kstring.h
new file mode 100644
index 0000000..398901f
--- /dev/null
+++ b/kstring.h
@@ -0,0 +1,46 @@
+#ifndef KSTRING_H
+#define KSTRING_H
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+/*
+   Append the NUL-terminated string p to s, growing the buffer when needed.
+   Returns the number of characters appended, or -1 if the buffer cannot be
+   grown (s is then left unchanged).
+*/
+static inline int kputs(const char *p, kstring_t *s)
+{
+	const size_t len = strlen(p);
+	if (s->l + len + 1 >= s->m) {
+		size_t capacity = s->l + len + 2;
+		char *grown;
+		kroundup32(capacity);
+		grown = (char*)realloc(s->s, capacity);
+		if (grown == NULL)
+			return -1;
+		s->s = grown;
+		s->m = capacity;
+	}
+	/* len + 1 copies the terminating NUL as well */
+	memcpy(s->s + s->l, p, len + 1);
+	s->l += len;
+	return (int)len;
+}
+
+/*
+   Append the character c to s and keep the buffer NUL-terminated.
+   Returns c, or -1 if the buffer cannot be grown (s is then left unchanged).
+*/
+static inline int kputc(int c, kstring_t *s)
+{
+	if (s->l + 1 >= s->m) {
+		size_t capacity = s->l + 2;
+		char *grown;
+		kroundup32(capacity);
+		grown = (char*)realloc(s->s, capacity);
+		if (grown == NULL)
+			return -1;
+		s->s = grown;
+		s->m = capacity;
+	}
+	s->s[s->l++] = c;
+	s->s[s->l] = 0;
+	return c;
+}
+
+int ksprintf(kstring_t *s, const char *fmt, ...);
+
+#endif
diff --git a/r250.c b/r250.c
new file mode 100644
index 0000000..9313a24
--- /dev/null
+++ b/r250.c
@@ -0,0 +1,128 @@
+/* r250.c	the r250 uniform random number algorithm
+
+		Kirkpatrick, S., and E. Stoll, 1981; "A Very Fast
+		Shift-Register Sequence Random Number Generator",
+		Journal of Computational Physics, V.40
+
+		also:
+
+		see W.L. Maier, DDJ May 1991
+
+
+
+*/
+
+#include <limits.h>
+#include "r250.h"
+
+// static functions
+// Full prototype: an empty parameter list does not declare the int parameter
+// that the definition below takes.
+static unsigned int randlcg(int sd);
+
+
+#define MSB          0x80000000L	/* top bit of a 32-bit word */
+#define ALL_BITS     0xffffffffL	/* all 32 bits set */
+#define HALF_RANGE   0x40000000L	/* threshold used when choosing which MSBs to set */
+#define STEP         7			/* spacing between the words given a diagonal bit */
+#define BITS         32			/* number of diagonal bits forced by r250_init */
+
+static unsigned int r250_buffer[ 250 ];	/* generator state */
+static int r250_index;				/* next slot of r250_buffer to update */
+
+/*
+ * One step of the multiplicative congruential generator
+ *     next = (16807 * sd) mod (2^31 - 1)
+ * used to seed the r250 buffer.
+ *
+ * The modulus is written out explicitly.  The previous code used LONG_MAX,
+ * which equals 2^31 - 1 only where long is 32 bits wide; with a 64-bit long
+ * the derived quotient no longer fits in an int and the recurrence above is
+ * not what gets computed.  The product is formed in long long, so no
+ * split-multiplication is needed to avoid overflow.
+ *
+ * sd: previous state.  Negative inputs are folded into 0 .. 2^31 - 2.
+ *     0 maps to 0 (a fixed point of the recurrence).
+ * Returns the next state, always below 2^31 - 1.
+ */
+static unsigned int randlcg(int sd)       /* returns a random unsigned integer */
+{
+	const long long modulus    = 2147483647LL;	/* 2^31 - 1 */
+	const long long multiplier = 16807LL;
+	long long next = ((long long)sd * multiplier) % modulus;
+
+	if ( next < 0 )		/* % keeps the sign of a negative seed */
+		next += modulus;
+
+	return (unsigned int)next;
+}
+
+/*
+ * Seed the generator: reset the index, fill all 250 state words from
+ * randlcg(), set the top bit of some of them, then force a diagonal of
+ * one-bits across BITS words spaced STEP apart (starting at word 3).
+ */
+void r250_init(int sd)
+{
+	unsigned int keep = ALL_BITS;	/* bits retained in the current diagonal word */
+	unsigned int diag = MSB;	/* the diagonal bit for the current word */
+	int slot, bit;
+
+	r250_index = 0;
+
+	/* fill r250 buffer with BITS-1 bit values, chaining each into the next seed */
+	for (slot = 0; slot < 250; ++slot) {
+		r250_buffer[slot] = randlcg(sd);
+		sd = r250_buffer[slot];
+	}
+
+	/* set some MSBs to 1 */
+	for (slot = 0; slot < 250; ++slot) {
+		sd = randlcg(sd);
+		if ( sd > HALF_RANGE )
+			r250_buffer[slot] |= MSB;
+	}
+
+	/* word 3, 3+STEP, 3+2*STEP, ...: clear bits left of the diagonal, set the diagonal bit */
+	for (bit = 0, slot = 3; bit < BITS; ++bit, slot += STEP) {
+		r250_buffer[slot] &= keep;
+		r250_buffer[slot] |= diag;
+		keep >>= 1;
+		diag >>= 1;
+	}
+}
+
+/*
+ * Advance the generator by one step and return the new 32-bit word:
+ * the current slot is XORed with the slot 103 positions ahead (wrapping
+ * around the 250-word buffer), stored back, and the index moves on.
+ */
+unsigned int r250()		/* returns a random unsigned integer */
+{
+	const int here = r250_index;
+	const int tap  = ( here >= 147 ) ? here - 147 : here + 103;	/* wrap pointer around */
+	const unsigned int fresh = r250_buffer[ here ] ^ r250_buffer[ tap ];
+
+	r250_buffer[ here ] = fresh;
+	r250_index = ( here >= 249 ) ? 0 : here + 1;	/* increment pointer for next time */
+
+	return fresh;
+}
+
+
+/*
+ * Advance the generator by one step (exactly as r250() does) and return the
+ * new word scaled by ALL_BITS, i.e. a double in the range 0..1.
+ */
+double dr250()		/* returns a random double in range 0..1 */
+{
+	const unsigned int word = r250();	/* same state update as the integer variant */
+
+	return (double)word / ALL_BITS;
+}
+
diff --git a/r250.h b/r250.h
new file mode 100644
index 0000000..f6190c8
--- /dev/null
+++ b/r250.h
@@ -0,0 +1,31 @@
+/* r250.h	prototypes for r250 random number generator,
+
+		Kirkpatrick, S., and E. Stoll, 1981; "A Very Fast
+		Shift-Register Sequence Random Number Generator",
+		Journal of Computational Physics, V.40
+
+		also:
+
+		see W.L. Maier, DDJ May 1991
+
+
+*/
+
+#ifndef _R250_H_
+#define _R250_H_ 1.2
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+void         r250_init(int seed);
+unsigned int r250( void );
+double       dr250( void );
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/soap.1 b/soap.1
new file mode 100644
index 0000000..4544bb3
--- /dev/null
+++ b/soap.1
@@ -0,0 +1,142 @@
+.TH SOAPaligner/soap2 1 "25 May 2009" SOAPaligner-2.1X "Bioinformatics tool"
+.SH NAME
+.PP
+SOAPaligner/soap2 \- Short Oligonucleotide Analysis Package aligner
+.SH SYNOPSIS
+.PP
+soap reference.index short_reads.fast[a|q] alignment.out [options]
+.SH DESCRIPTION
+.PP
+SOAPaligner/soap2 is a member of the SOAP (Short Oligonucleotide Analysis Package). It is an updated version of SOAP software for short oligonucleotide alignment. The new program features in super fast and accurate alignment for huge amounts of short reads generated by Illumina/Solexa Genome Analyzer. Compared to soap v1, it is one order of magnitude faster. It require only 2 minutes aligning one million single-end reads onto the human reference genome. Another remarkable improvement of  [...]
+.PP
+SOAPaligner benefited in time and space efficiency from a revolution in the basic data structures and algorithms used. The core algorithms and the indexing data structures (2way-BWT) were developed by the algorithms research group of the Department of Computer Science, the University of Hong Kong (T.W. Lam, Alan Tam, Simon Wong, Edward Wu and S.M. Yiu).
+.SH COMMAND AND OPTIONS
+.PP
+.B soap
+-D <in.fasta.index> -a <query.file.a> [-b <query.file.b>] -o <alignment.output> [-2 <unpaired.output>] [options]
+.P
+.B OPTIONS:
+.RS
+.TP
+.B -D STR
+Prefix name for reference index [*.index]. See 
+.B APPENDIX
+How to build the reference index
+.TP
+.B -a STR
+Query file, for SE reads alignment or one end of PE reads
+.TP
+.B -b STR
+Query b file, one end of PE reads
+.TP
+.B -o STR
+Output file for alignment results
+.TP
+.B -2 STR
+Output file contains mapped but unpaired reads when do PE alignment
+.TP
+.B -u STR
+Output file for unmapped reads, [none]
+.TP
+.B -m INT
+Minimal insert size INT allowed for PE, [400]
+.TP
+.B -x INT
+Maximal insert size INT allowed for PE, [600]
+.TP
+.B -n INT
+Filter low quality reads containing more than INT Ns, [5]
+.TP
+.B -t
+Output read IDs instead of read names, [none]
+.TP
+.B -r INT
+How to report repeat hits, 0=none; 1=random one; 2=all, [1]
+.TP
+.B -R
+RF alignment for long insert size(>= 2k bps) PE data, [none] FR alignment
+.TP
+.B -l INT
+For long reads with high error rate at 3'-end, those can't align whole length, then first align 5' INT bp subsequence as a seed, [256] use whole length of the read
+.TP
+.B -s INT
+minimal alignment length (for soft clip) 
+.TP
+.B -v INT
+Totally allowed mismatches in one read, when use subsequence as a seed, [5]
+.TP
+.B -g INT
+Allow gap size in one read, [0]
+.TP
+.B -M INT
+Match mode for each read or the seed part of read, which shouldn't contain more than 2 mismatches, [4]
+.RS
+.TP
+0: exact match only
+.TP
+1: 1 mismatch match only
+.TP
+2: 2 mismatch match only
+.TP
+4: find the best hits
+.RE
+.TP
+.B -p INT
+Multithreads, INT threads, [1]
+.SH OUTPUT FORMAT
+.PP
+SOAP2 output format contains following column information:
+.PP
+1. reads name / reads ID (if -t is available)
+.P
+2. reads sequence (if read align to reverse strand, here is the reverse sequence of original read)
+.P
+3. quality sequence (if input is fasta reads, the column will be all 'h', and the sequence is backward if reads mapping reverse )
+.P
+4. 
+.SH APPENDIX
+.PP
+Before use soap2 to do alignment, the reference index must be generated by 2bwt-builder.
+.P
+.RS
+.B 2bwt-builder
+<reference.fasta>
+.P
+.B NOTE:
+1. the reference input should only be FASTA format; 2. the program will auto generate the index files in the directory where the fasta file is located, so confirm the permission at first.
+.RE
+.SH ENVIRONMENT
+.PP
+The data structure is incompatible with 32bit, so it can't be migrated on any 32bit platforms.
+Due to using the MMX instruction to optimize parts of code, the current version can only run on 
+.B x86_64 platform.
+We will provide a universal version for most of the 64bit platform later.
+.TP
+.B HARDWARE REQUIREMENT
+.RS
+1.8Gb RAM (for a genome as large as human's)
+.P
+2.at least 8Gb hard disk to store index (for a genome as large as human's)
+.RE
+.TP
+.B SYSTEM REQUIREMENT
+.RS
+Linux x86_64
+.RE
+.SH SEE ALSO
+.PP
+Website for SOAP <http://soap.genomics.org.cn>,
+.P
+Google Group for SOAP <http://groups.google.com/group/bgi-soap>
+.TP
+.BR Publication:
+"SOAP: short oligonucleotide alignment program" (2008) BIOINFORMATICS,Vol. 24 no.5 2008, pages 713\-714
+.SH AUTHOR
+.PP
+.B BGI Shenzhen
+SOAP team. The core algorithm Bidirect-BWT was written by Prof. T.W. Lam and his team at HongKong University.
+.SH REPORT BUGS
+.PP
+Report bugs to <soap at genomics.org.cn>
+.SH ACKNOWLEDGEMENTS
+.PP
+We appreciate the prominent work of Prof. T.W. Lam, Alan Tam, Simon Wong, Edward Wu and S.M. Yiu on Bidirect-BWT.
diff --git a/soap.c b/soap.c
new file mode 100644
index 0000000..7ba12b5
--- /dev/null
+++ b/soap.c
@@ -0,0 +1,391 @@
+/* * =============================================================================
+ *
+ *       Filename:  soap.c
+ *
+ *    Description:  
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include "Match.h"
+#include "HSP.h"
+#include "BWT.h"
+#include "TypeNLimit.h"
+#include "extratools.h"
+#include "MemManager.h"
+
+#ifndef MAKE_TIME
+#define MAKE_TIME "00:00:00"
+#endif
+
+#ifndef VID
+//#define VID "2.11"
+//#define VID "2.15"		// with gap and extend cigar
+//#define VID "2.16"		// -r 2 segment fault
+//#define VID "2.17"		// mm in rc
+//#define VID "2.18"		// 07/05/2009
+//#define VID "2.19"			// 13/07/2009
+#define VID "2.20"			// 23/07/2009 gap missed in forward strand
+#endif
+
+#define MAX_FILENAME_LEN 1024
+#define MAX_SUFFIX_LEN 255
+
+const char *PROGRAM = "SOAPaligner/soap2";
+const char *AUTHOR  = "BGI shenzhen";
+const char *VERSION = VID;			/*release date: 14/01/2009*/
+const char *CONTACT = "soap at genomics.org.cn";
+
+char readAFileName[MAX_FILENAME_LEN]                           = "";
+char readBFileName[MAX_FILENAME_LEN]                           = "";
+char outFileName[MAX_FILENAME_LEN]                             = "";
+char outUnpairFileName[MAX_FILENAME_LEN]                       = "";
+char outUnmapFileName[MAX_FILENAME_LEN]                        = "";
+
+char database_prefix[MAX_FILENAME_LEN]                         = "";
+
+char AnnotationSuffix[MAX_SUFFIX_LEN]                          = ".ann";
+char PackedDNASuffix[MAX_SUFFIX_LEN]                           = ".pac";
+char BWTCodeSuffix[MAX_SUFFIX_LEN]                             = ".bwt";
+char BWTOccValueSuffix[MAX_SUFFIX_LEN]                         = ".fmv";
+char SaValueSuffix[MAX_SUFFIX_LEN]                             = ".sa";
+
+char RevPackedDNASuffix[MAX_SUFFIX_LEN]                        = ".rev.pac";
+char RevBWTCodeSuffix[MAX_SUFFIX_LEN]                          = ".rev.bwt";
+char RevBWTOccValueSuffix[MAX_SUFFIX_LEN]                      = ".rev.fmv";
+
+char LookupTableSuffix[MAX_SUFFIX_LEN]                         = ".lkt";
+char RevLookupTableSuffix[MAX_SUFFIX_LEN]                      = ".rev.lkt";
+char HighOccHashTableSuffix[MAX_SUFFIX_LEN]                    = ".hot";
+
+// DatabaseFiles parameters
+char AnnotationFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]       = "";
+char PackedDNAFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]        = "";
+char BWTCodeFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]          = "";
+char BWTOccValueFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]      = "";
+char SaValueFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]          = "";
+
+//For Reversed BWT
+char RevPackedDNAFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]     = "";
+char RevBWTCodeFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]       = "";
+char RevBWTOccValueFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]   = "";
+
+//For Extra Data Structures
+char LookupTableFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]      = "";
+char RevLookupTableFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN]   = "";
+char HighOccHashTableFileName[MAX_FILENAME_LEN+MAX_SUFFIX_LEN] = "";
+unsigned int LookUpTableSize  = 13;
+unsigned int RevLookUpTableSize = 13;
+
+// Memory parameters.
+// NOTE(review): the values look like byte counts - confirm against MMPoolCreate().
+// Earlier, smaller settings kept for reference:
+//   PoolSize            = 20971520   (20 * 2^20)
+//   WorkingMemorySize   = 67108864   (64 * 2^20)
+//   AlignmentMemorySize = 4194304    ( 4 * 2^20)
+
+int PoolSize            = 20971520;	// 20 * 2^20; passed to MMPoolCreate() in main
+int WorkingMemorySize   = 1073741824;	// 2^30
+int AlignmentMemorySize = 536870912;	// 2^29
+
+unsigned char charMap[256];
+unsigned char complementMap[256];
+
+static SOAPOPT *opt;
+
+/*
+ * Print the program banner and the option summary to stdout, then terminate
+ * the process with exit status 1 (also when reached through -h).
+ *
+ * The default values shown are read from the global `opt`, so `opt` must
+ * already point to an initialised SOAPOPT when this is called.
+ */
+void Usage(void) {
+	fprintf(stdout, "\nProgram: %s\n", PROGRAM);
+	fprintf(stdout, "Compile Date: "
+			MAKE_TIME"\n");
+	fprintf(stdout, "Author:  %s\n", AUTHOR);
+	fprintf(stdout, "Version: %s\n", VERSION);
+	fprintf(stdout, "Contact: %s\n", CONTACT);
+	fprintf(stdout, "\nUsage:\tsoap [options]\n");
+	fprintf(stdout,	"\t-a  <str>   query a file, *.fq, *.fa\n");
+	fprintf(stdout,	"\t-b  <str>   query b file\n");
+	fprintf(stdout,	"\t-D  <str>   reference sequences indexing table, *.index format\n");
+	fprintf(stdout,	"\t-o  <str>   output alignment file(txt)\n");
+	fprintf(stdout, "\t-M  <int>   match mode for each read or the seed part of read, which shouldn't contain more than 2 mismaches, [4]\n"
+			"\t            0: exact match only\n"
+			"\t            1: 1 mismatch match only\n"
+			"\t            2: 2 mismatch match only\n"
+			"\t            4: find the best hits\n");
+	fprintf(stdout,	"\t-u  <str>   output unmapped reads file\n");
+	fprintf(stdout, "\t-t          output reads id instead reads name, [none]\n");
+	fprintf(stdout, "\t-l  <int>   align the initial n bps as a seed [%d] means whole length of read\n", opt->aln_len);
+	fprintf(stdout,	"\t-n  <int>   filter low-quality reads containing >n Ns before alignment, [%d]\n", opt->ns);
+	fprintf(stdout,	"\t-r  [0,1,2] how to report repeat hits, 0=none; 1=random one; 2=all, [%d]\n", opt->rr);
+	fprintf(stdout,	"\t-m  <int>   minimal insert size allowed, [%d]\n", opt->min_ins); //minimal insert size
+	fprintf(stdout,	"\t-x  <int>   maximal insert size allowed, [%d]\n", opt->max_ins); //max_insert_size
+	fprintf(stdout,	"\t-2  <str>   output file of unpaired alignment hits\n");
+	fprintf(stdout,	"\t-v  <int>   maximum number of mismatches allowed on a read. [%d] bp\n", opt->max_mm);
+	fprintf(stdout,	"\t-s  <int>   minimal alignment length (for soft clip) [%d] bp\n", opt->min_len);
+//	fprintf(stdout,	"\t-U          only find uniq mapped reads with n mismatches for single-end, [%d]\n", opt->uniq);
+//	fprintf(stdout,	"\t-A          report all mismatches reads in SOAP Format, default [none] report number \n");
+	fprintf(stdout,	"\t-g  <int>   one continuous gap size allowed on a read. [%d] bp\n", opt->gap_len );//max_gap, allowed_gap
+	fprintf(stdout,	"\t-R          for long insert size of pair end reads RF. [none](means FR pair)\n");
+	/* the -e default below is a literal in the text, not read from opt */
+	fprintf(stdout,	"\t-e  <int>   will not allow gap exist inside n-bp edge of a read, default=5\n");     //gap_edge
+//fprintf(stdout,	"\t\t-z  <char>  initial quality, default=@ [Illumina is using '@', Sanger Institute is using '!']\n");//zero_quality
+//	fprintf(stdout,	"\t\t-c  [0,1,2] do alignment on which reference chain? 0:both; 1:forward only; 2:reverse only. default=%d");//chains
+#ifdef PTHREADS
+	fprintf(stdout,	"\t-p  <int>   number of processors to use, [%d]\n", opt->nthreads);   //number of processors
+#endif
+	fprintf(stdout,	"\n\t-h          this help\n\n");
+	exit(1);
+	//*/
+}		/* -----  end of function Usage  ----- */
+
+/*
+ * Allocate a SOAPOPT and fill in the default option values.
+ *
+ * The structure is zero-initialised first, so members that are only written
+ * by ParseOpt() when the matching option is given (zero_qual, bisulfite,
+ * chain) hold 0 instead of indeterminate heap contents.
+ *
+ * Returns the new structure; the caller releases it with free().  Exits the
+ * process if the allocation fails.
+ */
+SOAPOPT *OptIni(){
+	SOAPOPT *o;
+	o = (SOAPOPT *) calloc (1, sizeof(SOAPOPT));
+	if (o == NULL) {
+		fprintf(stderr, "Memory Error: Can't allocate the option table\n");
+		exit(EXIT_FAILURE);
+	}
+	o->fast     = FASTQ;
+	o->aln_len  = 256;
+	o->ns       = 5;
+	o->max_mm   = 5;
+	o->gap_len  = 0;	/* was assigned twice (5, then 0); 0 is the effective default */
+	o->gap_fb   = 5;
+	o->nthreads = 1;
+	o->min_ins  = 400;
+	o->max_ins  = 600;
+	o->unmapped = 0;
+	o->rr       = 1;
+	o->pe       = FALSE;
+	o->cutoff   = 1;
+	o->mode     = 4;
+	o->id       = FALSE;
+	o->FR       = TRUE;
+	o->allErr   = FALSE;
+	o->min_len  = 255;
+	o->uniq     = 0;
+	return o;
+}
+
+/*
+ * Parse the command line with getopt() and store the results in the global
+ * file-name buffers and in the global option table `opt`.
+ *
+ * File names are copied with snprintf, so over-long arguments are truncated
+ * to MAX_FILENAME_LEN - 1 characters.  -h and any unrecognised option print
+ * the usage text, which terminates the process.  Options present in the
+ * getopt string but without a case below (A, w, i, q, and p when PTHREADS is
+ * not defined) are accepted and ignored.
+ */
+void ParseOpt(int argc, char *argv[]){
+	/* getopt() returns int; holding it in a plain char makes the != -1 test
+	 * never succeed on platforms where char is unsigned. */
+	int c;
+	while((c = getopt(argc, argv, "a:b:D:o:2:u:m:x:M:AK:l:v:U:g:w:i:e:q:c:Rz:r:B:s:p:tn:h"))!=-1){
+		switch(c){
+			//basic IO
+			case 'a':
+				snprintf(readAFileName, MAX_FILENAME_LEN, "%s", optarg);
+				break;
+			case 'D':
+				snprintf(database_prefix, MAX_FILENAME_LEN, "%s", optarg);
+				break;
+			case 'o':
+				snprintf(outFileName, MAX_FILENAME_LEN, "%s", optarg);
+				break;
+			case 'b':
+				opt->pe = TRUE;		/* a second query file switches on pair-end mode */
+				snprintf(readBFileName, MAX_FILENAME_LEN, "%s", optarg);
+				break;
+			case '2':
+				snprintf(outUnpairFileName, MAX_FILENAME_LEN, "%s", optarg);
+				break;
+			case 'm':
+				opt->min_ins = atoi(optarg);
+				break;
+			case 'x':
+				opt->max_ins = atoi(optarg);
+				break;
+
+//advance options
+			case 'u':
+				opt->unmapped = TRUE;
+				snprintf(outUnmapFileName, MAX_FILENAME_LEN, "%s", optarg);
+				break;
+			case 'l':
+				opt->aln_len = atoi(optarg);
+				break;
+			case 'M':
+				{
+					opt->mode = atoi(optarg);
+					
+					if(opt->mode == 4) opt->cutoff = 1;
+					else if(opt->mode == 5) opt->cutoff = MAX_ALN;
+					break;
+				}
+			case 'K':
+				opt->cutoff = min(atoi(optarg), MAX_ALN);
+				break;
+			case 'v':
+				opt->max_mm = min(atoi(optarg), MAX_MISMATCH);
+				break;
+			case 'g':
+				opt->gap_len = min(atoi(optarg), MAX_GAP_LEN);
+				break;
+			case 'e':
+				opt->gap_fb = atoi(optarg);
+				break;
+			case 'R':
+				opt->FR = 0;
+				break;
+			case 'z':
+				opt->zero_qual = atoi(optarg);
+				break;
+			case 'r':
+				opt->rr = atoi(optarg);
+				break;
+			case 't':
+				opt->id = 1;
+				break;
+			case 'n':
+				opt->ns = atoi(optarg);
+				break;
+			case 'B':
+				opt->bisulfite = atoi(optarg);
+				break;
+			case 'U':
+				opt->uniq = atoi(optarg);
+				break;
+			case 's':
+				opt->min_len = atoi(optarg);
+				break;
+#ifdef PTHREADS
+#define MAX_PTHREADS 20
+			case 'p':
+				opt->nthreads = min(atoi(optarg), MAX_PTHREADS);
+				break;
+#endif
+			case 'c':
+				opt->chain = atoi(optarg);
+				break;
+			//unrecognizable input
+			case 'h':
+			case '?':
+				Usage();	/* does not return */
+				break;
+			default:
+				/* accepted by the option string but not acted upon */
+				break;
+		}
+	}
+}
+
+/*
+ * Build every index file name as "<database_prefix><suffix>" and store it
+ * in the matching global buffer.  Output longer than
+ * MAX_FILENAME_LEN+MAX_SUFFIX_LEN-1 characters is truncated by snprintf.
+ */
+void FileNameIni(){
+	/* destination buffer / suffix pairs, in the order they are filled */
+	const struct {
+		char *dest;
+		const char *suffix;
+	} names[] = {
+		{ AnnotationFileName,       AnnotationSuffix       },
+		{ PackedDNAFileName,        PackedDNASuffix        },
+		{ BWTCodeFileName,          BWTCodeSuffix          },
+		{ BWTOccValueFileName,      BWTOccValueSuffix      },
+		{ SaValueFileName,          SaValueSuffix          },
+		{ RevPackedDNAFileName,     RevPackedDNASuffix     },
+		{ RevBWTCodeFileName,       RevBWTCodeSuffix       },
+		{ RevBWTOccValueFileName,   RevBWTOccValueSuffix   },
+		{ LookupTableFileName,      LookupTableSuffix      },
+		{ RevLookupTableFileName,   RevLookupTableSuffix   },
+		{ HighOccHashTableFileName, HighOccHashTableSuffix },
+	};
+	size_t k;
+
+	for (k = 0; k < sizeof(names) / sizeof(names[0]); ++k) {
+		snprintf(names[k].dest, MAX_FILENAME_LEN+MAX_SUFFIX_LEN, "%s%s",
+				database_prefix, names[k].suffix);
+	}
+}
+#define MODE S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH
+
+/* Open `path` read-only; report the failure and exit if that is not possible. */
+static int OpenQueryOrDie(const char *path){
+	int fd = open(path, O_RDONLY);
+	if(fd == -1){
+		fprintf(stderr, "Query File Error: Can't read  %s\n", path);
+		exit(EXIT_FAILURE);
+	}
+	return fd;
+}
+
+/* Create (or truncate) `path` for writing; report the failure and exit if that is not possible. */
+static int CreateOutputOrDie(const char *path){
+	int fd = creat(path, MODE);
+	if(fd == -1){
+		fprintf(stderr, "Output File Error: Can't write %s\n", path);
+		exit(EXIT_FAILURE);
+	}
+	return fd;
+}
+
+/*
+ * Open all input and output files named on the command line and store the
+ * descriptors in `fds`: query a and the alignment output always, query b and
+ * the unpaired output when opt->pe is set, the unmapped output when
+ * opt->unmapped is set.  The names in use are echoed to stderr.  Any failure
+ * prints a message and terminates the process.
+ */
+void FileTest(FILEDS *fds){
+	const int paired = opt->pe;
+
+	fds->ifdA   = OpenQueryOrDie(readAFileName);
+	fds->ofdAln = CreateOutputOrDie(outFileName);
+	if(paired){
+		fds->ifdB  = OpenQueryOrDie(readBFileName);
+		fds->ofdSe = CreateOutputOrDie(outUnpairFileName);
+	}
+
+	fprintf(stderr, "Query File a: %s\n", readAFileName);
+	if(paired){
+		fprintf(stderr, "Query File b: %s\n", readBFileName);
+	}
+	fprintf(stderr, "Output File: %s\n", outFileName);
+	if(paired){
+		fprintf(stderr, "             %s\n", outUnpairFileName);
+	}
+
+	if(!opt->unmapped){
+		return;
+	}
+	fds->ofdUn = CreateOutputOrDie(outUnmapFileName);
+	fprintf(stderr, "             %s\n", outUnmapFileName);
+}
+
+/*
+ * Program entry point: set default options, parse the command line, open
+ * the query/output files, load the index files derived from the -D prefix,
+ * run MatchProcess() and report the elapsed times on stderr.
+ *
+ * Returns EXIT_SUCCESS; the error paths inside Usage() and FileTest()
+ * terminate the process themselves.
+ */
+int main(int argc, char *argv[]){
+
+	/* defaults must exist before Usage() runs, because it prints them */
+	opt = OptIni();
+	if (argc < 3) {
+		Usage();
+	}
+	
+	HSP *hsp;
+
+	BWT *bwt;
+	BWT *rev_bwt;
+
+	LOOKUPTABLE lookup;
+	LOOKUPTABLE rev_lookup;
+
+	HASHTABLE hashtable;
+
+	MMPool *mmPool;
+
+	MMMasterInitialize(3, 0, FALSE, NULL);
+	mmPool = MMPoolCreate(PoolSize);
+
+	HSPFillCharMap(charMap);
+	HSPFillComplementMap(complementMap);
+	asciiTime("Begin Program SOAPaligner/soap2");
+	double startTime = setStartTime();
+	double elapsedTime = 0;
+	double loadTime=0;
+	
+	ParseOpt(argc, argv);
+	/* index file names depend on database_prefix, which ParseOpt() just set */
+	FileNameIni();
+	fprintf(stderr, "Reference: %s\n", database_prefix);
+	FILEDS fds;
+	FileTest(&fds);
+	fprintf(stderr, "Load Index Table ...\n");
+//*
+	hsp = HSPLoad(mmPool, PackedDNAFileName, AnnotationFileName);
+	bwt = BWTLoad(mmPool, BWTCodeFileName, BWTOccValueFileName, SaValueFileName, NULL, NULL, NULL);
+	rev_bwt = BWTLoad(mmPool, RevBWTCodeFileName, RevBWTOccValueFileName, NULL, NULL, NULL, NULL);
+	LoadLookupTable(&lookup,LookupTableFileName,LookUpTableSize);
+	LoadLookupTable(&rev_lookup,RevLookupTableFileName,RevLookUpTableSize);
+	LoadHashTable(&hashtable,HighOccHashTableFileName);
+	RegisterDecoder(bwt,&hashtable);
+	loadTime = getElapsedTime(startTime);
+	//*/
+	fprintf(stderr, "Load Index Table OK\n");
+	fprintf(stderr, "Begin Alignment ...\n");
+	MatchProcess(&fds, bwt, rev_bwt, &lookup, &rev_lookup, hsp, opt);
+	//*/
+	/* alignment time is reported as total elapsed time minus index load time */
+	elapsedTime = getElapsedTime(startTime);
+	fprintf(stderr, "Total Elapsed Time:       %9.2f\n"
+			"      - Load Index Table: %9.2f\n"
+			"      - Alignment:        %9.2f\n", elapsedTime, loadTime, (elapsedTime-loadTime));
+
+	FreeLookupTable(&lookup);
+	FreeLookupTable(&rev_lookup);
+	FreeHashTable(&hashtable);
+	HSPFree(mmPool, hsp);
+	BWTFree(mmPool, bwt);
+	BWTFree(mmPool, rev_bwt);
+	MMPoolFree(mmPool);
+	/* close only the descriptors FileTest() opened for the selected modes */
+	/* NOTE(review): close() is declared in <unistd.h>, which this file does not include - confirm */
+	close(fds.ifdA);close(fds.ofdAln);
+	if(opt->pe){close(fds.ifdB); close(fds.ofdSe);}
+	if(opt->unmapped)close(fds.ofdUn);
+	free(opt);
+
+	asciiTime("SOAPaligner/soap2 End");
+	fprintf(stderr, "\n");
+	return EXIT_SUCCESS;
+}				/* ----------  end of function main  ---------- */
diff --git a/soap.h b/soap.h
new file mode 100644
index 0000000..6f15d89
--- /dev/null
+++ b/soap.h
@@ -0,0 +1,24 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  soap.h
+ *
+ *    Description:  
+ *
+ *        Version:  1.0
+ *        Created:  2009年02月16日 02时23分25秒
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+
+#ifndef __soap_H__
+#define __soap_H__
+
+
+#endif             /*           soap.h         */
diff --git a/soap.man b/soap.man
new file mode 100644
index 0000000..7a44042
--- /dev/null
+++ b/soap.man
@@ -0,0 +1,145 @@
+SOAPaligner/soap2(1)          Bioinformatics tool         SOAPaligner/soap2(1)
+
+
+
+NNAAMMEE
+       SOAPaligner/soap2 - Short Oligonucleotide Analysis Package aligner
+
+SSYYNNOOPPSSIISS
+       soap reference.index short_reads.fast[a|q] alignment.out [options]
+
+DDEESSCCRRIIPPTTIIOONN
+       SOAPaligner/soap2 is a member of the SOAP (Short Oligonucleotide Analy-
+       sis Package). It is an updated  version  of  SOAP  software  for  short
+       oligonucleotide  alignment.  The new program features in super fast and
+       accurate alignment for huge amounts of short reads generated  by  Illu-
+       mina/Solexa  Genome  Analyzer.  Compared to soap v1, it is one order of
+       magnitude faster. It require only 2 minutes aligning one  million  sin-
+       gle-end  reads  onto  the  human  reference  genome. Another remarkable
+       improvement of SOAPaligner is that it now supports a wide range of  the
+       read length.
+
+       SOAPaligner  benefitted in time and space efficiency by a revolution in
+       the basic data structures and algorithms used.The core  algorithms  and
+       the indexing data structures (2way-BWT) are developed by the algorithms
+       research group of the Department of Computer Science, the University of
+       Hong Kong (T.W. Lam, Alan Tam, Simon Wong, Edward Wu and S.M. Yiu).
+
+CCOOMMMMAANNDD AANNDD OOPPTTIIOONNSS
+       ssooaapp  -D  <in.fasta.index>  -a  <query.file.a>  [-b  <query.file.b>] -o
+       <alignment.output> [-2 <unpaired.output>] [options]
+
+       OOPPTTIIOONNSS::
+
+              --DD SSTTRR Prefix name for reference index [*.index].  See  AAPPPPEENNDDIIXX
+                     How to build the reference index
+
+              --aa SSTTRR Query file, for SE reads alignment or one end of PE reads
+
+              --bb SSTTRR Query b file, one end of PE reads
+
+              --oo SSTTRR Output file for alignment results
+
+              --22 SSTTRR Output file contains mapped but unpaired reads when do PE
+                     alignment
+
+              --uu SSTTRR Output file for unmapped reads, [none]
+
+              --mm IINNTT Minimal insert size INT allowed for PE, [400]
+
+              --xx IINNTT Maximal insert size INT allowed for PE, [600]
+
+              --nn IINNTT Filter low quality reads containing more INT bp Ns, [5]
+
+              --tt     Output reads id instead reads name, [none]
+
+              --rr IINNTT How  to  report repeat hits, 0=none; 1=random one; 2=all,
+                     [1]
+
+              --RR     RF alignment for long insert size(>=  2k  bps)  PE  data,
+                     [none] FR alignment
+
+              --ll IINNTT For  long  reads  with  high  error rate at 3'-end, those
+                     can't align whole length, then first align 5' INT bp sub-
+                     sequence as a seed, [256] use whole length of the read
+
+              --ss IINNTT minimal alignment length (for soft clip)
+
+              --vv IINNTT Totally  allowed  mismatches in one read, when use subse-
+                     quence as a seed, [5]
+
+              --gg IINNTT Allow gap size in one read, [0]
+
+              --MM IINNTT Match mode for each read or the seed part of read,  which
+                     shouldn't contain more than 2 mismatches, [4]
+
+                     0: exact match only
+
+                     1: 1 mismatch match only
+
+                     2: 2 mismatch match only
+
+                     4: find the best hits
+              --pp IINNTT Multithreads, n threads, [1]
+
+OOUUTTPPUUTT FFOORRMMAATT
+       SOAP2 output format contains following column information:
+
+       1. reads name / reads ID (if -t is available)
+
+       2. reads sequence (if read align to reverse strand, here is the reverse
+       sequence of original read)
+
+       3. quality sequence (if input is fasta reads, the column  will  be  all
+       'h', and the sequence is backward if reads mapping reverse )
+
+       4.
+
+AAPPPPEENNDDIIXX
+       Before use soap2 to do alignment, the reference index must be generated
+       by 2bwt-builder.
+
+              22bbwwtt--bbuuiillddeerr <reference.fasta>
+
+              NNOOTTEE:: 1. the reference input should only be FASTA format; 2. the
+              program will auto generate the index files in the directory where
+              the fasta file is located, so confirm the permission at first.
+
+EENNVVIIRROONNMMEENNTT
+       The datastructure is incompatible with 32bit, so it can't  be  migrated
+       on  any 32bit platforms.  Due to using the MMX instruction to  optimize
+       parts of code, the current version can only run on xx8866__6644 ppllaattffoorrmm..  We
+       will  provide a universal version for most of the 64bit platform later.
+
+       HHAARRDDWWAARREE RREEQQUUIIRREEMMEENNTT
+              1.8Gb RAM (for a genome as large as human's)
+
+              2.at least 8Gb hard disk to store index (for a genome  as  large
+              as human's)
+
+       SSYYSSTTEEMM RREEQQUUIIRREEMMEENNTT
+              Linux x86_64
+
+SSEEEE AALLSSOO
+       Website for SOAP <http://soap.genomics.org.cn>,
+
+       Google Group for SOAP <http://groups.google.com/group/bgi-soap>
+
+       PPuubblliiccaattiioonn::
+              "SOAP: short oligonucleotide alignment program" (2008) BIOINFOR-
+              MATICS,Vol. 24 no.5 2008, pages 713-714
+
+AAUUTTHHOORR
+       BBGGII SShheennzzhheenn SOAP team. The core algorithm Bidirect-BWT is  written  by
+       Prof. T.W. Lam and his team at HongKong University.
+
+RREEPPOORRTT BBUUGGSS
+       Report bugs to <soap at genomics.org.cn>
+
+AACCKKNNOOWWLLEEDDGGEEMMEENNTTSS
+       We  appreciate Prof. T.W. Lam, Alan Tam, Simon Wong, Edward Wu and S.M.
+       Yiu prominent work on Bidirect-BWT.
+
+
+
+SOAPaligner-2.1X                  25 May 2009             SOAPaligner/soap2(1)
diff --git a/soapio.c b/soapio.c
new file mode 100644
index 0000000..a1aa146
--- /dev/null
+++ b/soapio.c
@@ -0,0 +1,302 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  soapio.c
+ *
+ *    Description:  
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+#include "soapio.h"
+#include <assert.h>
+
+#define BUF_SIZE 0xF00000
+#define BUF_PER_LINE 0x800
+       
+void FreeMultiSeq(MULTISEQ *mseqs){
+	/* Release the per-read buffers of a batch; seqList and mseqs themselves are not freed here. */
+	ALNSEQ *cur = mseqs->seqList;
+	int left;
+	for(left = mseqs->n; left > 0; --left, ++cur){
+		free(cur->name); free(cur->seq); free(cur->rc);
+		free(cur->qual); free(cur->rcqual);
+		if(cur->report <= 0) continue;
+		/* NOTE(review): only the first hit's md/cigar are released even when report > 1 -- confirm who owns the others. */
+		free(cur->itemList->md);
+		free(cur->itemList->cigar);
+		free(cur->itemList);
+	}
+}
+
+static inline  char * reverse(const  char *seq, int len){
+	/* Returns a heap copy of seq[0..len) in reverse order, always NUL-terminated; NULL if allocation fails. Caller frees. */
+	char *rc = (char *)calloc((size_t)len + 1, sizeof(char)), *dst = rc;	/* zero-filled, so byte [len] is the terminator */
+	while(dst != NULL && len > 0) *dst++ = seq[--len];	/* walk seq from its last byte to its first */
+	return rc;
+}
+
+#define SEQDUP(dest, ori, pe) {	/* copy record `ori` (a seq_t at the call sites in this file) into *dest, then advance dest */         \
+	dest->flag = pe;		\
+	dest->tid = -1;			\
+	dest->id  = id;	/* `id` is not a macro argument: it is picked up from the expansion site */			\
+	dest->len = ori.l;		\
+	dest->nhits = dest->report = 0;		\
+	dest->ns = ori.ns;		\
+	dest->seq = (char *)malloc(sizeof(char)*ori.l);	/* exactly ori.l bytes, no terminator is stored */		\
+	memcpy(dest->seq, ori.seq, ori.l);		\
+	dest->rc = reverse(ori.rc, ori.l);		\
+	dest->qual = strdup(ori.qual);		\
+	dest->rcqual = reverse(ori.qual, ori.l);		\
+	dest->rcqual[ori.l] = '\0';	/* terminate the reversed quality string explicitly */			\
+	dest->name = strdup(ori.name);		\
+	dest++;	/* side effect: moves the caller's pointer variable to the next slot */			\
+}
+
+int GenMultiReads(const HSP *hsp, MULTISEQ *mseqs, const int len, const int pe, unsigned int *start, int *nb){
+	/* Cut up to MAX_MULTI_READS consecutive len-base windows out of the packed reference, starting at *start in block *nb;
+	 * fills mseqs->seqList, writes the resume position/block back through start/nb and returns the number of reads made. */
+	const unsigned int *pacRef = hsp->packedDNA;
+	char **chrName = hsp->chrName;
+	const unsigned int refLen = hsp->dnaLength;
+	const ChrBlock  *blockList = hsp->blockList;
+	ALNSEQ *alnSeq = mseqs->seqList;
+	int num = 0, i = 0;
+	unsigned int st = *start;
+	int n = *nb ;
+	unsigned int ori = (blockList+n)->ori;
+	unsigned int blockStart = (blockList+n)->blockStart;
+	unsigned int blockEnd = (blockList+n)->blockEnd;
+	while (num < MAX_MULTI_READS && (unsigned int)len < refLen && st < refLen-len) {	/* the extra test keeps refLen-len from wrapping */
+		if (st+len-1>blockEnd) {	/* window runs past this block: switch block before allocating, so nothing is leaked */
+			n++;	/* NOTE(review): no upper bound on the block index is checked here */
+			ori = (blockList+n)->ori;
+			blockStart = (blockList+n)->blockStart;
+			blockEnd = (blockList+n)->blockEnd;
+			continue;
+		}
+		alnSeq->name = (char *)malloc(sizeof(char)*MAX_NAME_LEN);
+		alnSeq->seq = (char *)malloc(sizeof(char)*(len+1));
+		alnSeq->rc = (char *)malloc(sizeof(char)*(len+1));
+		alnSeq->qual = (char *)malloc(sizeof(char)*(len+1));
+		alnSeq->rcqual = (char *)malloc(sizeof(char)*(len+1));	/* FreeMultiSeq() frees it and SOAPOUT reads it, so it must be valid */
+		for (i=0; i<len; ++i) {
+			*(alnSeq->seq+i) = ((*(pacRef+((st+i)>>4)))>>(30-((st+i)&0xf)*2))&0x3;	/* 2 bits per base, 16 bases per word, first base in the top bits */
+			*(alnSeq->rc+len-i-1) = (~((*(alnSeq->seq+i))&0x3))&0x3;	/* complemented code, stored back to front */
+			*(alnSeq->qual+i) = *(alnSeq->rcqual+i) = 'h';	/* constant quality, identical in both directions */
+		}
+		snprintf(alnSeq->name, MAX_NAME_LEN, ">%s_%u", chrName[(blockList+n)->chrID], st-blockStart+ori+1);	/* bounded; %u matches the unsigned coordinate */
+		alnSeq->qual[len] = alnSeq->rcqual[len] = '\0';
+		alnSeq->report = 0;
+		alnSeq->nhits = 0;
+		alnSeq->ns = 0;
+		alnSeq->tid = -1;
+		alnSeq->flag = 0;
+		alnSeq->len = len;
+		alnSeq->id = st++;
+		num++;
+		alnSeq++;
+	}
+	*start = st;
+	*nb = n;
+	mseqs->n = num;
+	return num;
+}
+
+int GetMultiSeq (InFileList *ifp, MULTISEQ *mseqs, const int pe, int(*get_read)(FILE * , seq_t * , const int)){	/* load the next batch of reads into mseqs->seqList (mates interleaved when pe != 0); returns the number of entries stored */
+#ifdef DEBUG
+//	fprintf(stderr, "Get Multi Seqs\n");
+#endif
+	ALNSEQ *alnSeq;
+	alnSeq= mseqs->seqList;
+	int num, len, id;
+	FILE * ifpA, * ifpB;
+	num = 0;
+	ifpA = ifp->ifpA; ifpB = ifp->ifpB;
+	id  = ifp->id;	/* continue numbering where the previous call stopped */
+	seq_t tmp;	/* scratch record handed to get_read(); its seq/qual/rc buffers are freed before returning */
+	tmp.max = tmp.l = 0; 
+	tmp.seq = tmp.rc = tmp.qual = NULL;
+	while(num < MAX_MULTI_READS){
+		tmp.ns = 0;
+		if (feof(ifpA)||(pe && feof(ifpB))) break;
+		if ((len=get_read(ifpA, &tmp, TRUE))>0){
+			SEQDUP(alnSeq, tmp, pe);	/* copies tmp into *alnSeq and advances alnSeq */
+			++num;
+			if(pe &&(len=get_read(ifpB, &tmp, TRUE))>0){	/* NOTE(review): if this read fails, the batch keeps an unpaired entry */
+				SEQDUP(alnSeq, tmp, pe);
+				++num ;
+			}
+			++id;	/* one id per read, or per pair: both mates were stored with the same id */
+		}
+	}
+	mseqs->n = num;
+	ifp->id = id;
+//	fprintf(stderr, "%d\n", num);
+	/*
+#ifdef DEBUG
+	int j;
+	fprintf(stderr, "len :%d\n", len);
+	fprintf(stderr, "fw\n");
+	for(j = 0; j<len; j++) fprintf(stderr, "%d", *(tmp.seq+j));
+	fprintf(stderr, "\n");
+	fprintf(stderr, "rc\n");
+	for(j = 0; j<len; j++) fprintf(stderr, "%d", *(tmp.rc+j));
+	fprintf(stderr, "\n");
+	alnSeq--;
+	for(j = 0; j < alnSeq->len; j++) fprintf(stderr, "rc %d", *(alnSeq->rc+j));
+	fprintf(stderr, "\n");
+	fprintf(stderr, "%s\n%s\n", alnSeq->name, alnSeq->qual);
+	fprintf(stderr, "soap get multisequences ...\n");
+#endif
+	exit(0);
+	//*/
+	free(tmp.seq); free(tmp.qual);free(tmp.rc);
+	return num;
+}
+
+#define SOAPOUT(file){	\
+/*	Appends one SOAP-format result line for `hit` to `str`. Uses these names from the expansion site:	\
+	o, str, alnSeq, hit, seq, rc, len, strain, n_seedMM, info_seedMM, j, chrName. `file` indexes "ab". */			\
+	int k = 0;		\
+	if(o->id)		\
+		ksprintf(str, "%d\t", alnSeq->id);\
+	else 		\
+		ksprintf(str, "%s\t", alnSeq->name);	\
+	int n_cigar = hit->n_cigar;			\
+	int beg=0, end=len;			\
+	if(hit->cigar[0]>>14 == 3) beg = hit->cigar[0]&0x3ff;		\
+	if(hit->cigar[n_cigar-1]>>14 == 3) end = len - (hit->cigar[n_cigar-1]&0x3ff);		\
+	if(strain){		\
+		for(k=beg; k<end; ++k){		\
+			kputc("ACGTN"[(int)*(rc+k)], str);		\
+		}				\
+		ksprintf(str, "\t");			\
+		for(k=beg; k<end; ++k){		\
+			kputc(alnSeq->rcqual[k], str);		\
+		}				\
+		ksprintf(str, "\t");			\
+	} else {		\
+		for(k=beg; k<end; ++k){		\
+			kputc("ACGTN"[(int)*(seq+k)], str);		\
+		}				\
+		ksprintf(str, "\t");			\
+		for(k=beg; k<end; ++k){		\
+			kputc(alnSeq->qual[k], str);		\
+		}				\
+		ksprintf(str, "\t");			\
+	}			\
+	/*			\
+	fprintf(stderr, "%s\n", alnSeq->qual);			\
+	fprintf(stderr, "%d\n", alnSeq->nhits);			\
+	fprintf(stderr, "%c\n", "ab"[file]); fprintf(stderr, "%d\n", alnSeq->len);fprintf(stderr,"%c\n", "+-"[strain]);fprintf(stderr, "%s\n", chrName[(alnSeq->itemList+j)->chr]);fprintf(stderr, "%u\n", alnSeq->itemList->pos);			\
+	*/		\
+	ksprintf(str, "%d\t%c\t%d\t%c\t%s\t%d\t", alnSeq->nhits,"ab"[file], end-beg, "+-"[strain], chrName[hit->chr], hit->pos);			\
+	if(!n_seedMM)ksprintf(str, "0\t");	\
+	else if (n_seedMM == 1) 	/* reverse strand: quality index is len-1-offset, exactly as in the two-mismatch case below */		\
+		ksprintf(str, "1\t%c->%d%c%d\t", "ACGT"[(info_seedMM>>8)&3], info_seedMM&0xff, strain?"ACGT"[(int)rc[info_seedMM&0xff]]:"ACGT"[(int)seq[info_seedMM&0xff]], (strain?alnSeq->qual[len-(info_seedMM&0xff)-1]:alnSeq->qual[info_seedMM&0xff])-'@');		\
+	else if (n_seedMM == 2) 		\
+		ksprintf(str, "2\t%c->%d%c%d\t%c->%d%c%d\t", 	\
+				"ACGT"[(info_seedMM>>8)&3], info_seedMM&0xff, strain?"ACGT"[(int)rc[info_seedMM&0xff]]:"ACGT"[(int)seq[info_seedMM&0xff]], (strain?alnSeq->qual[len-(info_seedMM&0xff)-1]:alnSeq->qual[info_seedMM&0xff])-'@',				\
+				"ACGT"[(info_seedMM>>20)&3], (info_seedMM>>12)&0xff, strain?"ACGT"[(int)rc[(info_seedMM>>12)&0xff]]:"ACGT"[(int)seq[(info_seedMM>>12)&0xff]], (strain?alnSeq->qual[len-1-((info_seedMM>>12)&0xff)]:alnSeq->qual[(info_seedMM>>12)&0xff])-'@');		\
+	else if (n_seedMM == 3) {		\
+		ksprintf(str, "%d\t%d\t", (100+1+hit->n_gape), (info_seedMM>>12)&0xff);		\
+	}		\
+	else if (n_seedMM == 4) {		\
+		ksprintf(str, "%d\t%d\t", (200+1+hit->n_gape), (info_seedMM>>12)&0xff);		\
+	}		\
+	if ((alnSeq->itemList+j)->n_cigar){		\
+		for (k=0; k<hit->n_cigar;k++)			\
+			ksprintf(str, "%d%c", hit->cigar[k]&0x3ff, "MIDS"[(hit->cigar[k]>>14)]);		\
+	}else ksprintf(str, "%dM\t", alnSeq->len);		\
+	ksprintf(str, "\t%s\n", hit->md);			\
+}
+
+#if 0
+#define BINARY_SOAP() {			\
+	fwrite(&(alnSeq->itemList+j)->id, sizeof(unsigned int), 1, ofp);				\
+	fwrite(&(alnSeq->itemList+j)->chr, sizeof(unsigned int), 1, ofp);				\
+	fwrite(&(alnSeq->itemList+j)->pos, sizeof(unsigned int), 1, ofp);				\
+	fwrite(&(alnSeq->itemList+j)->len, sizeof(unsigned int), 1, ofp);			\
+	int k;				\
+	if (strain) {			\
+		for(k=0; k<(alnSeq->itemList+j)->len; ++k)		\
+			fwrite();						\
+	} else {		\
+		for(k=0; k<(alnSeq->itemList+j)->len; ++k)		\
+			fwrite();						\
+	}				\
+}
+#endif
+
+#include <assert.h>
+
+void DumpAln(MULTISEQ *mseqs, OUTAUX *o, OutFileList *ofp,unsigned int *nAln, unsigned int *nSE){
+	/*
+	 * Write one batch of results.  For every read in seqList[0, mseqs->n):
+	 *  - with hits (report != 0): one SOAPOUT line per reported hit goes to
+	 *    ofp->ofpAln when flag bit 1 is set or flag bit 0 is clear, otherwise
+	 *    to ofp->ofpSe; *nAln or *nSE is bumped once for the read;
+	 *  - without hits and o->un set: the read is dumped FASTA-style to
+	 *    ofp->ofpUn.
+	 * o supplies chrName and the o->id / o->un switches; nAln and nSE are
+	 * in/out counters owned by the caller.  Exits if the scratch string
+	 * cannot be allocated.
+	 */
+	int n, i;
+	n = mseqs->n;
+	unsigned int n_aln = *nAln;
+	unsigned int n_se  = *nSE;
+	char **chrName = o->chrName ;
+	ALNSEQ *alnSeq;
+	HITITEM *hit;
+	FILE *ofpAln, *ofpSe, *ofpUn;
+	ofpAln = ofp->ofpAln;
+	ofpSe  = ofp->ofpSe;
+	ofpUn  = ofp->ofpUn;
+	kstring_t *str = (kstring_t *)calloc(1, sizeof(kstring_t));
+	if (str == NULL) {
+		fprintf(stderr, "DumpAln: out of memory\n");
+		exit(EXIT_FAILURE);
+	}
+	for (i=0; i<n; ++i) {
+		alnSeq = mseqs->seqList + i;
+		int j = 0;
+		if (alnSeq->report) {
+			/* The locals below (and j, hit, str, o, chrName) are read by name inside SOAPOUT. */
+			unsigned int strain, n_seedMM, info_seedMM, len;
+			char *seq = alnSeq->seq;
+			char *rc = alnSeq->rc;
+			int toAln = (alnSeq->flag>>1&0x1) || !(alnSeq->flag&0x1);
+			FILE *out = toAln ? ofpAln : ofpSe;
+			len = alnSeq->len;
+			hit = alnSeq->itemList;
+			for(j=0; j<alnSeq->report; ++j) {
+				str->l = 0;
+				strain = hit->strain;
+				n_seedMM = hit->info >> 25 & 0x7;
+				info_seedMM = hit->info & 0xffffff;
+				SOAPOUT((alnSeq->flag&1)?(i&1):0);
+				fprintf(out, "%s", str->s);
+				++hit;
+			}
+			if (toAln) ++n_aln;
+			else ++n_se;
+		} else if (o->un) {
+			fprintf(ofpUn, ">%s\n", alnSeq->name);
+			for(j=0;j<alnSeq->len;j++)	/* "ACGTN" as in SOAPOUT: a code of 4, if present, prints N instead of the string terminator */
+				fprintf(ofpUn, "%c", "ACGTN"[(int)*(alnSeq->seq+j)]);
+			fprintf(ofpUn, "\n");
+		}
+	}
+	*nAln = n_aln;
+	*nSE  = n_se;
+	free(str->s);
+	free(str);
+}
diff --git a/soapio.h b/soapio.h
new file mode 100644
index 0000000..f13ca6b
--- /dev/null
+++ b/soapio.h
@@ -0,0 +1,68 @@
+/*
+ * =============================================================================
+ *
+ *       Filename:  soapio.h
+ *
+ *    Description:  
+ *
+ *       Revision:  none
+ *       Compiler:  gcc 4.3.2 or above
+ *
+ *         Author:  Chang Yu (yc), yuchang at genomics.org.cn
+ *        Company:  BGI Shenzhen
+ *      CopyRight:  Copyright (c) 2009, BGI Shenzhen
+ *
+ * =============================================================================
+ */
+#ifndef  _SOAPIO_H_
+#define  _SOAPIO_H_
+#include <stdio.h>
+#include <stdlib.h>
+#include "extratools.h"
+#include "SeqIO.h"
+#define MAX_MULTI_READS 0x20000
+
+typedef struct _ALNSEQ_TYPE_{	/* one read together with its reported hits */
+	int tid;	/* set to -1 when the record is created in soapio.c */
+	int id, len, ns;	/* read id; number of bases; ns is copied from seq_t.ns (presumably the count of N bases -- TODO confirm) */
+	char *name, *seq, *rc, *qual, *rcqual;	/* heap buffers freed by FreeMultiSeq(); seq/rc hold base codes, qual/rcqual hold text */
+	unsigned int flag;	/* DumpAln() tests bit 0 and bit 1 to pick the output stream */
+	int nhits;	/* printed by DumpAln() as the hit-count column */
+	struct {
+		int H0;
+		int H1;
+		int H2;
+	}top;	/* not referenced in soapio.c; meaning to be confirmed */
+	int report;	/* number of itemList entries DumpAln() writes out */
+	HITITEM *itemList;	/* hits of this read; freed by FreeMultiSeq() when report > 0 */
+}ALNSEQ;
+
+typedef struct _MULTISEQ_TYPE_{	/* a batch of reads */
+	int n, max;	/* n: number of valid entries in seqList; max is not referenced in soapio.c (presumably the capacity -- TODO confirm) */
+	ALNSEQ *seqList;
+}MULTISEQ;
+
+typedef struct _OUTAUX_TYPE_{	/* output options consulted by DumpAln() */
+	int id, un, chrNum;	/* id != 0: print the numeric read id instead of the name; un != 0: dump unaligned reads; chrNum: presumably the length of chrName -- TODO confirm */
+	char **chrName;	/* indexed by the chr field of a hit */
+	int allErr;	/* not referenced in soapio.c */
+}OUTAUX;
+
+typedef struct _INFILELIST_{	/* input streams for GetMultiSeq() */
+	FILE *ifpA, *ifpB;	/* ifpB is only read when pe != 0 */
+        int id;	/* running read id, carried over between GetMultiSeq() calls */
+	int lock;	/* not referenced in soapio.c */
+}InFileList;
+
+typedef struct _OUTFILELIST_{	/* output streams for DumpAln() */
+	FILE *ofpAln, *ofpSe, *ofpUn;	/* see DumpAln() for which records go to which stream */
+        int id;	/* not referenced in soapio.c */
+	int lock;	/* not referenced in soapio.c */
+}OutFileList;
+
+void FreeMultiSeq(MULTISEQ *);
+int GenMultiReads(const HSP *, MULTISEQ *, const int , const int , unsigned int *, int *);
+int GetMultiSeq (InFileList *, MULTISEQ *, const int , int (*)(FILE *, seq_t *, const int));
+void DumpAln(MULTISEQ *, OUTAUX *, OutFileList *,unsigned int *, unsigned int *);
+
+#endif   /* ----- #ifndef SOAPIO_INC  ----- */
diff --git a/stdaln.c b/stdaln.c
new file mode 100644
index 0000000..c1bf9a5
--- /dev/null
+++ b/stdaln.c
@@ -0,0 +1,856 @@
+/* The MIT License
+
+   Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3 at gmail.com>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "stdaln.h"
+
+/* char -> 17 (=16+1) nucleotides */
+unsigned char aln_nt16_table[256] = {
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,16 /*'-'*/,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+	15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+	15, 1,14, 4, 11,15,15, 2, 13,15,15,10, 15, 5,15,15,
+	15,15, 3, 6,  8,15, 7, 9,  0,12,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
+};
+char *aln_nt16_rev_table = "XAGRCMSVTWKDYHBN-";
+
+/* char -> 5 (=4+1) nucleotides */
+unsigned char aln_nt4_table[256] = {
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /*'-'*/, 4, 4,
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 0, 4, 2,  4, 4, 4, 1,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
+	4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+char *aln_nt4_rev_table = "AGCTN-";
+
+/* char -> 22 (=20+1+1) amino acids */
+unsigned char aln_aa_table[256] = {
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,20,21, 21,22 /*'-'*/,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
+	14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
+	21, 0,21, 4,  3, 6,13, 7,  8, 9,21,11, 10,12, 2,21,
+	14, 5, 1,15, 16,21,19,17, 21,18,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21,
+	21,21,21,21, 21,21,21,21, 21,21,21,21, 21,21,21,21
+};
+char *aln_aa_rev_table = "ARNDCQEGHILKMFPSTWYV*X-";
+                       /* 01234567890123456789012 */
+
+/* translation table. They are useless in stdaln.c, but when you realize you need it, you need not write the table again. */
+unsigned char aln_trans_table_eu[66] = {
+	11,11, 2, 2,  1, 1,15,15, 16,16,16,16,  9,12, 9, 9,
+	 6, 6, 3, 3,  7, 7, 7, 7,  0, 0, 0, 0, 19,19,19,19,
+	 5, 5, 8, 8,  1, 1, 1, 1, 14,14,14,14, 10,10,10,10,
+	20,20,18,18, 20,17, 4, 4, 15,15,15,15, 10,10,13,13, 21, 22
+};
+char *aln_trans_table_eu_char = "KKNNRRSSTTTTIMIIEEDDGGGGAAAAVVVVQQHHRRRRPPPPLLLL**YY*WCCSSSSLLFFX";
+                              /* 01234567890123456789012345678901234567890123456789012345678901234 */
+int aln_sm_blosum62[] = {
+/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
+	 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
+	-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
+	-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
+	-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
+	 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
+	-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
+	-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
+	 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
+	-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
+	-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
+	-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
+	-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
+	-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
+	-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
+	-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
+	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
+	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
+	-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
+	-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
+	 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
+	-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
+	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
+};
+
+int aln_sm_blosum45[] = {
+/*	 A  R  N  D  C  Q  E  G  H  I  L  K  M  F  P  S  T  W  Y  V  *  X */
+	 5,-2,-1,-2,-1,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-2,-2, 0,-5, 0,
+	-2, 7, 0,-1,-3, 1, 0,-2, 0,-3,-2, 3,-1,-2,-2,-1,-1,-2,-1,-2,-5,-1,
+	-1, 0, 6, 2,-2, 0, 0, 0, 1,-2,-3, 0,-2,-2,-2, 1, 0,-4,-2,-3,-5,-1,
+	-2,-1, 2, 7,-3, 0, 2,-1, 0,-4,-3, 0,-3,-4,-1, 0,-1,-4,-2,-3,-5,-1,
+	-1,-3,-2,-3,12,-3,-3,-3,-3,-3,-2,-3,-2,-2,-4,-1,-1,-5,-3,-1,-5,-2,
+	-1, 1, 0, 0,-3, 6, 2,-2, 1,-2,-2, 1, 0,-4,-1, 0,-1,-2,-1,-3,-5,-1,
+	-1, 0, 0, 2,-3, 2, 6,-2, 0,-3,-2, 1,-2,-3, 0, 0,-1,-3,-2,-3,-5,-1,
+	 0,-2, 0,-1,-3,-2,-2, 7,-2,-4,-3,-2,-2,-3,-2, 0,-2,-2,-3,-3,-5,-1,
+	-2, 0, 1, 0,-3, 1, 0,-2,10,-3,-2,-1, 0,-2,-2,-1,-2,-3, 2,-3,-5,-1,
+	-1,-3,-2,-4,-3,-2,-3,-4,-3, 5, 2,-3, 2, 0,-2,-2,-1,-2, 0, 3,-5,-1,
+	-1,-2,-3,-3,-2,-2,-2,-3,-2, 2, 5,-3, 2, 1,-3,-3,-1,-2, 0, 1,-5,-1,
+	-1, 3, 0, 0,-3, 1, 1,-2,-1,-3,-3, 5,-1,-3,-1,-1,-1,-2,-1,-2,-5,-1,
+	-1,-1,-2,-3,-2, 0,-2,-2, 0, 2, 2,-1, 6, 0,-2,-2,-1,-2, 0, 1,-5,-1,
+	-2,-2,-2,-4,-2,-4,-3,-3,-2, 0, 1,-3, 0, 8,-3,-2,-1, 1, 3, 0,-5,-1,
+	-1,-2,-2,-1,-4,-1, 0,-2,-2,-2,-3,-1,-2,-3, 9,-1,-1,-3,-3,-3,-5,-1,
+	 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-3,-1,-2,-2,-1, 4, 2,-4,-2,-1,-5, 0,
+	 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-1,-1, 2, 5,-3,-1, 0,-5, 0,
+	-2,-2,-4,-4,-5,-2,-3,-2,-3,-2,-2,-2,-2, 1,-3,-4,-3,15, 3,-3,-5,-2,
+	-2,-1,-2,-2,-3,-1,-2,-3, 2, 0, 0,-1, 0, 3,-3,-2,-1, 3, 8,-1,-5,-1,
+	 0,-2,-3,-3,-1,-3,-3,-3,-3, 3, 1,-2, 1, 0,-3,-1, 0,-3,-1, 5,-5,-1,
+	-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, 1,-5,
+	 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, 0,-2,-1,-1,-5,-1
+};
+
+int aln_sm_nt[] = {
+/*	 X  A  G  R  C  M  S  V  T  W  K  D  Y  H  B  N */
+	-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+	-2, 2,-1, 1,-2, 1,-2, 0,-2, 1,-2, 0,-2, 0,-2, 0,
+	-2,-1, 2, 1,-2,-2, 1, 0,-2,-2, 1, 0,-2,-2, 0, 0,
+	-2, 1, 1, 1,-2,-1,-1, 0,-2,-1,-1, 0,-2, 0, 0, 0,
+	-2,-2,-2,-2, 2, 1, 1, 0,-1,-2,-2,-2, 1, 0, 0, 0,
+	-2, 1,-2,-1, 1, 1,-1, 0,-2,-1,-2, 0,-1, 0, 0, 0,
+	-2,-2, 1,-1, 1,-1, 1, 0,-2,-2,-1, 0,-1, 0, 0, 0,
+	-2, 0, 0, 0, 0, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2,-2,-2,-1,-2,-2,-2, 2, 1, 1, 0, 1, 0, 0, 0,
+	-2, 1,-2,-1,-2,-1,-2, 0, 1, 1,-1, 0,-1, 0, 0, 0,
+	-2,-2, 1,-1,-2,-2,-1, 0, 1,-1, 1, 0,-1, 0, 0, 0,
+	-2, 0, 0, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2,-2,-2, 1,-1,-1, 0, 1,-1,-1, 0, 1, 0, 0, 0,
+	-2, 0,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2,-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	-2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+int aln_sm_read[] = {
+/*	  X   A   G   R   C   M   S   V   T   W   K   D   Y   H   B   N  */
+	-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,-17,
+	-17,  2,-17,  1,-17,  1,-17,  0,-17,  1,-17,  0,-17,  0,-17,  0,
+	-17,-17,  2,  1,-17,-17,  1,  0,-17,-17,  1,  0,-17,-17,  0,  0,
+	-17,  1,  1,  1,-17,-17,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,-17,-17,-17,  2,  1,  1,  0,-17,-17,-17,-17,  1,  0,  0,  0,
+	-17,  1,-17,-17,  1,  1,-17,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,-17,  1,-17,  1,-17,  1,  0,-17,-17,-17,  0,-17,  0,  0,  0,
+	-17,  0,  0,  0,  0,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,-17,-17,-17,-17,-17,-17,  2,  1,  1,  0,  1,  0,  0,  0,
+	-17,  1,-17,-17,-17,-17,-17,  0,  1,  1,-17,  0,-17,  0,  0,  0,
+	-17,-17,  1,-17,-17,-17,-17,  0,  1,-17,  1,  0,-17,  0,  0,  0,
+	-17,  0,  0,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,-17,-17,  1,-17,-17,  0,  1,-17,-17,  0,  1,  0,  0,  0,
+	-17,  0,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+	-17,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+};
+
+int aln_sm_hs[] = {
+/*     A    G    C    T    N */
+	  91, -31,-114,-123, -44,
+	 -31, 100,-125,-114, -42,
+	-123,-125, 100, -31, -42,
+	-114,-114, -31,  91, -42,
+	 -44, -42, -42, -42, -43
+};
+
+int aln_sm_maq[] = {
+	11, -19, -19, -19, -13,
+	-19, 11, -19, -19, -13,
+	-19, -19, 11, -19, -13,
+	-19, -19, -19, 11, -13,
+	-13, -13, -13, -13, -13
+	//*/
+};
+
+int aln_sm_blast[] = {
+	1, -3, -3, -3, -2,
+	-3, 1, -3, -3, -2,
+	-3, -3, 1, -3, -2,
+	-3, -3, -3, 1, -2,
+	-2, -2, -2, -2, -2
+};
+
+/********************/
+/* START OF align.c */
+/********************/
+
+AlnParam aln_param_blast   = {  5,  2,  5, aln_sm_blast, 5, 50 };
+AlnParam aln_param_bwa     = { 30,  5,  0, aln_sm_maq, 5, 50 };
+AlnParam aln_param_nt2nt   = {  8,  2,  2, aln_sm_nt, 16, 75 };
+AlnParam aln_param_rd2rd   = {  1, 19, 19, aln_sm_read, 16, 75 };
+AlnParam aln_param_aa2aa   = { 10,  2,  2, aln_sm_blosum62, 22, 50 };
+
+AlnAln *aln_init_AlnAln()
+{
+	/* Returns a new AlnAln whose owned pointers are all null, so aln_free_AlnAln() is safe on it at any time. */
+	AlnAln *aa = (AlnAln*)MYALLOC(sizeof(AlnAln));
+	aa->path = 0; aa->cigar = 0;	/* cigar is released by aln_free_AlnAln(), so it must not be left indeterminate */
+	aa->out1 = aa->out2 = aa->outm = 0;
+	aa->path_len = 0;
+	return aa;
+}
+void aln_free_AlnAln(AlnAln *aa)
+{
+	/* Release the five buffers owned by the record, then the record itself. */
+	MYFREE(aa->outm); MYFREE(aa->out2); MYFREE(aa->out1); MYFREE(aa->cigar); MYFREE(aa->path);
+	MYFREE(aa);
+}
+
+/***************************/
+/* START OF common_align.c */
+/***************************/
+
+#define LOCAL_OVERFLOW_THRESHOLD 32000
+#define LOCAL_OVERFLOW_REDUCE 16000
+#define NT_LOCAL_SCORE int
+#define NT_LOCAL_SHIFT 16
+#define NT_LOCAL_MASK 0xffff
+
+#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;	/* reset all three states of a score cell; note the expansion already ends in ';' */
+
+#define set_M(MM, cur, p, sc)	/* MM = best of p->M, p->I, p->D plus sc; the winner is recorded in cur->Mt (ties: M first, then D) */						\
+{														\
+	if ((p)->M >= (p)->I) {								\
+		if ((p)->M >= (p)->D) {							\
+			(MM) = (p)->M + (sc); (cur)->Mt = FROM_M;	\
+		} else {										\
+			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D;	\
+		}												\
+	} else {											\
+		if ((p)->I > (p)->D) {							\
+			(MM) = (p)->I + (sc); (cur)->Mt = FROM_I;	\
+		} else {										\
+			(MM) = (p)->D + (sc); (cur)->Mt = FROM_D;	\
+		}												\
+	}													\
+}
+#define set_I(II, cur, p)	/* open from p->M or extend p->I, whichever is better; reads gap_open/gap_ext from the enclosing scope */							\
+{														\
+	if ((p)->M - gap_open > (p)->I) {					\
+		(cur)->It = FROM_M;								\
+		(II) = (p)->M - gap_open - gap_ext;				\
+	} else {											\
+		(cur)->It = FROM_I;								\
+		(II) = (p)->I - gap_ext;						\
+	}													\
+}
+#define set_end_I(II, cur, p)	/* like set_I, but charges gap_end per base when gap_end >= 0 */						\
+{														\
+	if (gap_end >= 0) {									\
+		if ((p)->M - gap_open > (p)->I) {				\
+			(cur)->It = FROM_M;							\
+			(II) = (p)->M - gap_open - gap_end;			\
+		} else {										\
+			(cur)->It = FROM_I;							\
+			(II) = (p)->I - gap_end;					\
+		}												\
+	} else set_I(II, cur, p);							\
+}
+#define set_D(DD, cur, p)	/* open from p->M or extend p->D, whichever is better; reads gap_open/gap_ext from the enclosing scope */							\
+{														\
+	if ((p)->M - gap_open > (p)->D) {					\
+		(cur)->Dt = FROM_M;								\
+		(DD) = (p)->M - gap_open - gap_ext;				\
+	} else {											\
+		(cur)->Dt = FROM_D;								\
+		(DD) = (p)->D - gap_ext;						\
+	}													\
+}
+#define set_end_D(DD, cur, p)	/* like set_D, but charges gap_end per base when gap_end >= 0 */						\
+{														\
+	if (gap_end >= 0) {									\
+		if ((p)->M - gap_open > (p)->D) {				\
+			(cur)->Dt = FROM_M;							\
+			(DD) = (p)->M - gap_open - gap_end;			\
+		} else {										\
+			(cur)->Dt = FROM_D;							\
+			(DD) = (p)->D - gap_end;					\
+		}												\
+	} else set_D(DD, cur, p);							\
+}
+
+typedef struct	/* traceback cell: for each of the three states, the state it was reached from (FROM_M / FROM_I / FROM_D) */
+{
+	unsigned char Mt:3, It:2, Dt:2;
+} dpcell_t;
+
+typedef struct	/* scores of the three DP states (M, I, D) for one cell */
+{
+	int M, I, D;
+} dpscore_t;
+
+/* Fill s_array[r][k] = score_matrix[r*row + seq[k]] for every matrix row r and position k: a per-symbol score profile. */
+void aln_init_score_array(unsigned char *seq, int len, int row, int *score_matrix, int **s_array)
+{
+	int r;
+	for (r = 0; r != row; ++r) {
+		const int *src = score_matrix + r * row;	/* scores of symbol r against every symbol */
+		int *dst = s_array[r];
+		int pos;
+		for (pos = 0; pos != len; ++pos)
+			dst[pos] = src[seq[pos]];
+	}
+}
+/* Banded global alignment of seq1[0..len1) against seq2[0..len2) with affine gaps taken from ap (gap_end replaces gap_ext
+ * on the border rows/columns when it is >= 0). The traceback is written to path[] from (len1,len2) back to (0,0), at most
+ * len1+len2+1 entries; *path_len gets the entry count minus one. Returns the best end score (0 if either length is 0). */
+int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+		path_t *path, int *path_len)
+{
+	register int i, j;
+	i = j = 0;
+	dpcell_t **dpcell, *q;
+	dpscore_t *curr, *last, *s;
+	path_t *p;
+	int b1, b2, tmp_end;
+	int *mat, end, max;
+	unsigned char type, ctype;
+
+	int gap_open, gap_ext, gap_end, b;
+	int *score_matrix, N_MATRIX_ROW;
+
+	/* initialize some align-related parameters. just for compatibility */
+	gap_open = ap->gap_open;
+	gap_ext = ap->gap_ext;
+	gap_end = ap->gap_end;
+	b = ap->band_width;
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+	
+	if (len1 == 0 || len2 == 0) {
+		*path_len = 0;
+		return 0;
+	}
+	/* calculate b1 and b2 */
+	if (len1 > len2) {
+		b1 = len1 - len2 + b;
+		b2 = b;
+	} else {
+		b1 = b;
+		b2 = len2 - len1 + b;
+	}
+	if (b1 > len1) b1 = len1;
+	if (b2 > len2) b2 = len2;
+	--seq1; --seq2;	/* from here on both sequences are indexed from 1 */
+
+	/* allocate memory */
+	end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);
+	dpcell = (dpcell_t**)MYALLOC(sizeof(dpcell_t*) * (len2 + 1));
+	for (j = 0; j <= len2; ++j)
+		dpcell[j] = (dpcell_t*)MYALLOC(sizeof(dpcell_t) * end);
+	for (j = b2 + 1; j <= len2; ++j)
+		dpcell[j] -= j - b2;	/* shift the row so it can be indexed with the absolute column i; undone before freeing */
+	curr = (dpscore_t*)MYALLOC(sizeof(dpscore_t) * (len1 + 1));
+	last = (dpscore_t*)MYALLOC(sizeof(dpscore_t) * (len1 + 1));
+
+	/* set first row */
+	SET_INF(*curr); curr->M = 0;
+	for (i = 1, s = curr + 1; i < b1; ++i, ++s) {
+		SET_INF(*s);
+		set_end_D(s->D, dpcell[0] + i, s - 1);
+	}
+	s = curr; curr = last; last = s;	/* swap the two score rows */
+
+	/* core dynamic programming, part 1 */
+	tmp_end = (b2 < len2)? b2 : len2 - 1;
+	for (j = 1; j <= tmp_end; ++j) {
+		q = dpcell[j]; s = curr; SET_INF(*s);
+		set_end_I(s->I, q, last);
+		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		++s; ++q;
+		for (i = 1; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_D(s->D, q, s - 1);
+		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
+			set_end_I(s->I, q, last + i);
+		} else s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+	}
+	/* last row for part 1, use set_end_D() instead of set_D() */
+	if (j == len2 && b2 != len2 - 1) {
+		q = dpcell[j]; s = curr; SET_INF(*s);
+		set_end_I(s->I, q, last);
+		end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		++s; ++q;
+		for (i = 1; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
+			set_I(s->I, q, last + i);
+			set_end_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_end_D(s->D, q, s - 1);
+		if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
+			set_end_I(s->I, q, last + i);
+		} else s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+		++j;
+	}
+
+	/* core dynamic programming, part 2 */
+	for (; j <= len2 - b2 + 1; ++j) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		end = j + b1 - 1;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+		set_D(s->D, q, s - 1);
+		s->I = MINOR_INF;
+		s = curr; curr = last; last = s;
+	}
+
+	/* core dynamic programming, part 3 */
+	for (; j < len2; ++j) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
+		set_end_I(s->I, q, last + i);
+		set_D(s->D, q, s - 1);
+		s = curr; curr = last; last = s;
+	}
+	/* last row */
+	if (j == len2) {
+		SET_INF(curr[j - b2]);
+		mat = score_matrix + seq2[j] * N_MATRIX_ROW;
+		for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
+			set_M(s->M, q, last + i - 1, mat[seq1[i]]);
+			set_I(s->I, q, last + i);
+			set_end_D(s->D, q, s - 1);
+		}
+		set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
+		set_end_I(s->I, q, last + i);
+		set_end_D(s->D, q, s - 1);
+		s = curr; curr = last; last = s;
+	}
+
+	/* backtrace */
+	i = len1; j = len2;
+	q = dpcell[j] + i;
+	s = last + len1;
+	max = s->M; type = q->Mt; ctype = FROM_M;	/* pick the best of the three end states; M wins ties */
+	if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }
+	if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }
+
+	p = path;
+	p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */
+	++p;
+	do {
+		switch (ctype) {	/* step back according to the state we are currently in */
+			case FROM_M: --i; --j; break;
+			case FROM_I: --j; break;
+			case FROM_D: --i; break;
+		}
+		q = dpcell[j] + i;
+		ctype = type;
+		switch (type) {	/* look up how that predecessor state was itself reached */
+			case FROM_M: type = q->Mt; break;
+			case FROM_I: type = q->It; break;
+			case FROM_D: type = q->Dt; break;
+		}
+		p->ctype = ctype; p->i = i; p->j = j;
+		++p;
+	} while (i || j);
+	*path_len = p - path - 1;	/* the final (0,0) entry is written but not counted */
+
+	/* free memory */
+	for (j = b2 + 1; j <= len2; ++j)
+		dpcell[j] += j - b2;	/* restore the pointers returned by MYALLOC */
+	for (j = 0; j <= len2; ++j)
+		MYFREE(dpcell[j]);
+	MYFREE(dpcell);
+	MYFREE(curr); MYFREE(last);
+	
+	return max;
+}
+/*************************************************
+ * local alignment combined with banded strategy *
+ *
+ * aln_local_core: best local alignment of seq1[0..len1) against
+ * seq2[0..len2).  Both sequences hold residue codes that index
+ * ap->matrix (ap->row codes per row, looked up as
+ * matrix[code1 * row + code2]).  Opening a gap costs
+ * ap->gap_open + ap->gap_ext, each further gap position ap->gap_ext.
+ *
+ * Stages:
+ *   1. forward DP over the whole matrix: best score (score_f) and the
+ *      1-based cell where it ends (end_i, end_j);
+ *   2. only if path != 0: reverse DP from that cell, limited to a band,
+ *      to locate the 1-based start cell (start_i, start_j);
+ *   3. only if path_len != 0 and do_align != 0: aln_global_core() on the
+ *      start..end rectangle to produce the actual path.
+ *
+ * Outputs:
+ *   path == 0                    nothing is written.
+ *   path != 0, path_len == 0     path[0] = start cell, path[1] = end cell.
+ *   path_len != 0, !do_align     (*path_len) = 2, path[0] = end cell,
+ *                                path[1] = start cell; only .i and .j are
+ *                                written, .ctype is left untouched.
+ *   path_len != 0, do_align      path and (*path_len) as filled by
+ *                                aln_global_core(), with coordinates
+ *                                shifted back to the full sequences.
+ *   no local match found         path and (*path_len) are left untouched.
+ *
+ * Returns -1 if len1 or len2 is 0.  Otherwise returns the forward score,
+ * except on the do_align path where the score reported by
+ * aln_global_core() is returned instead.
+ *************************************************/
+int aln_local_core(unsigned char *seq1,const int len1, unsigned char *seq2,const int len2, const AlnParam *ap,
+				   path_t *path, int *path_len, int do_align)
+{
+	register NT_LOCAL_SCORE *s; /* cursor into eh[] */
+	register int i; /* 1-based position in seq1 */
+	int q, r, qr, tmp_len, qr_shift;
+	int **s_array, *score_array; /* per-residue score rows, and the row selected for the current seq2[j] */
+	int e, f; /* best gap-ending scores reaching the current cell along j and along i respectively */
+	int is_overflow, of_base; /* rescale request flag, and the total amount subtracted so far */
+	NT_LOCAL_SCORE *eh, curr_h, last_h, curr_last_h; /* eh[]: one packed (h, e) pair per column, reused row after row */
+	int j, start_i, start_j, end_i, end_j; /* j: 1-based position in seq2 */
+	path_t *p;
+	int score_f, score_r, score_g; /* forward, reverse and global-realignment scores */
+	int start, end, max_score; /* start/end: band limits of the reverse pass */
+
+	int gap_open, gap_ext, b;
+	int *score_matrix, N_MATRIX_ROW;
+
+	/* initialize some align-related parameters. just for compatibility */
+	gap_open = ap->gap_open;
+	gap_ext = ap->gap_ext;
+	b = ap->band_width; /* copied but not read again in this function */
+	score_matrix = ap->matrix;
+	N_MATRIX_ROW = ap->row;
+
+	if (len1 == 0 || len2 == 0) return -1; /* nothing to align; returns before anything is allocated */
+
+	/* allocate memory */
+	eh = (NT_LOCAL_SCORE*)MYALLOC(sizeof(NT_LOCAL_SCORE) * (len1 + 1));
+	s_array = (int**)MYALLOC(sizeof(int*) * N_MATRIX_ROW);
+	for (i = 0; i != N_MATRIX_ROW; ++i)
+		s_array[i] = (int*)MYALLOC(sizeof(int) * len1);
+	/* initialization */
+	aln_init_score_array(seq1, len1, N_MATRIX_ROW, score_matrix, s_array); /* NOTE(review): presumably s_array[c][k] = score of seq1[k] against code c - helper not visible here */
+	q = gap_open;
+	r = gap_ext;
+	qr = q + r; /* cost of the first position of a gap */
+	qr_shift = (qr+1) << NT_LOCAL_SHIFT; /* smallest packed value whose h field exceeds qr */
+	tmp_len = len1 + 1;
+	start_i = start_j = end_i = end_j = 0;
+	for (i = 0, max_score = 0; i != N_MATRIX_ROW * N_MATRIX_ROW; ++i) /* largest entry of the score matrix (at least 0) */
+		if (max_score < score_matrix[i]) max_score = score_matrix[i];
+	/* convert the coordinate: from here on seq1, seq2 and every s_array row
+	 * are indexed from 1; the shift is undone for s_array before freeing */
+	--seq1; --seq2;
+	for (i = 0; i != N_MATRIX_ROW; ++i) --s_array[i];
+
+	/* forward dynamic programming: each eh[] entry packs an h score in the
+	 * bits above NT_LOCAL_SHIFT and an e score in the NT_LOCAL_MASK bits */
+	for (i = 0, s = eh; i != tmp_len; ++i, ++s) *s = 0;
+	score_f = 0;
+	is_overflow = of_base = 0;
+	for (j = 1; j <= len2; ++j) { /* one DP row per seq2 position */
+		last_h = f = 0;
+		score_array = s_array[seq2[j]];
+		if (is_overflow) { /* adjust eh[] array if overflow occurs. */
+			/* If LOCAL_OVERFLOW_REDUCE is too small, optimal alignment might be missed.
+			 * If it is too large, this block will be executed frequently and therefore
+			 * slow down the whole program.
+			 * Actually, smaller LOCAL_OVERFLOW_REDUCE might also help to reduce the
+			 * number of assignments because it sets some cells to zero when overflow
+			 * happens. */
+			int tmp, tmp2;
+			score_f -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE; /* remembered so the true score can be restored after the loop */
+			is_overflow = 0;
+			for (i = 1, s = eh; i <= tmp_len; ++i, ++s) { /* visits all tmp_len entries eh[0..len1] */
+				tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0; /* both fields are clamped at zero */
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << NT_LOCAL_SHIFT) | tmp2;
+			}
+		}
+		for (i = 1, s = eh; i != tmp_len; ++i, ++s) { /* s points at eh[i-1] */
+			/* prepare for calculate current h: the h field of eh[i-1] still
+			 * holds the previous row's score at column i-1 (the diagonal) */
+			curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
+			if (curr_h < 0) curr_h = 0; /* local alignment: never below zero */
+			if (last_h > qr) { /* initialize f */
+				f = (f > last_h - q)? f - r : last_h - qr; /* extend the running gap or open a new one from last_h */
+				if (curr_h < f) curr_h = f;
+			}
+			if (*(s+1) >= qr_shift) { /* initialize e */
+				curr_last_h = *(s+1) >> NT_LOCAL_SHIFT; /* previous row's h at column i */
+				e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
+				if (curr_h < e) curr_h = e;
+				*s = (last_h << NT_LOCAL_SHIFT) | e; /* eh[i-1] now holds this row's h at column i-1 plus the new e */
+			} else *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */
+			last_h = curr_h;
+			if (score_f < curr_h) { /* strictly better: the first cell reaching the maximum is kept */
+				score_f = curr_h; end_i = i; end_j = j;
+				if (score_f > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1; /* rescale before the next row */
+			}
+		}
+		*s = last_h << NT_LOCAL_SHIFT; /* s is eh[len1] here: store this row's last h */
+	}
+	score_f += of_base; /* undo the overflow rescaling */
+
+	if (path == 0) goto end_func; /* skip path-filling */
+
+	/* reverse dynamic programming: same recurrence, run backwards from the
+	 * end cell to find where the best local alignment starts */
+	for (i = end_i, s = eh + end_i; i >= 0; --i, --s) *s = 0;
+	if (end_i == 0 || end_j == 0) goto end_func; /* no local match */
+	score_r = score_matrix[seq1[end_i] * N_MATRIX_ROW + seq2[end_j]];
+	is_overflow = of_base = 0;
+	start_i = end_i; start_j = end_j;
+	eh[end_i] = ((NT_LOCAL_SCORE)(qr + score_r)) << NT_LOCAL_SHIFT; /* in order to initialize f and e, 040408 */
+	start = end_i - 1;
+	end = end_i - 3;
+	if (end <= 0) end = 0;
+
+	/* second pass DP can be done in a band, speed will thus be enhanced */
+	for (j = end_j - 1; j != 0; --j) {
+		last_h = f = 0;
+		score_array = s_array[seq2[j]];
+		if (is_overflow) { /* adjust eh[] array if overflow occurs. */
+			int tmp, tmp2;
+			score_r -= LOCAL_OVERFLOW_REDUCE;
+			of_base += LOCAL_OVERFLOW_REDUCE;
+			is_overflow = 0;
+			for (i = start, s = eh + start + 1; i >= end; --i, --s) { /* only the cells inside the current band */
+				tmp = *s >> NT_LOCAL_SHIFT; tmp2 = *s & NT_LOCAL_MASK;
+				if (tmp2 < LOCAL_OVERFLOW_REDUCE) tmp2 = 0;
+				else tmp2 -= LOCAL_OVERFLOW_REDUCE;
+				if (tmp < LOCAL_OVERFLOW_REDUCE) tmp = 0;
+				else tmp -= LOCAL_OVERFLOW_REDUCE;
+				*s = (tmp << NT_LOCAL_SHIFT) | tmp2;
+			}
+		}
+		for (i = start, s = eh + start + 1; i != end; --i, --s) { /* s points at eh[i+1]; columns run from start down to end+1 */
+			/* prepare for calculate current h */
+			curr_h = (*s >> NT_LOCAL_SHIFT) + score_array[i];
+			if (curr_h < 0) curr_h = 0;
+			if (last_h > qr) { /* initialize f */
+				f = (f > last_h - q)? f - r : last_h - qr;
+				if (curr_h < f) curr_h = f;
+			}
+			if (*(s-1) >= qr_shift) { /* initialize e */
+				curr_last_h = *(s-1) >> NT_LOCAL_SHIFT;
+				e = ((*s & NT_LOCAL_MASK) > curr_last_h - q)? (*s & NT_LOCAL_MASK) - r : curr_last_h - qr;
+				if (curr_h < e) curr_h = e;
+				*s = (last_h << NT_LOCAL_SHIFT) | e;
+			} else *s = last_h << NT_LOCAL_SHIFT; /* e = 0 */
+			last_h = curr_h;
+			if (score_r < curr_h) {
+				score_r = curr_h; start_i = i; start_j = j;
+				if (score_r + of_base - qr == score_f) { /* reverse score (minus the qr seed) equals the forward score: start found */
+					j = 1; break; /* j = 1 makes the outer loop stop after this row's bookkeeping */
+				}
+				if (score_r > LOCAL_OVERFLOW_THRESHOLD) is_overflow = 1;
+			}
+		}
+		*s = last_h << NT_LOCAL_SHIFT;
+		/* recalculate start and end, the boundaries of the band */
+		if ((eh[start] >> NT_LOCAL_SHIFT) <= qr) --start;
+		if (start <= 0) start = 0;
+		end = start_i - (start_j - j) - (score_r + of_base + (start_j - j) * max_score) / r - 1; /* NOTE(review): divides by gap_ext, so a zero gap_ext would divide by zero here */
+		if (end <= 0) end = 0;
+	}
+
+	if (path_len == 0) { /* caller only wants the two corner cells */
+		path[0].i = start_i; path[0].j = start_j;
+		path[1].i = end_i; path[1].j = end_j;
+		goto end_func;
+	}
+
+	score_r += of_base; /* undo the overflow rescaling */
+	score_r -= qr; /* remove the qr that was added when eh[end_i] was seeded */
+
+#ifdef DEBUG
+	/* this seems not a bug */
+	if (score_f != score_r)
+		fprintf(stderr, "[aln_local_core] unknown flaw occurs: score_f(%d) != score_r(%d)\n", score_f, score_r);
+#endif
+
+	if (do_align) { /* call global alignment to fill the path */
+		score_g = 0;
+		j = (end_i - start_i > end_j - start_j)? end_i - start_i : end_j - start_j;
+		++j; /* j is the maximum band_width */
+		for (i = ap->band_width;; i <<= 1) { /* double the band until the global score matches, or the band exceeds j */
+			AlnParam ap_real = *ap;
+			ap_real.gap_end = -1; /* NOTE(review): meaning of -1 is defined by aln_global_core, which is not fully visible here */
+			ap_real.band_width = i;
+			score_g = aln_global_core(seq1 + start_i, end_i - start_i + 1, seq2 + start_j,
+									  end_j - start_j + 1, &ap_real, path, path_len);
+			if (score_g == score_r || score_f == score_g) break;
+			if (i > j) break;
+		}
+#ifdef DEBUG
+		if (score_r > score_g && score_f > score_g)
+			fprintf(stderr, "[aln_local_core] Cannot find reasonable band width. Continue anyway.\n");
+#endif
+		score_f = score_g; /* the value returned is the global re-alignment score */
+
+		/* convert coordinate: from positions inside the start..end rectangle
+		 * back to 1-based positions in the full sequences */
+		for (p = path + *path_len - 1; p >= path; --p) {
+			p->i += start_i - 1;
+			p->j += start_j - 1;
+		}
+	} else { /* just store the start and end */
+		*path_len = 2;
+		path[1].i = start_i; path[1].j = start_j; /* ctype is not written for either entry */
+		path->i = end_i; path->j = end_j; /* end first, start second: the reverse of the path_len == 0 case above */
+	}
+
+end_func:
+	/* free */
+	MYFREE(eh);
+	for (i = 0; i != N_MATRIX_ROW; ++i) {
+		++s_array[i]; /* undo the 1-based shift so the pointer returned by MYALLOC is freed */
+		MYFREE(s_array[i]);
+	}
+	MYFREE(s_array);
+	return score_f;
+}
+
+/*
+ * aln_stdaln_aux: align two text sequences and return the result in a
+ * freshly created AlnAln.
+ *
+ *   seq1, seq2   sequence text; need not be NUL-terminated when the
+ *                matching length is given
+ *   ap           scoring parameters; ap->row also selects the alphabet:
+ *                below 10 the 4-letter nucleotide table, below 20 the
+ *                16-letter nucleotide table, otherwise the amino-acid table
+ *   is_global    non-zero: aln_global_core(); zero: aln_local_core()
+ *   do_align     non-zero: also build the printable strings out1/out2/outm
+ *   len1, len2   sequence lengths; a negative value means "use strlen()"
+ *
+ * The result always carries path, path_len, score, start/end coordinates
+ * and the cigar.  When the core routine produces no path (path_len stays
+ * 0) the four coordinates are set to 0 and the cigar is empty.
+ */
+AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
+					   int is_global, int do_align, int len1, int len2)
+{
+	unsigned char *seq11, *seq22;
+	int score;
+	int i, j, l, k, n_path;
+	const path_t *p;
+	char *out1, *out2, *outm;
+	AlnAln *aa;
+
+	if (len1 < 0) len1 = strlen(seq1);
+	if (len2 < 0) len2 = strlen(seq2);
+
+	aa = aln_init_AlnAln();
+	seq11 = (unsigned char*)MYALLOC(sizeof(unsigned char) * len1);
+	seq22 = (unsigned char*)MYALLOC(sizeof(unsigned char) * len2);
+	n_path = len1 + len2 + 1;
+	aa->path = (path_t*)MYALLOC(sizeof(path_t) * n_path);
+	/* Zero the path and its length up front: aln_local_core() can return
+	 * without writing *path_len (empty input, no local match), and in the
+	 * !do_align local case it fills only .i/.j, leaving .ctype unset. */
+	memset(aa->path, 0, sizeof(path_t) * n_path);
+	aa->path_len = 0;
+
+	/* Encode the text.  The character is converted through unsigned char
+	 * so that bytes >= 0x80 cannot become a negative table index on
+	 * platforms where plain char is signed. */
+	if (ap->row < 10) { /* 4-nucleotide alignment */
+		for (i = 0; i < len1; ++i)
+			seq11[i] = aln_nt4_table[(unsigned char)seq1[i]];
+		for (j = 0; j < len2; ++j)
+			seq22[j] = aln_nt4_table[(unsigned char)seq2[j]];
+	} else if (ap->row < 20) { /* 16-nucleotide alignment */
+		for (i = 0; i < len1; ++i)
+			seq11[i] = aln_nt16_table[(unsigned char)seq1[i]];
+		for (j = 0; j < len2; ++j)
+			seq22[j] = aln_nt16_table[(unsigned char)seq2[j]];
+	} else { /* amino acids */
+		for (i = 0; i < len1; ++i)
+			seq11[i] = aln_aa_table[(unsigned char)seq1[i]];
+		for (j = 0; j < len2; ++j)
+			seq22[j] = aln_aa_table[(unsigned char)seq2[j]];
+	}
+
+	if (is_global) score = aln_global_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len);
+	else score = aln_local_core(seq11, len1, seq22, len2, ap, aa->path, &aa->path_len, do_align);
+	aa->score = score;
+
+	if (do_align) {
+		out1 = aa->out1 = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));
+		out2 = aa->out2 = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));
+		outm = aa->outm = (char*)MYALLOC(sizeof(char) * (aa->path_len + 1));
+
+		/* The path is stored end-first, so walk it backwards to print the
+		 * alignment left to right.  Path coordinates are 1-based, hence
+		 * the "- 1" when indexing the 0-based sequences. */
+		for (l = 0, k = aa->path_len - 1; k >= 0; --k, ++l) {
+			p = aa->path + k;
+			switch (p->ctype) {
+			case FROM_M: /* residue against residue */
+				out1[l] = seq1[p->i - 1]; out2[l] = seq2[p->j - 1];
+				/* '|' marks identical codes, except the code equal to ap->row */
+				outm[l] = (seq11[p->i - 1] == seq22[p->j - 1] && seq11[p->i - 1] != ap->row)? '|' : ' ';
+				break;
+			case FROM_I: /* seq2 residue against a gap in seq1 */
+				out1[l] = '-'; out2[l] = seq2[p->j - 1]; outm[l] = ' ';
+				break;
+			case FROM_D: /* seq1 residue against a gap in seq2 */
+				out1[l] = seq1[p->i - 1]; out2[l] = '-'; outm[l] = ' ';
+				break;
+			}
+		}
+		out1[l] = out2[l] = outm[l] = '\0';
+	}
+
+	MYFREE(seq11);
+	MYFREE(seq22);
+
+	if (aa->path_len > 0) {
+		/* last path element = alignment start, first element = alignment end */
+		p = aa->path + aa->path_len - 1;
+		aa->start1 = p->i? p->i : 1; /* never report coordinate 0 */
+		aa->end1 = aa->path->i;
+		aa->start2 = p->j? p->j : 1;
+		aa->end2 = aa->path->j;
+	} else {
+		/* no path was produced: there is no element to read */
+		aa->start1 = aa->end1 = aa->start2 = aa->end2 = 0;
+	}
+	aa->cigar = aln_path2cigar(aa->path, aa->path_len, &aa->n_cigar);
+
+	return aa;
+}
+AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int do_align)
+{
+	/* Convenience form for NUL-terminated input: a negative length makes
+	 * aln_stdaln_aux() measure the sequence with strlen(). */
+	const int use_strlen = -1;
+	AlnAln *result;
+
+	result = aln_stdaln_aux(seq1, seq2, ap, is_global, do_align, use_strlen, use_strlen);
+	return result;
+}
+
+/*
+ * aln_path2cigar: run-length encode an alignment path.
+ *
+ *   path      path elements, stored end-first (path[path_len-1] is the
+ *             first aligned position)
+ *   path_len  number of elements in path
+ *   n_cigar   out: number of entries in the returned array
+ *
+ * Each entry packs the operation (FROM_M / FROM_I / FROM_D) in the bits
+ * above bit 13 and the run length in the low 14 bits.  Entries are emitted
+ * in alignment order, i.e. walking path from its last element to its first.
+ * A run longer than 0x3fff is split into several consecutive entries with
+ * the same operation, so the length never carries into the operation bits.
+ *
+ * Returns an array obtained from MYALLOC that the caller must release, or
+ * 0 (with *n_cigar = 0) when path is null or path_len is not positive.
+ */
+cigar_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
+{
+	enum { MAX_RUN = 0x3fff }; /* largest length the 14-bit field can hold */
+	int i, n, run;
+	cigar_t *cigar;
+	unsigned char last_type;
+
+	if (path_len <= 0 || path == 0) {
+		*n_cigar = 0;
+		return 0;
+	}
+
+	/* pass 1: count the entries (one per run, plus one per MAX_RUN split) */
+	last_type = path[0].ctype;
+	n = 1; run = 1;
+	for (i = 1; i < path_len; ++i) {
+		if (path[i].ctype != last_type || run == MAX_RUN) {
+			++n;
+			run = 1;
+			last_type = path[i].ctype;
+		} else ++run;
+	}
+	*n_cigar = n;
+	cigar = (cigar_t*)MYALLOC(*n_cigar * sizeof(cigar_t));
+
+	/* pass 2: fill the entries, walking the path backwards */
+	last_type = path[path_len-1].ctype;
+	cigar[0] = (int)last_type << 14 | 1;
+	n = 0; run = 1;
+	for (i = path_len - 2; i >= 0; --i) {
+		if (path[i].ctype == last_type && run < MAX_RUN) {
+			++cigar[n]; /* bumps only the length field: run < MAX_RUN */
+			++run;
+		} else {
+			last_type = path[i].ctype;
+			cigar[++n] = (int)last_type << 14 | 1;
+			run = 1;
+		}
+	}
+
+	return cigar;
+}
+
diff --git a/stdaln.h b/stdaln.h
new file mode 100644
index 0000000..efe6084
--- /dev/null
+++ b/stdaln.h
@@ -0,0 +1,146 @@
+/* The MIT License
+
+   Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3 at gmail.com>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+  2008-08-04, 0.9.8
+
+  - Fixed the wrong declaration of aln_stdaln_aux()
+
+  - Avoid 0 coordinate for global alignment
+
+  2008-08-01, 0.9.7
+
+  - Change gap_end penalty to 5 in aln_param_bwa
+
+  - Add function to convert path_t to the CIGAR format
+
+  2008-08-01, 0.9.6
+
+  - The first gap now costs (gap_open+gap_ext), instead of
+    gap_open. Scoring systems are modified accordingly.
+
+  - Gap end is now handled correctly; previously it was not.
+
+  - Change license to MIT.
+
+ */
+
+#ifndef LH3_STDALN_H_
+#define LH3_STDALN_H_
+
+
+#define STDALN_VERSION 0.9.8
+
+
+#ifndef MYALLOC
+#	define MYALLOC malloc
+#endif
+#ifndef MYFREE
+#	define MYFREE free
+#endif
+
+#define FROM_M 0 /* path step pairing one residue of each sequence */
+#define FROM_I 1 /* path step taking a residue of seq2 only (shown as '-' in out1) */
+#define FROM_D 2 /* path step taking a residue of seq1 only (shown as '-' in out2) */
+
+/* A large negative sentinel, -(2^30 - 1). It is NOT the smallest int (INT_MIN is lower);
+ * NOTE(review): presumably used as "minus infinity" by the DP code - confirm in stdaln.c. */
+#define MINOR_INF -1073741823
+
+typedef unsigned short cigar_t; /* one CIGAR entry: operation (FROM_*) shifted left by 14, run length in the low 14 bits */
+
+typedef struct /* AlnParam: scoring parameters shared by the alignment routines */
+{
+	int gap_open; /* added to gap_ext for the first position of a gap */
+	int gap_ext; /* cost of every gap position */
+	int gap_end; /* NOTE(review): end-gap setting; aln_local_core passes -1 to aln_global_core - exact meaning to be confirmed there */
+
+	int *matrix; /* row * row substitution scores, indexed as matrix[code1 * row + code2] */
+	int row; /* number of residue codes; aln_stdaln_aux also uses it to pick the alphabet (<10, <20, otherwise) */
+	int band_width; /* initial band width; aln_local_core doubles it when re-aligning globally */
+} AlnParam;
+
+typedef struct /* path_t: one step of an alignment path */
+{
+	int i, j; /* 1-based positions in the first (i) and second (j) sequence */
+	unsigned char ctype; /* step type: FROM_M, FROM_I or FROM_D */
+} path_t;
+
+typedef struct /* AlnAln: result object built by aln_stdaln_aux() */
+{
+	path_t *path; /* for advanced users... :-) alignment path, stored end-first: path[0] is the end, path[path_len-1] the start */
+	int path_len; /* for advanced users... :-) number of used elements in path */
+	int start1, end1; /* start and end of the first sequence, coordinates are 1-based */
+	int start2, end2; /* start and end of the second sequence, coordinates are 1-based */
+	int score; /* score returned by the core alignment routine */
+
+	char *out1, *out2; /* print them, and then you will know: the two aligned sequences with '-' for gaps; only built when do_align is set */
+	char *outm; /* match line: '|' where the aligned residues carry the same code, ' ' elsewhere */
+
+	int n_cigar; /* number of entries in cigar */
+	cigar_t *cigar; /* run-length form of path, produced by aln_path2cigar() */
+} AlnAln;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+	/* Text-level entry points: take plain character sequences and return a new AlnAln. */
+	AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
+						   int is_global, int do_align, int len1, int len2); /* a negative len1/len2 means "measure with strlen()" */
+	AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int is_global, int do_align); /* aln_stdaln_aux() with both lengths taken by strlen() */
+	void aln_free_AlnAln(AlnAln *aa); /* NOTE(review): presumably releases an AlnAln returned by the two functions above - confirm in stdaln.c */
+	/* Core routines: work on already encoded sequences (residue codes, not text). */
+	int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
+						path_t *path, int *path_len);
+	int aln_local_core(unsigned char *seq1,const int len1, unsigned char *seq2,const int len2, const AlnParam *ap,
+					   path_t *path, int *path_len, int do_align); /* path may be 0 to get the score only */
+	cigar_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar); /* returns an array allocated with MYALLOC, or 0 for an empty path */
+
+#ifdef __cplusplus
+}
+#endif
+
+/********************
+ * global variables *
+ ********************/
+
+extern AlnParam aln_param_bwa ; /*= { 37,  9,  0, aln_sm_maq, 5, 50 };//*/
+extern AlnParam aln_param_blast; /* = {  5,  2,  0, aln_sm_blast, 5, 50 }; */
+extern AlnParam aln_param_nt2nt; /* = { 10,  2,  2, aln_sm_nt, 16, 75 }; */
+extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
+extern AlnParam aln_param_rd2rd; /* = { 12,  2,  2, aln_sm_blosum62, 22, 50 }; */
+
+/* common nucleotide score matrix for 16 bases */
+extern int           aln_sm_nt[];
+
+/* BLOSUM62 and BLOSUM45 */
+extern int           aln_sm_blosum62[], aln_sm_blosum45[];
+
+/* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
+extern int           aln_sm_read[];
+
+/* human-mouse score matrix for 4 bases */
+extern int           aln_sm_hs[];
+
+#endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/soapaligner.git