[med-svn] [pbalign] 01/08: Imported Upstream version 0.3.0
Afif Elghraoui
afif at moszumanska.debian.org
Sat Dec 24 02:17:06 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository pbalign.
commit e5305172e075f437aac290706c104c5fd9ecce7a
Author: Afif Elghraoui <afif at debian.org>
Date: Fri Dec 23 17:14:01 2016 -0800
Imported Upstream version 0.3.0
---
LICENSES.txt | 34 +-
Makefile | 11 +-
README.md | 18 +-
doc/howto.rst | 7 +-
pbalign/__init__.py | 4 +-
pbalign/alignservice/align.py | 6 +-
pbalign/alignservice/blasr.py | 105 ++++--
pbalign/bampostservice.py | 137 +++++++
pbalign/ccs.py | 32 ++
pbalign/filterservice.py | 33 +-
pbalign/forquiverservice/__init__.py | 34 --
pbalign/forquiverservice/forquiver.py | 99 -----
pbalign/forquiverservice/loadchemistry.py | 77 ----
pbalign/forquiverservice/loadpulses.py | 103 ------
pbalign/forquiverservice/repack.py | 87 -----
pbalign/forquiverservice/sort.py | 89 -----
pbalign/options.py | 416 ++++++++++++++--------
pbalign/pbalignfiles.py | 41 ++-
pbalign/pbalignrunner.py | 247 ++++++-------
pbalign/tasks/__init__.py | 0
pbalign/tasks/consolidate_alignments.py | 96 +++++
pbalign/tools/createChemistryHeader.py | 4 +-
pbalign/utils/fileutil.py | 35 +-
pbalign/utils/progutil.py | 2 +-
setup.py | 6 +-
tests/cram/bam.t | 14 +
tests/cram/ccs.t | 15 +
tests/cram/dataset_tiny.t | 38 ++
tests/cram/filtercriteria_hitpolicy.t | 78 ++++
tests/cram/path_with_space.t | 9 +
tests/cram/setup.sh | 21 ++
tests/cram/test_pbalign.t | 276 --------------
tests/cram/xml.t | 38 ++
tests/cram_h5/ccs.t | 51 +++
tests/cram_h5/filter_adapter_only.t | 11 +
tests/cram_h5/inputs_outputs.t | 130 +++++++
tests/cram_h5/pulsefile.t | 23 ++
tests/cram_h5/setup.sh | 22 ++
tests/{cram => cram_h5}/test_mask_aligned_reads.t | 2 +-
tests/data/1.config | 3 +-
tests/data/ecoli_lp.fofn | 4 +-
tests/data/lambda_bax.fofn | 2 +-
tests/data/reference_lambda.xml | 14 +
tests/data/subreads_dataset1.xml | 21 ++
tests/data/subreads_dataset2.xml | 10 +
tests/data/test_bas.fofn | 2 +-
tests/data/test_ccs.fofn | 2 +-
tests/data/test_filterAdapterOnly.fofn | 2 +-
tests/data/test_leftmost_query.fasta | 2 +-
tests/data/test_pulseFile.fofn | 8 +-
tests/out/readme | 3 +-
tests/unit/nose.cfg | 17 +
tests/unit/test_fileutil.py | 63 ++--
tests/unit/test_options.py | 109 +++---
tests/unit/test_pbalign.py | 37 +-
tests/unit/test_pbalignfiles.py | 31 +-
tests/unit/test_referenceInfo.py | 3 +-
tests/unit/test_setpath.py | 25 ++
tests/unit/test_tool_contract.py | 91 +++++
tests/unit/test_unrolled.py | 87 +++++
tests/unit/test_xmlout.py | 38 ++
tests/unit_h5/nose.cfg | 17 +
tests/{unit => unit_h5}/test_filterservice.py | 11 +-
tests/{unit => unit_h5}/test_forquiverservice.py | 11 +-
tests/{unit => unit_h5}/test_gmap.py | 8 +-
tests/{unit => unit_h5}/test_loadpulsesservice.py | 8 +-
tests/{unit => unit_h5}/test_pbalign.py | 11 +-
tests/{unit => unit_h5}/test_repackservice.py | 7 +-
tests/{unit => unit_h5}/test_rgnh5io.py | 10 +-
tests/unit_h5/test_setpath.py | 27 ++
tests/{unit => unit_h5}/test_sortservice.py | 5 +-
71 files changed, 1837 insertions(+), 1303 deletions(-)
diff --git a/LICENSES.txt b/LICENSES.txt
index 8aee62e..4ec79b5 100644
--- a/LICENSES.txt
+++ b/LICENSES.txt
@@ -1,33 +1,27 @@
-Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+Copyright (c) 2016, Pacific Biosciences of California, Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
+modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
-
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
-
* Neither the name of Pacific Biosciences nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
-NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
-GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC
-BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
index 83c2e54..757d0a3 100644
--- a/Makefile
+++ b/Makefile
@@ -17,13 +17,20 @@ develop:
test:
# Unit tests
- find tests/unit -name "*.py" | xargs nosetests
+ #find tests/unit -name "*.py" | xargs nosetests
+ nosetests --verbose tests/unit/*.py
# End-to-end tests
@echo pbalign cram tests require blasr installed.
find tests/cram -name "*.t" | xargs cram
+h5test:
+ # Tests for pre-3.0 smrtanalysis when default file formats are *.h5
+ @echo pbalign h5 tests require blasr, samtoh5, loadPulses, samFilter and etc installed.
+ nosetests --verbose tests/unit_h5/*.py
+ find tests/cram_h5 -name "*.t" | xargs cram -v
+
doc:
- sphinx-apidoc -T -f -o doc src/ && cd doc && make html
+ sphinx-apidoc -T -f -o doc pbalign/ && cd doc && make html
docs: doc
diff --git a/README.md b/README.md
index 3c8d149..a4464f8 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,5 @@
-###pbalign maps PacBio reads to reference sequences.###
+pbalign maps PacBio reads to reference sequences.
+Want to know how to install and run pbalign?
-**Q: Want to know how to install and run pbalign?**
-
-A: Please refer to [pbalign readme document](https://github.com/PacificBiosciences/pbalign/blob/master/doc/howto.rst)
-
-
-**Q: 'pbalign.py' does not work?**
-
-A: The main script has been changed from 'pbalign.py' to 'pbalign'. Please use 'pbalign' instead.
-
-
-**Q: Can pbalign handle large datasets with many movies?**
-
-A: pbalign is not designed to handle large datasets, you should follow a [divide and conquer way](https://github.com/PacificBiosciences/pbalign/wiki/Tutorial:-How-to-divide-and-conquer-large-datasets-using-pbalign) to align many movies to a reference.
-
+Please refer to https://github.com/PacificBiosciences/pbalign/blob/master/doc/howto.rst
diff --git a/doc/howto.rst b/doc/howto.rst
index 1bbcf3e..0cf6656 100644
--- a/doc/howto.rst
+++ b/doc/howto.rst
@@ -37,8 +37,7 @@ required,
The following software is optionally required if ``--forQuiver`` option
will be used to convert the output Compare HDF5 file to be compatible
with Quiver.
-- ``pbh5tools.cmph5tools``, a PacBio Bioinformatics tools that manipulates
-Compare HDF5 files.
+- ``pbh5tools.cmph5tools``, a PacBio Bioinformatics tool that manipulates Compare HDF5 files.
- ``h5repack``, a HDF5 tool to compress and repack HDF5 files.
The default aligner that pbalign uses is ``blasr``. If you want to use
@@ -140,9 +139,9 @@ Before installing pbcore, you may need to install numpy and h5py from ::
, or if you have root permission on Ubuntu, do ::
- $ git install numpy
+ $ pip install numpy
$ sudo apt-get install libhdf5-serial-dev
- $ git install h5py
+ $ pip install h5py
To install pbcore, execute ::
diff --git a/pbalign/__init__.py b/pbalign/__init__.py
index 76a6c57..2842f3b 100755
--- a/pbalign/__init__.py
+++ b/pbalign/__init__.py
@@ -34,7 +34,7 @@
from __future__ import absolute_import
-_changelist = "$Change: 141024 $"
+_changelist = "$Change: 173392 $"
def _get_changelist(perforce_str):
@@ -60,7 +60,7 @@ def get_dir():
"""Return lib directory."""
return op.dirname(op.realpath(__file__))
-VERSION = (0, 2, 0, get_changelist())
+VERSION = (0, 3, 0)
def get_version():
diff --git a/pbalign/alignservice/align.py b/pbalign/alignservice/align.py
index eb39eb2..f8333ae 100755
--- a/pbalign/alignservice/align.py
+++ b/pbalign/alignservice/align.py
@@ -38,6 +38,7 @@ from copy import copy
from pbalign.options import importDefaultOptions
from pbalign.utils.tempfileutil import TempFileManager
from pbalign.service import Service
+from pbalign.utils.fileutil import getFileFormat, FILE_FORMATS
class AlignService (Service):
@@ -184,8 +185,11 @@ class AlignService (Service):
self._tempFileManager,
self._fileNames.isWithinRepository)
+ outFormat = getFileFormat(self._fileNames.outputFileName)
+ suffix = ".bam" if (outFormat == FILE_FORMATS.BAM or
+ outFormat == FILE_FORMATS.XML) else ".sam"
self._fileNames.alignerSamOut = self._tempFileManager.\
- RegisterNewTmpFile(suffix=".sam")
+ RegisterNewTmpFile(suffix=suffix)
# Generate and execute cmd.
try:
diff --git a/pbalign/alignservice/blasr.py b/pbalign/alignservice/blasr.py
index 3ec94af..1372e37 100755
--- a/pbalign/alignservice/blasr.py
+++ b/pbalign/alignservice/blasr.py
@@ -35,7 +35,7 @@
from __future__ import absolute_import
from pbalign.alignservice.align import AlignService
-from pbalign.utils.fileutil import FILE_FORMATS, real_upath
+from pbalign.utils.fileutil import FILE_FORMATS, real_upath, getFileFormat
import logging
@@ -96,54 +96,60 @@ class BlasrService(AlignService):
ignoredBinaryOptions = ['-m', '-out', '-V']
ignoredUnitaryOptions = ['-h', '--help', '--version',
- '-v', '-vv', '-sam']
+ '-v', '-vv', '--sam', '--bam']
items = self.__parseAlgorithmOptionItems(options.algorithmOptions)
i = 0
try:
while i < len(items):
infoMsg, errMsg, item = "", "", items[i]
- if item == "-sa":
+ if item == "--sa":
val = real_upath(items[i+1])
if fileNames.sawriterFileName != val:
infoMsg = "Over write sa file with {0}".format(val)
fileNames.sawriterFileName = val
- elif item == "-regionTable":
+ elif item == "--regionTable":
val = real_upath(items[i+1])
if fileNames.regionTable != val:
infoMsg = "Over write regionTable with {0}.\n"\
.format(val)
fileNames.regionTable = val
- elif item == "-bestn":
+ elif item == "--bestn":
val = int(items[i+1])
if options.maxHits is not None and \
int(options.maxHits) != val:
- errMsg = "blasr -bestn specified within " + \
+ errMsg = "blasr --bestn specified within " + \
"--algorithmOptions is equivalent to " + \
"--maxHits. Conflicting values of " + \
- "--algorithmOptions '-bestn' and " +\
+ "--algorithmOptions '--bestn' and " +\
"--maxHits have been found."
else:
options.maxHits = val
- elif item == "-minMatch":
+ elif item == "--minMatch":
val = int(items[i+1])
if options.minAnchorSize is not None and \
int(options.minAnchorSize) != val:
- errMsg = "blasr -minMatch specified within " + \
+ errMsg = "blasr --minMatch specified within " + \
"--algorithmOptions is equivalent to " + \
"--minAnchorSize. Conflicting values " + \
- "of --algorithmOptions '-minMatch' and " + \
+ "of --algorithmOptions '--minMatch' and " + \
"--minAnchorSize have been found."
else:
options.minAnchorSize = val
- elif item == "-nproc":
+ elif item == "--maxMatch":
+ val = int(items[i+1])
+ if options.maxMatch is not None and \
+ int(options.maxMatch) != val:
+ infoMsg = "Override maxMatch with {n}.".format(n=val)
+ options.maxMatch = val
+ elif item == "--nproc":
val = int(items[i+1])
# The number of threads is not critical.
if options.nproc is None or \
int(options.nproc) != val:
infoMsg = "Over write nproc with {n}.".format(n=val)
options.nproc = val
- elif item == "-noSplitSubreads":
+ elif item == "--noSplitSubreads":
if not options.noSplitSubreads:
infoMsg = "Over write noSplitSubreads with True."
logging.info(self.name +
@@ -151,22 +157,25 @@ class BlasrService(AlignService):
options.noSplitSubreads = True
del items[i]
continue
- elif item == "-concordant":
+ elif item == "--concordant":
if not options.concordant:
infoMsg = "Over writer concordant with True."
logging.info(self.name +
": Resolve algorithmOptions. " + infoMsg)
options.concordant = True
del items[i]
- elif "-useccs" in item: # -useccs, -useccsall, -useccsdenovo
- val = item.lstrip('-')
+ elif "--useccs" in item: # --useccs, --useccsall, --useccsdenovo
+ val = item.lstrip('--')
if options.useccs != val and options.useccs is not None:
errMsg = "Found conflicting options in " + \
"--algorithmOptions '{v}' \nand --useccs={u}"\
.format(v=item, u=options.useccs)
else:
options.useccs = val
- elif item == "-seed" or item == "-randomSeed":
+ elif item == "--unaligned":
+ val = str(items[i+1])
+ options.unaligned = val
+ elif item == "--seed" or item == "--randomSeed":
val = int(items[i+1])
if options.seed is None or int(options.seed) != val:
infoMsg = "Overwrite random seed with {0}.".format(val)
@@ -195,6 +204,15 @@ class BlasrService(AlignService):
logging.error(errMsg + str(e))
raise ValueError(errMsg + str(e))
+ # Existing suffix array always uses match size 8.
+ # When BLASR search option -minMatch is less than 8, suffix array needs
+ # to be created on the fly.
+ if (options.minAnchorSize is not None and options.minAnchorSize != "" and
+ int(options.minAnchorSize) < 8):
+ logging.warning("Suffix array must be recreated on the fly when " +
+ "minMatch < 8, which may take a long time.")
+ fileNames.sawriterFileName = None
+
# Update algorithmOptions when resolve is done
options.algorithmOptions = " ".join(items)
return options
@@ -211,57 +229,84 @@ class BlasrService(AlignService):
Output:
a command-line string which can be used in bash.
"""
- cmdStr = "blasr {queryFile} {targetFile} -sam -out {outFile} ".format(
+ cmdStr = "blasr {queryFile} {targetFile} --out {outFile} ".format(
queryFile=fileNames.queryFileName,
targetFile=fileNames.targetFileName,
outFile=fileNames.alignerSamOut)
+ if getFileFormat(fileNames.alignerSamOut) == FILE_FORMATS.BAM:
+ cmdStr += " --bam "
+ else:
+ cmdStr += " --sam "
+
if ((fileNames.sawriterFileName is not None) and
(fileNames.sawriterFileName != "")):
- cmdStr += " -sa {sawriter} ".format(
+ cmdStr += " --sa {sawriter} ".format(
sawriter=fileNames.sawriterFileName)
if ((fileNames.regionTable != "") and
(fileNames.regionTable is not None)):
- cmdStr += " -regionTable {regionTable} ".format(
+ cmdStr += " --regionTable {regionTable} ".format(
regionTable=fileNames.regionTable)
if options.maxHits is not None and options.maxHits != "":
- cmdStr += " -bestn {n}".format(n=options.maxHits)
+ cmdStr += " --bestn {n}".format(n=options.maxHits)
if (options.minAnchorSize is not None and
options.minAnchorSize != ""):
- cmdStr += " -minMatch {0} ".format(options.minAnchorSize)
+ cmdStr += " --minMatch {0} ".format(options.minAnchorSize)
+
+ if (options.maxMatch is not None and options.maxMatch != ""):
+ cmdStr += " --maxMatch {0} ".format(options.maxMatch)
if options.nproc is not None and options.nproc != "":
- cmdStr += " -nproc {0} ".format(options.nproc)
+ cmdStr += " --nproc {0} ".format(options.nproc)
+ # Specify filter criteria and hit policy.
if options.minLength is not None:
- cmdStr += " -minSubreadLength {n} -minReadLength {n} ".\
+ cmdStr += " --minSubreadLength {n} --minAlnLength {n} ".\
format(n=options.minLength)
+ if options.maxDivergence is not None:
+ maxDivergence = int(options.maxDivergence if options.maxDivergence
+ > 1.0 else (options.maxDivergence * 100))
+ cmdStr += " --minPctSimilarity {0}".format(100 - maxDivergence)
+
+ if options.minAccuracy is not None:
+ minAccuracy = int(options.minAccuracy if options.minAccuracy > 1.0
+ else (options.minAccuracy * 100))
+ cmdStr += " --minPctAccuracy {0}".format(minAccuracy)
+
+ if options.scoreCutoff is not None:
+ cmdStr += " --maxScore {0}".format(options.scoreCutoff)
+
+ cmdStr += " --hitPolicy {0} ".format(options.hitPolicy)
+
if options.noSplitSubreads:
- cmdStr += " -noSplitSubreads "
+ cmdStr += " --noSplitSubreads "
if options.concordant:
- cmdStr += " -concordant "
+ cmdStr += " --concordant "
if options.seed is not None and options.seed != 0:
- cmdStr += " -randomSeed {0} ".format(options.seed)
+ cmdStr += " --randomSeed {0} ".format(options.seed)
- if options.hitPolicy == "randombest":
- cmdStr += " -placeRepeatsRandomly "
+ #if options.hitPolicy == "randombest":
+ # cmdStr += " --placeRepeatsRandomly "
if options.useccs is not None and options.useccs != "":
- cmdStr += " -{0} ".format(options.useccs)
+ cmdStr += " --{0} ".format(options.useccs)
# When input is a FASTA file, blasr -clipping = soft
if fileNames.inputFileFormat == FILE_FORMATS.FASTA:
- cmdStr += " -clipping soft "
+ cmdStr += " --clipping soft "
if options.algorithmOptions is not None:
cmdStr += " {0} ".format(options.algorithmOptions)
+ if options.unaligned is not None:
+ cmdStr += " --unaligned {f} --noPrintUnalignedSeqs".format(f=options.unaligned)
+
return cmdStr
def _preProcess(self, inputFileName, referenceFile=None,
diff --git a/pbalign/bampostservice.py b/pbalign/bampostservice.py
new file mode 100755
index 0000000..15cf937
--- /dev/null
+++ b/pbalign/bampostservice.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python
+###############################################################################
+# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
+# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+###############################################################################
+
+"""This script defines BamPostService, which
+ * calls 'samtools sort' to sort out.bam, and
+ * calls 'samtools index' to make out.bai index, and
+ * calls 'makePbi.py' to make out.pbi index file.
+"""
+
+# Author: Yuan Li
+
+from __future__ import absolute_import
+import logging
+from pbalign.service import Service
+from pbalign.utils.progutil import Execute
+
+
+class BamPostService(Service):
+
+ """Sort a bam, makes bam index and PacBio index."""
+ @property
+ def name(self):
+ """Name of bam post service."""
+ return "BamPostService"
+
+ @property
+ def progName(self):
+ return "samtools"
+
+ @property
+ def cmd(self):
+ return ""
+
+ def __init__(self, filenames):
+ """Initialize a BamPostService object.
+ Input - unsortedBamFile: a filtered, unsorted bam file
+ refFasta : a reference fasta file
+ Output - sortedBamFile: sorted BAM file
+ outBaiFile: index BAI file
+ """
+ self.refFasta = filenames.targetFileName
+
+ # filtered, unsorted bam file.
+ self.unsortedBamFile = filenames.filteredSam
+ self.outBamFile = filenames.outBamFileName
+ self.outBaiFile = filenames.outBaiFileName
+ self.outPbiFile = filenames.outPbiFileName
+
+ def _sortbam(self, unsortedBamFile, sortedBamFile):
+ """Sort unsortedBamFile and output sortedBamFile."""
+ if not sortedBamFile.endswith(".bam"):
+ raise ValueError("sorted bam file name %s must end with .bam" %
+ sortedBamFile)
+ sortedPrefix = sortedBamFile[0:-4]
+ cmd = 'samtools --version'
+ _samtoolsversion = ["0","1","19"]
+ try:
+ _out, _code, _msg = Execute(self.name, cmd)
+ if "samtools" in _out[0]:
+ _samtoolsversion = str(_out[0][8:]).strip().split('.')
+ else:
+ pass
+ except Exception:
+ pass
+ _stvmajor = int(_samtoolsversion[0])
+ if _stvmajor >= 1:
+ cmd = 'samtools sort -m 4G -o {sortedBamFile} {unsortedBamFile}'.format(
+ sortedBamFile=sortedBamFile, unsortedBamFile=unsortedBamFile)
+ else:
+ cmd = 'samtools sort -m 4G {unsortedBamFile} {prefix}'.format(
+ unsortedBamFile=unsortedBamFile, prefix=sortedPrefix)
+ Execute(self.name, cmd)
+
+ def _makebai(self, sortedBamFile, outBaiFile):
+ """Build *.bai index file."""
+ cmd = 'samtools --version'
+ _samtoolsversion = ["0","1","19"]
+ try:
+ _out, _code, _msg = Execute(self.name, cmd)
+ if "samtools" in _out[0]:
+ _samtoolsversion = str(_out[0][8:]).strip().split('.')
+ else:
+ pass
+ except Exception:
+ pass
+ _stvmajor = int(_samtoolsversion[0])
+ _stvminor = int(_samtoolsversion[1])
+ if _stvmajor == 1 and _stvminor == 2:
+ # only for 1.2
+ cmd = "samtools index {sortedBamFile}".format(
+ sortedBamFile=sortedBamFile)
+ else:
+ cmd = "samtools index {sortedBamFile} {outBaiFile}".format(
+ sortedBamFile=sortedBamFile, outBaiFile=outBaiFile)
+ Execute(self.name, cmd)
+
+ def _makepbi(self, sortedBamFile):
+ """Generate *.pbi PacBio BAM index."""
+ cmd = "pbindex %s" % sortedBamFile
+ Execute(self.name, cmd)
+
+ def run(self):
+ """ Run the BAM post-processing service. """
+ logging.info(self.name + ": Sort and build index for a bam file.")
+ self._sortbam(unsortedBamFile=self.unsortedBamFile,
+ sortedBamFile=self.outBamFile)
+ self._makebai(sortedBamFile=self.outBamFile,
+ outBaiFile=self.outBaiFile)
+ self._makepbi(sortedBamFile=self.outBamFile)
diff --git a/pbalign/ccs.py b/pbalign/ccs.py
new file mode 100644
index 0000000..ad3b4a4
--- /dev/null
+++ b/pbalign/ccs.py
@@ -0,0 +1,32 @@
+
+"""
+VERY thin wrapper on top of pbalign to provide a tool contract-driven task
+specific to CCS reads.
+"""
+
+import sys
+
+from pbcommand.models import FileTypes
+
+from pbalign import pbalignrunner
+import pbalign.options
+
+class Constants(pbalign.options.Constants):
+ TOOL_ID = "pbalign.tasks.pbalign_ccs"
+ DRIVER_EXE = "python -m pbalign.ccs --resolved-tool-contract"
+ INPUT_FILE_TYPE = FileTypes.DS_CCS
+ OUTPUT_FILE_TYPE = FileTypes.DS_ALIGN_CCS
+ # some modified defaults
+ ALGORITHM_OPTIONS_DEFAULT = "--minMatch 12 --bestn 10 --minPctSimilarity 70.0"
+
+def get_parser():
+ return pbalign.options.get_contract_parser(Constants, ccs_mode=True)
+
+def main(argv=sys.argv):
+ return pbalignrunner.main(
+ argv=argv,
+ get_parser_func=get_parser,
+ contract_runner_func=pbalignrunner.resolved_tool_contract_runner_ccs)
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/pbalign/filterservice.py b/pbalign/filterservice.py
index 9d23215..bd65240 100755
--- a/pbalign/filterservice.py
+++ b/pbalign/filterservice.py
@@ -37,8 +37,7 @@ in an input SAM file according to filtering criteria."""
from __future__ import absolute_import
import logging
from pbalign.service import Service
-from pbalign.utils.fileutil import isExist
-
+from pbalign.utils.fileutil import getFileFormat, FILE_FORMATS, isExist
class FilterService(Service):
""" Call samFilter to filter low quality hits and apply multiple hits
@@ -54,22 +53,22 @@ class FilterService(Service):
return "samFilter"
def __init__(self, inSamFile, refFile, outSamFile,
- alnServiceName, scoreSign, options,
+ alignerName, scoreSign, options,
adapterGffFile=None):
"""Initialize a FilterService object.
Input:
- inSamFile: an input SAM file
+ inSamFile: an input SAM/BAM file
refFile : the reference FASTA file
- outSAM : an output SAM file
+ outSAM : an output SAM/BAM file
alnServiceName: the name of the align service
scoreSign: score sign of the aligner, can be -1 or 1
options : pbalign options
adapterGffFile: a GFF file storing all the adapters
"""
- self.inSamFile = inSamFile
+ self.inSamFile = inSamFile # sam|bam
self.refFile = refFile
- self.outSamFile = outSamFile
- self.alnServiceName = alnServiceName
+ self.outSamFile = outSamFile # sam|bam
+ self.alignerName = alignerName
self.scoreSign = scoreSign
self.options = options
self.adapterGffFile = adapterGffFile
@@ -78,23 +77,32 @@ class FilterService(Service):
def cmd(self):
"""String of a command-line to execute."""
return self._toCmd(self.inSamFile, self.refFile,
- self.outSamFile, self.alnServiceName,
+ self.outSamFile, self.alignerName,
self.scoreSign, self.options,
self.adapterGffFile)
def _toCmd(self, inSamFile, refFile, outSamFile,
- alnServiceName, scoreSign, options, adapterGffFile):
+ alignerName, scoreSign, options, adapterGffFile):
""" Generate a samFilter command line from options.
Input:
inSamFile : the input SAM file
refFile : the reference FASTA file
outSamFile: the output SAM file
- alnServiceName: aligner service name
+ alignerName: aligner service name
scoreSign : score sign, can be -1 or 1
options : argument options
Output:
a command-line string
"""
+ # blasr supports in-line alignment filtration,
+ # no need to call samFilter at all.
+ if alignerName == "blasr" and \
+ not self.options.filterAdapterOnly:
+ cmdStr = "rm -f {outFile} && ln -s {inFile} {outFile}".format(
+ inFile=inSamFile, outFile=outSamFile)
+ return cmdStr
+
+ # if aligner is not blasr, call samFilter instead
cmdStr = self.progName + \
" {inSamFile} {refFile} {outSamFile} ".format(
inSamFile=inSamFile,
@@ -121,7 +129,7 @@ class FilterService(Service):
cmdStr += " -scoreSign {0}".format(scoreSign)
else:
logging.error("{0}'s score sign is neither 1 nor -1.".format(
- alnServiceName))
+ alignerName))
if options.scoreCutoff is not None:
cmdStr += " -scoreCutoff {0}".format(options.scoreCutoff)
@@ -133,6 +141,7 @@ class FilterService(Service):
isExist(adapterGffFile):
cmdStr += " -filterAdapterOnly {gffFile}".format(
gffFile=adapterGffFile)
+
return cmdStr
def run(self):
diff --git a/pbalign/forquiverservice/__init__.py b/pbalign/forquiverservice/__init__.py
deleted file mode 100755
index 175bd6a..0000000
--- a/pbalign/forquiverservice/__init__.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-###############################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-###############################################################################
-
-# Author: Yuan Li
-"""Initialization."""
-from __future__ import absolute_import
diff --git a/pbalign/forquiverservice/forquiver.py b/pbalign/forquiverservice/forquiver.py
deleted file mode 100755
index e478daa..0000000
--- a/pbalign/forquiverservice/forquiver.py
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/usr/bin/env python
-###############################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-###############################################################################
-
-"""This script defines ForQuiverService, which post-processes a cmp.h5 file so
-that it can be used by quiver directly. ForQuiverService sorts the file, loads
-pulse information to it and finally repacks it.."""
-
-# Author: Yuan Li
-
-from __future__ import absolute_import
-import logging
-from pbalign.forquiverservice.sort import SortService
-from pbalign.forquiverservice.loadpulses import LoadPulsesService
-from pbalign.forquiverservice.loadchemistry import LoadChemistryService
-from pbalign.forquiverservice.repack import RepackService
-
-
-class ForQuiverService(object):
- """
- Uses SortService, LoadPulsesService, LoadChemistryService,
- RepackService to post process a cmp.h5 file so that the file can
- be used by quiver directly.
- """
- @property
- def name(self):
- """Name of ForQuiverService."""
- return "ForQuiverService"
-
- def __init__(self, fileNames, options):
- """Initialize a ForQuiverService object.
-
- Input:
- fileNames : pbalign file names
- options : pbalign options
-
- """
- self.fileNames = fileNames
- self.options = options
- self._loadpulsesService = LoadPulsesService(
- self.fileNames.pulseFileName,
- self.fileNames.outputFileName,
- self.options)
- self._loadchemistryService = LoadChemistryService(
- self.fileNames.pulseFileName,
- self.fileNames.outputFileName,
- self.options)
- self._sortService = SortService(
- self.fileNames.outputFileName,
- self.options)
- self._repackService = RepackService(
- self.fileNames.outputFileName,
- self.fileNames.outputFileName + ".TMP")
-
- def run(self):
- """ Run the ForQuiver service."""
- logging.info(self.name + ": Sort.")
-
- self._sortService.checkAvailability()
- self._sortService.run()
-
- logging.info(self.name + ": LoadPulses.")
- self._loadpulsesService.checkAvailability()
- self._loadpulsesService.run()
-
- logging.info(self.name + ": LoadChemistry.")
- self._loadchemistryService.checkAvailability()
- self._loadchemistryService.run()
-
- logging.info(self.name + ": Repack.")
- self._repackService.checkAvailability()
- self._repackService.run()
diff --git a/pbalign/forquiverservice/loadchemistry.py b/pbalign/forquiverservice/loadchemistry.py
deleted file mode 100755
index 5f6516f..0000000
--- a/pbalign/forquiverservice/loadchemistry.py
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-###############################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-###############################################################################
-
-from __future__ import absolute_import
-import logging
-from pbalign.service import Service
-
-class LoadChemistryService(Service):
- @property
- def name(self):
- return "LoadChemistryService"
-
- @property
- def progName(self):
- return "loadChemistry.py"
-
- # Quiver only uses the following five metrics.
-
- def __init__(self, basFofnFile, cmpFile, options):
- """
- Input:
- basFofnFile: the input BASE.H5 (or fofn) files
- cmpFile : an input CMP.H5 file
- options : pbalign options
- """
- self.basFofnFile = basFofnFile
- self.cmpFile = cmpFile
- self.options = options
-
- @property
- def cmd(self):
- """String of a command-line to execute."""
- return self._toCmd(self.basFofnFile, self.cmpFile)
-
- def _toCmd(self, basFofnFile, cmpFile):
- """
- Generate a loadChemistry command line.
- """
- cmdStr = self.progName + \
- " {basFofnFile} {cmpFile} ".format(
- basFofnFile=basFofnFile, cmpFile=cmpFile)
-
- return cmdStr
-
- def run(self):
- """Run the loadChemistry service."""
- logging.info(self.name + ": Load pulses using {progName}.".
- format(progName=self.progName))
- return self._execute()
diff --git a/pbalign/forquiverservice/loadpulses.py b/pbalign/forquiverservice/loadpulses.py
deleted file mode 100755
index 98c71b8..0000000
--- a/pbalign/forquiverservice/loadpulses.py
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/usr/bin/env python
-###############################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-###############################################################################
-
-"""Define LoadPulseService class, which calls loadPulses to load PacBio
-pulse metrics to a cmp.h5 file. Five metrics, inclduing DeletionQV,
-DeletionTag, InsertionQV, MergeQV, and SubstitutionQV, are loaded by default
-unless --metrics is specified."""
-
-# Author: Yuan Li
-
-from __future__ import absolute_import
-import logging
-from pbalign.service import Service
-
-
-class LoadPulsesService(Service):
- """
- LoadPulsesService calls loadPulses to load PacBio pulse information to a
- cmp.h5 file.
- """
- @property
- def name(self):
- """Name of LoadPulsesService."""
- return "LoadPulsesService"
-
- @property
- def progName(self):
- """Program to call."""
- return "loadPulses"
-
- # Quiver only uses the following five metrics.
-
- def __init__(self, basFofnFile, cmpFile, options):
- """Initialize a LoadPulsesService object.
- Input:
- basFofnFile: the input BASE.H5 (or fofn) files
- cmpFile : an input CMP.H5 file
- options : pbalign options
- """
- self.basFofnFile = basFofnFile
- self.cmpFile = cmpFile
- self.options = options
-
- @property
- def cmd(self):
- """String of a command-line to execute."""
- return self._toCmd(self.basFofnFile, self.cmpFile)
-
- def _toCmd(self, basFofnFile, cmpFile):
- """Generate a loadPulses command line.
-
- Input:
- basFofnFile: a BAX/PLX.H5 (or fofn) file with pulses
- cmpFile : an input CMP.H5 file
- Output:
- a command-line string
-
- """
- cmdStr = self.progName + \
- " {basFofnFile} {cmpFile} ".format(
- basFofnFile=basFofnFile, cmpFile=cmpFile)
-
- metrics = self.options.metrics.replace(" ", "")
- cmdStr += " -metrics {metrics} ".format(metrics=metrics)
-
- if self.options.byread:
- cmdStr += " -byread "
-
- return cmdStr
-
- def run(self):
- """Run the loadPulses service."""
- logging.info(self.name + ": Load pulses using {progName}.".
- format(progName=self.progName))
- return self._execute()
diff --git a/pbalign/forquiverservice/repack.py b/pbalign/forquiverservice/repack.py
deleted file mode 100755
index 5c0f210..0000000
--- a/pbalign/forquiverservice/repack.py
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-###############################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-###############################################################################
-
-"""Define RepackService class, which calls h5repack to repack a cmp.h5 file."""
-
-# Author: Yuan Li
-
-from __future__ import absolute_import
-import logging
-from pbalign.service import Service
-
-
-class RepackService(Service):
- """RepackService calls h5repack to repack a PacBio cmp.h5 file."""
- @property
- def name(self):
- """Name of RepackService."""
- return "RepackService"
-
- @property
- def progName(self):
- """Program to call."""
- return "h5repack"
-
- def __init__(self, cmpFile, tmpcmpFile):
- """Initialize a RepackService object.
-
- Input:
- cmpFile : an input CMP.H5 file
- tmpcmpFile : a temporary CMP.H5 file
-
- """
- self.cmpFile = cmpFile
- self.tmpcmpFile = tmpcmpFile
-
- @property
- def cmd(self):
- """String of a command-line to execute."""
- return self._toCmd(self.cmpFile, self.tmpcmpFile)
-
- def _toCmd(self, cmpFile, tmpcmpFile):
- """Generate a h5repack command line.
-
- Input:
- cmpFile : an input CMP.H5 file
- options : pbalign options
- Output:
- a command-line string
-
- """
- cmdStr = self.progName + " -f GZIP=1 {i} {o} && mv {o} {i}".\
- format(i=cmpFile, o=tmpcmpFile)
- return cmdStr
-
- def run(self):
- """Run the Repack service."""
- logging.info(self.name + ": Repack a cmp.h5 file using {progName}.".
- format(progName=self.progName))
- return self._execute()
diff --git a/pbalign/forquiverservice/sort.py b/pbalign/forquiverservice/sort.py
deleted file mode 100755
index 750601e..0000000
--- a/pbalign/forquiverservice/sort.py
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env python
-###############################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-# contributors may be used to endorse or promote products derived from
-# this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-# NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-###############################################################################
-
-"""This script defines SortService, which calls cmph5tools.py sort to sort a
-cmp.h5 file."""
-
-# Author: Yuan Li
-
-from __future__ import absolute_import
-import logging
-from pbalign.service import Service
-
-
-class SortService(Service):
- """Call cmph5tools.py sort to sort a PacBio cmp.h5 file."""
- @property
- def name(self):
- """Name of SortService."""
- return "SortService"
-
- @property
- def progName(self):
- """Program to call."""
- return "cmph5tools.py"
-
- def __init__(self, cmpFile, options):
- """Initialize a SortService object.
- Input:
- cmpFile : an input CMP.H5 file
- options : pbalign options
- """
- self.cmpFile = cmpFile
- self.options = options
-
- @property
- def cmd(self):
- """String of a command-line to execute."""
- return self._toCmd(self.cmpFile, self.options)
-
- def _toCmd(self, cmpFile, options):
- """Generate a cmph5tools.py sort command line.
-
- Input:
- cmpFile : an input CMP.H5 file
- options : pbalign options
- Output:
- a command-line string
-
- """
- cmdStr = self.progName
- if options.verbosity > 1:
- cmdStr += " -vv "
-
- cmdStr += " sort --deep --inPlace {cmpFile} ".format(cmpFile=cmpFile)
- return cmdStr
-
- def run(self):
- """Run the sort service."""
- logging.info(self.name + ": Sort a cmp.h5 file using {progName}.".
- format(progName=self.progName))
- return self._execute()
diff --git a/pbalign/options.py b/pbalign/options.py
index 9cb1e81..acb1f9f 100755
--- a/pbalign/options.py
+++ b/pbalign/options.py
@@ -34,8 +34,40 @@
from __future__ import absolute_import
import argparse
+import logging
from copy import copy
+from pbcommand.models import FileTypes, SymbolTypes, ResourceTypes, get_pbparser
+from pbcommand.common_options import add_debug_option, add_base_options
+
+from pbalign.__init__ import get_version
+
+log = logging.getLogger(__name__)
+
+
+class Constants(object):
+ TOOL_ID = "pbalign.tasks.pbalign"
+ INPUT_FILE_TYPE = FileTypes.DS_SUBREADS
+ OUTPUT_FILE_TYPE = FileTypes.DS_ALIGN
+ OUTPUT_FILE_NAME = "mapped"
+ ALGORITHM_OPTIONS_ID = "pbalign.task_options.algorithm_options"
+ ALGORITHM_OPTIONS_DEFAULT = "--minMatch 12 --bestn 10 --minPctSimilarity 70.0"
+ MIN_ACCURACY_ID = "pbalign.task_options.min_accuracy"
+ MIN_LENGTH_ID = "pbalign.task_options.min_length"
+ NO_SPLIT_ID = "pbalign.task_options.no_split_subreads"
+ CONCORDANT_ID = "pbalign.task_options.concordant"
+ HIT_POLICY_ID = "pbalign.task_options.hit_policy"
+ DRIVER_EXE = "pbalign --resolved-tool-contract "
+ VERSION = get_version()
+ PARSER_DESC = """\
+Mapping PacBio sequences to references using an algorithm selected from a
+selection of supported command-line alignment algorithms. Input can be a
+fasta, pls.h5, bas.h5 or ccs.h5 file or a fofn (file of file names). Output
+can be in SAM or BAM format. If output is BAM format, aligner can
+only be blasr and QVs will be loaded automatically.
+
+NOTE that pbalign no longer supports CMP.H5 in 3.0."""
+
# The first candidate 'blasr' is the default.
ALGORITHM_CANDIDATES = ('blasr', 'bowtie', 'gmap')
@@ -57,15 +89,17 @@ DEFAULT_OPTIONS = {"regionTable": None,
# Aligner options
"maxHits": 10,
"minAnchorSize": 12,
+ "maxMatch": 30,
"noSplitSubreads": False,
"concordant": False,
+ "unaligned": None,
"algorithmOptions": None,
"useccs": None,
# Filter options
- "maxDivergence": 30,
- "minAccuracy": 70,
+ "maxDivergence": 30.0,
+ "minAccuracy": 70.0,
"minLength": 50,
- "scoreFunction": SCOREFUNCTION_CANDIDATES[0],
+ #"scoreFunction": SCOREFUNCTION_CANDIDATES[0],
"scoreCutoff": None,
"hitPolicy": HITPOLICY_CANDIDATES[0],
"filterAdapterOnly": False,
@@ -78,39 +112,30 @@ DEFAULT_OPTIONS = {"regionTable": None,
# Miscellaneous options
"nproc": 8,
"seed": 1,
- "tmpDir": "/scratch"}
-
-
-def constructOptionParser(parser=None):
- """Constrct and return an argument parser.
-
- If a parser is specified, use it. Otherwise, create a parser instead.
- Add PBAlignRunner arguments to construct the parser, and finally
- return it.
+ "tmpDir": "/tmp"}
+def constructOptionParser(parser, C=Constants, ccs_mode=False):
"""
- desc = "Mapping PacBio sequences to references using an algorithm \n"
- desc += "selected from a selection of supported command-line alignment\n"
- desc += "algorithms. Input can be a fasta, pls.h5, bas.h5 or ccs.h5\n"
- desc += "file or a fofn (file of file names). Output is in either\n"
- desc += "cmp.h5 or sam format.\n"
-
- if (parser is None):
- parser = argparse.ArgumentParser()
-
- parser.description = desc
- parser.argument_default = argparse.SUPPRESS
- parser.formatter_class = argparse.RawTextHelpFormatter
+ Add PBAlignRunner arguments to the parser.
+ """
+ # save reference to PbParser
+ p = parser
+ tcp = p.tool_contract_parser
+ parser = parser.arg_parser.parser
+ #parser.argument_default = argparse.SUPPRESS
+ parser.formatter_class = argparse.ArgumentDefaultsHelpFormatter
+ add_debug_option(parser)
# Optional input.
- parser.add_argument("--regionTable",
+ input_group = parser.add_argument_group("Optional input arguments")
+ input_group.add_argument("--regionTable",
dest="regionTable",
type=str,
default=None,
action="store",
help="Specify a region table for filtering reads.")
- parser.add_argument("--configFile",
+ input_group.add_argument("--configFile",
dest="configFile",
default=None,
type=str,
@@ -120,7 +145,7 @@ def constructOptionParser(parser=None):
helpstr = "When input reads are in fasta format and output is a cmp.h5\n" + \
"this option can specify pls.h5 or bas.h5 or \n" + \
"FOFN files from which pulse metrics can be loaded for Quiver."
- parser.add_argument("--pulseFile",
+ input_group.add_argument("--pulseFile",
dest="pulseFile",
default=None,
type=str,
@@ -128,9 +153,9 @@ def constructOptionParser(parser=None):
help=helpstr)
# Chose an aligner.
+ align_group = parser.add_argument_group("Alignment options")
helpstr = "Select an aligorithm from {0}.\n".format(ALGORITHM_CANDIDATES)
- helpstr += "Default algorithm is {0}.".format(DEFAULT_OPTIONS["algorithm"])
- parser.add_argument("--algorithm",
+ align_group.add_argument("--algorithm",
dest="algorithm",
type=str,
action="store",
@@ -140,9 +165,8 @@ def constructOptionParser(parser=None):
# Aligner options.
helpstr = "The maximum number of matches of each read to the \n" + \
- "reference sequence that will be evaluated. Default\n" + \
- "value is {0}.".format(DEFAULT_OPTIONS["maxHits"])
- parser.add_argument("--maxHits",
+ "reference sequence that will be evaluated."
+ align_group.add_argument("--maxHits",
dest="maxHits",
type=int,
default=None, # Set as None instead of a real number.
@@ -150,9 +174,8 @@ def constructOptionParser(parser=None):
help=helpstr)
helpstr = "The minimum anchor size defines the length of the read\n" + \
- "that must match against the reference sequence. Default\n" + \
- "value is {0}.".format(DEFAULT_OPTIONS["minAnchorSize"])
- parser.add_argument("--minAnchorSize",
+ "that must match against the reference sequence."
+ align_group.add_argument("--minAnchorSize",
dest="minAnchorSize",
type=int,
default=None, # Set as None to avoid conflicts with
@@ -160,6 +183,11 @@ def constructOptionParser(parser=None):
action="store",
help=helpstr)
+ helpstr = "BLASR maxMatch option. (Will be overriden if is also set in algorithmOptions)"
+ align_group.add_argument("--maxMatch", dest="maxMatch", type=int,
+ default=DEFAULT_OPTIONS["maxMatch"],
+ action="store", help=helpstr)
+
# Aligner options: Use ccs or not?
helpstr = "Map the ccsSequence to the genome first, then align\n" + \
"subreads to the interval that the CCS reads mapped to.\n" + \
@@ -167,7 +195,7 @@ def constructOptionParser(parser=None):
" the template.\n" + \
" useccsall: maps all subreads.\n" + \
" useccsdenovo: maps ccs only."
- parser.add_argument("--useccs",
+ align_group.add_argument("--useccs",
type=str,
choices=["useccs", "useccsall", "useccsdenovo"],
action="store",
@@ -175,24 +203,32 @@ def constructOptionParser(parser=None):
help=helpstr)
helpstr = "Do not split reads into subreads even if subread \n" + \
- "regions are available. Default value is {0}."\
- .format(DEFAULT_OPTIONS["noSplitSubreads"])
- parser.add_argument("--noSplitSubreads",
+ "regions are available."
+ align_group.add_argument("--noSplitSubreads",
dest="noSplitSubreads",
default=DEFAULT_OPTIONS["noSplitSubreads"],
action="store_true",
help=helpstr)
+ if not ccs_mode:
+ tcp.add_boolean(C.NO_SPLIT_ID, "noSplitSubreads",
+ default=DEFAULT_OPTIONS["noSplitSubreads"],
+ name="Align unsplit polymerase reads",
+ description=helpstr)
helpstr = "Map subreads of a ZMW to the same genomic location.\n"
- parser.add_argument("--concordant",
+ align_group.add_argument("--concordant",
dest="concordant",
default=DEFAULT_OPTIONS["concordant"],
action="store_true",
help=helpstr)
-
- helpstr = "Number of threads. Default value is {v}."\
- .format(v=DEFAULT_OPTIONS["nproc"])
- parser.add_argument("--nproc",
+ if not ccs_mode:
+ tcp.add_boolean(C.CONCORDANT_ID, "concordant",
+ default=DEFAULT_OPTIONS["concordant"],
+ name="Concordant alignment",
+ description="Map subreads of a ZMW to the same genomic location")
+
+ helpstr = "Number of threads."
+ align_group.add_argument("--nproc",
type=int,
dest="nproc",
default=DEFAULT_OPTIONS["nproc"],
@@ -200,18 +236,24 @@ def constructOptionParser(parser=None):
action="store",
help=helpstr)
- parser.add_argument("--algorithmOptions",
+ align_group.add_argument("--algorithmOptions",
type=str,
dest="algorithmOptions",
default=None,
action="append",
help="Pass alignment options through.")
+ # XXX the arguments used in SMRTpipe 2.3 are different from the defaults
+ # for the command line tool
+ tcp.add_str(C.ALGORITHM_OPTIONS_ID, "algorithmOptions",
+ default=C.ALGORITHM_OPTIONS_DEFAULT,
+ name="Algorithm options",
+ description="List of space-separated arguments passed to BLASR")
# Filtering criteria and hit policy.
+ filter_group = parser.add_argument_group("Filter criteria options")
helpstr = "The maximum allowed percentage divergence of a read \n" + \
- "from the reference sequence. Default value is {0}." \
- .format(DEFAULT_OPTIONS["maxDivergence"])
- parser.add_argument("--maxDivergence",
+ "from the reference sequence."
+ filter_group.add_argument("--maxDivergence",
dest="maxDivergence",
type=float,
default=DEFAULT_OPTIONS["maxDivergence"],
@@ -219,39 +261,45 @@ def constructOptionParser(parser=None):
action="store",
help=helpstr)
- helpstr = "The minimum percentage accuracy of alignments that\n" + \
- "will be evaluated. Default value is {v}." \
- .format(v=DEFAULT_OPTIONS["minAccuracy"])
- parser.add_argument("--minAccuracy",
+ helpstr = "The minimum concordance of alignments that\n" + \
+ "will be evaluated."
+ filter_group.add_argument("--minAccuracy",
dest="minAccuracy",
type=float,
default=DEFAULT_OPTIONS["minAccuracy"],
#default=70,
action="store",
help=helpstr)
+ tcp.add_float(C.MIN_ACCURACY_ID, "minAccuracy",
+ default=DEFAULT_OPTIONS["minAccuracy"],
+ name="Min. concordance",
+ description="Minimum required alignment concordance")
helpstr = "The minimum aligned read length of alignments that\n" + \
- "will be evaluated. Default value is {v}." \
- .format(v=DEFAULT_OPTIONS["minLength"])
- parser.add_argument("--minLength",
+ "will be evaluated."
+ filter_group.add_argument("--minLength",
dest="minLength",
type=int,
default=DEFAULT_OPTIONS["minLength"],
action="store",
help=helpstr)
-
- helpstr = "Specify a score function for evaluating alignments.\n"
- helpstr += " alignerscore : aligner's score in the SAM tag 'as'.\n"
- helpstr += " editdist : edit distance between read and reference.\n"
- helpstr += " blasrscore : blasr's default score function.\n"
- helpstr += "Default value is {0}.".format(DEFAULT_OPTIONS["scoreFunction"])
- parser.add_argument("--scoreFunction",
- dest="scoreFunction",
- type=str,
- choices=SCOREFUNCTION_CANDIDATES,
- default=DEFAULT_OPTIONS["scoreFunction"],
- action="store",
- help=helpstr)
+ tcp.add_int(C.MIN_LENGTH_ID, "minLength",
+ default=DEFAULT_OPTIONS["minLength"],
+ name="Min. length",
+ description="Minimum required alignment length")
+
+ #helpstr = "Specify a score function for evaluating alignments.\n"
+ #helpstr += " alignerscore : aligner's score in the SAM tag 'as'.\n"
+ #helpstr += " editdist : edit distance between read and reference.\n"
+ #helpstr += " blasrscore : blasr's default score function.\n"
+ #helpstr += "Default value is {0}.".format(DEFAULT_OPTIONS["scoreFunction"])
+ #filter_group.add_argument("--scoreFunction",
+ # dest="scoreFunction",
+ # type=str,
+ # choices=SCOREFUNCTION_CANDIDATES,
+ # default=DEFAULT_OPTIONS["scoreFunction"],
+ # action="store",
+ # help=helpstr)
#" userscore : user-defined score matrix (by -scoreMatrix).\n")
#parser.add_argument("--scoreMatrix",
# dest="scoreMatrix",
@@ -275,7 +323,7 @@ def constructOptionParser(parser=None):
# "= -5 (match),\n"
# "mismatch = 6.\n")
- parser.add_argument("--scoreCutoff",
+ filter_group.add_argument("--scoreCutoff",
dest="scoreCutoff",
type=int,
default=None,
@@ -288,89 +336,98 @@ def constructOptionParser(parser=None):
" allbest : selects all the best score hits.\n" + \
" randombest: selects a random hit from all best score hits.\n" + \
" leftmost : selects a hit which has the best score and the\n" + \
- " smallest mapping coordinate in any reference.\n" + \
- "Default value is {v}.".format(v=DEFAULT_OPTIONS["hitPolicy"])
- parser.add_argument("--hitPolicy",
+ " smallest mapping coordinate in any reference.\n"
+ filter_group.add_argument("--hitPolicy",
dest="hitPolicy",
type=str,
choices=HITPOLICY_CANDIDATES,
default=DEFAULT_OPTIONS["hitPolicy"],
action="store",
help=helpstr)
+ tcp.add_str(C.HIT_POLICY_ID, "hitPolicy",
+ default=DEFAULT_OPTIONS["hitPolicy"],
+ name="Hit policy",
+ description=helpstr)
helpstr = "If specified, do not report adapter-only hits using\n" + \
"annotations with the reference entry."
- parser.add_argument("--filterAdapterOnly",
+ filter_group.add_argument("--filterAdapterOnly",
dest="filterAdapterOnly",
default=DEFAULT_OPTIONS["filterAdapterOnly"],
action="store_true",
help=helpstr)
# Output.
- helpstr = "Specify the ReadType attribute in the cmp.h5 output.\n" + \
- "Default value is {v}.".format(v=DEFAULT_OPTIONS["readType"])
- parser.add_argument("--readType",
+ # CMP H5 output has been deprecated, let's hide associated options.
+ cmph5_group = parser.add_argument_group("Options for cmp.h5")
+ helpstr = "Specify the ReadType attribute in the cmp.h5 output.\n"
+ cmph5_group.add_argument("--readType",
dest="readType",
type=str,
action="store",
default=DEFAULT_OPTIONS["readType"],
help=argparse.SUPPRESS)
- #help=helpstr)
helpstr = "The output cmp.h5 file which will be sorted, loaded\n" + \
"with pulse QV information, and repacked, so that it \n" + \
"can be consumed by quiver directly. This requires\n" + \
"the input file to be in PacBio bas/pls.h5 format,\n" + \
- "and --useccs must be None. Default value is False."
- parser.add_argument("--forQuiver",
+ "and --useccs must be None."
+ cmph5_group.add_argument("--forQuiver",
dest="forQuiver",
action="store_true",
default=DEFAULT_OPTIONS["forQuiver"],
- help=helpstr)
+ help=argparse.SUPPRESS)
helpstr = "Similar to --forQuiver, the only difference is that \n" + \
- "--useccs can be specified. Default value is False."
- parser.add_argument("--loadQVs",
+ "--useccs can be specified."
+ cmph5_group.add_argument("--loadQVs",
dest="loadQVs",
action="store_true",
default=DEFAULT_OPTIONS["loadQVs"],
- help=helpstr)
+ help=argparse.SUPPRESS)
helpstr = "Load pulse information using -byread option instead\n" + \
"of -bymetric. Only works when --forQuiver or \n" + \
- "--loadQVs are set. Default value is False."
- parser.add_argument("--byread",
+ "--loadQVs are set."
+ cmph5_group.add_argument("--byread",
dest="byread",
action="store_true",
default=DEFAULT_OPTIONS["byread"],
- help=helpstr)
+ help=argparse.SUPPRESS)
helpstr = "Load the specified (comma-delimited list of) metrics\n" + \
"instead of the default metrics required by quiver.\n" + \
"This option only works when --forQuiver or \n" + \
- "--loadQVs are set. Default: {m}".\
- format(m=DEFAULT_OPTIONS["metrics"])
- parser.add_argument("--metrics",
+ "--loadQVs are set."
+ cmph5_group.add_argument("--metrics",
dest="metrics",
type=str,
action="store",
default=DEFAULT_OPTIONS["metrics"],
- help=helpstr)
+ help=argparse.SUPPRESS)
# Miscellaneous.
+ misc_group = parser.add_argument_group("Miscellaneous options")
+ helpstr = "Output names of unaligned reads to specified file."
+ misc_group.add_argument("--unaligned",
+ dest="unaligned",
+ type=str,
+ action="store",
+ default=DEFAULT_OPTIONS["unaligned"],
+ help=helpstr)
+
helpstr = "Initialize the random number generator with a none-zero \n" + \
- "integer. Zero means that current system time is used.\n" + \
- "Default value is {v}.".format(v=DEFAULT_OPTIONS["seed"])
- parser.add_argument("--seed",
+ "integer. Zero means that current system time is used.\n"
+ misc_group.add_argument("--seed",
dest="seed",
type=int,
default=DEFAULT_OPTIONS["seed"],
action="store",
help=helpstr)
- helpstr = "Specify a directory for saving temporary files.\n" + \
- "Default is {v}.".format(v=DEFAULT_OPTIONS["tmpDir"])
- parser.add_argument("--tmpDir",
+ helpstr = "Specify a directory for saving temporary files.\n"
+ misc_group.add_argument("--tmpDir",
dest="tmpDir",
type=str,
action="store",
@@ -378,31 +435,11 @@ def constructOptionParser(parser=None):
help=helpstr)
# Keep all temporary & intermediate files.
- parser.add_argument("--keepTmpFiles",
+ misc_group.add_argument("--keepTmpFiles",
dest="keepTmpFiles",
action="store_true",
default=False,
help=argparse.SUPPRESS)
-
- # Required options: inputs and outputs.
- helpstr = "The input file can be a fasta, plx.h5, bax.h5, ccs.h5\n" + \
- "file or a fofn."
- parser.add_argument("inputFileName",
- type=str,
- action="store",
- help=helpstr)
-
- helpstr = "Either a reference fasta file or a reference repository."
- parser.add_argument("referencePath",
- type=str,
- action="store",
- help=helpstr)
-
- parser.add_argument("outputFileName",
- type=str,
- action="store",
- help="The output cmp.h5 or sam file.")
-
return parser
@@ -479,44 +516,109 @@ def importDefaultOptions(parsedOptions, additionalDefaults=DEFAULT_OPTIONS):
return newOptions, infoMsg.rstrip(', ')
-def parseOptions(argumentList, parser=None):
- """Parse a list of arguments, return options and an info message.
-
- If a parser is not specified, create a new parser, otherwise, use
- the specifed parser. If there exists a config file, import options
- from the config file and finally overwrite these options with
- options from the argument list.
-
+class _ArgParser(argparse.ArgumentParser):
"""
- # Obtain a constructed argument parser.
- parser = constructOptionParser(parser)
-
- # Parse argumentList for the first time in order to
- # get a config file.
- options = parser.parse_args(args=argumentList)
-
- # Import options from the specified config file, if it exists.
- configOptions, infoMsg = importConfigOptions(options)
-
- # Parse argumentList for the second time in order to
- # overwrite config options with options in argumentList.
- newOptions = copy(configOptions)
- newOptions.algorithmOptions = None
- newOptions = parser.parse_args(namespace=newOptions, args=argumentList)
-
- # Overwrite config algorithmOptions if it is specified in argumentList
- if newOptions.algorithmOptions is None:
- if configOptions.algorithmOptions is not None:
- newOptions.algorithmOptions = configOptions.algorithmOptions
- else:
- newOptions.algorithmOptions = \
- " ".join(newOptions.algorithmOptions)
-
- # Return the updated options and an info message.
- return parser, newOptions, infoMsg
-
-#if __name__ == "__main__":
-# import sys
-# parser = argparse.ArgumentParser()
-# parser, options, info = parseOptions(argumentList = sys.argv[1:],
-# parser=parser)
+ Substitute for the standard argument parser, where parse_args is
+ extended to facilitate the use of config files.
+ """
+ def parse_args(self, args=None, namespace=None):
+ options = super(_ArgParser, self).parse_args(args=args,
+ namespace=namespace)
+
+ # Import options from the specified config file, if it exists.
+ configOptions, infoMsg = importConfigOptions(options)
+
+ # Parse argumentList for the second time in order to
+ # overwrite config options with options in argumentList.
+ newOptions = copy(configOptions)
+ newOptions.algorithmOptions = None
+ newOptions = super(_ArgParser, self).parse_args(namespace=newOptions,
+ args=args)
+
+ # Overwrite config algorithmOptions if it is specified in argumentList
+ if newOptions.algorithmOptions is None:
+ if configOptions.algorithmOptions is not None:
+ newOptions.algorithmOptions = configOptions.algorithmOptions
+ else:
+ newOptions.algorithmOptions = \
+ " ".join(newOptions.algorithmOptions)
+
+ # FIXME gross hack to work around the problem of passing this
+ # parameter from a resolved tool contract
+ def unquote(s):
+ if s[0] in ["'", '"'] and s[-1] in ["'", '"']:
+ return s[1:-1]
+ return s
+ if newOptions.algorithmOptions is not None:
+ newOptions.algorithmOptions = unquote(newOptions.algorithmOptions)
+
+ # Return the updated options and an info message.
+ return newOptions #parser, newOptions, infoMsg
+
+def get_contract_parser(C=Constants, ccs_mode=False):
+ """
+ Create and populate the combined tool contract/argument parser. This
+ method can optionally be overridden with a different Constants object for
+ defining additional tasks (e.g. CCS alignment).
+ """
+ p = get_pbparser(
+ tool_id=C.TOOL_ID,
+ version=C.VERSION,
+ name=C.TOOL_ID,
+ description=C.PARSER_DESC,
+ driver_exe=C.DRIVER_EXE,
+ nproc=SymbolTypes.MAX_NPROC,
+ resource_types=(ResourceTypes.TMP_DIR,),
+ default_level="WARN")
+ p.arg_parser.parser = _ArgParser(
+ description=C.PARSER_DESC,
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ p.arg_parser.parser.version = C.VERSION
+ p.arg_parser.parser.add_argument('--version',
+ action="version",
+ help="show program's version number and exit")
+ add_base_options(p.arg_parser.parser)
+
+ # Required options: inputs and outputs.
+ p.add_input_file_type(C.INPUT_FILE_TYPE, "inputFileName",
+ "Subread DataSet", "SubreadSet or unaligned .bam")
+ p.add_input_file_type(FileTypes.DS_REF, "referencePath",
+ "ReferenceSet", "Reference DataSet or FASTA file")
+ p.add_output_file_type(C.OUTPUT_FILE_TYPE, "outputFileName",
+ name="Alignments",
+ description="Alignment results dataset",
+ default_name=C.OUTPUT_FILE_NAME)
+ constructOptionParser(p, ccs_mode=ccs_mode)
+ p.arg_parser.parser.add_argument(
+ "--profile", action="store_true",
+ help="Print runtime profile at exit")
+ return p
+
+def resolved_tool_contract_to_args(resolved_tool_contract):
+ rtc = resolved_tool_contract
+ p = get_contract_parser().arg_parser.parser
+ args = [
+ rtc.task.input_files[0],
+ rtc.task.input_files[1],
+ rtc.task.output_files[0],
+ "--nproc", str(resolved_tool_contract.task.nproc),
+ "--minAccuracy", str(rtc.task.options[Constants.MIN_ACCURACY_ID]),
+ "--minLength", str(rtc.task.options[Constants.MIN_LENGTH_ID]),
+ "--hitPolicy", str(rtc.task.options[Constants.HIT_POLICY_ID]),
+ "--tmpDir", rtc.task.tmpdir_resources[0].path,
+ "--log-level", rtc.task.log_level,
+ ]
+ if rtc.task.options[Constants.ALGORITHM_OPTIONS_ID]:
+ # FIXME this is gross: if I don't quote the options, the parser chokes;
+ # if I do quote them, the quotes get propagated, so I have to strip
+ # them off later
+ args.extend([
+ "--algorithmOptions=\"%s\"" %
+ rtc.task.options[Constants.ALGORITHM_OPTIONS_ID],
+ ])
+ if rtc.task.options.get(Constants.CONCORDANT_ID, False):
+ args.append("--concordant")
+ if rtc.task.options.get(Constants.NO_SPLIT_ID, False):
+ args.append("--noSplitSubreads")
+ log.info("Converted command line: 'pbalign {a}'".format(a=" ".join(args)))
+ return p.parse_args(args)
diff --git a/pbalign/pbalignfiles.py b/pbalign/pbalignfiles.py
index 9bf4ad9..7d401bc 100755
--- a/pbalign/pbalignfiles.py
+++ b/pbalign/pbalignfiles.py
@@ -33,14 +33,15 @@
"""This script defines class PBALignFiles."""
from __future__ import absolute_import
-import logging
from pbalign.utils.fileutil import checkInputFile, getRealFileFormat, \
checkOutputFile, checkReferencePath, checkRegionTableFile, \
getFileFormat, FILE_FORMATS
class PBAlignFiles:
+
"""PBAlignFiles contains files that will be used by pbalign."""
+
def __init__(self, inputFileName=None, referencePath=None,
outputFileName=None, regionTable=None,
pulseFileName=None):
@@ -80,8 +81,12 @@ class PBAlignFiles:
self.targetFileName = None
self.sawriterFileName = None
self.isWithinRepository = False
- self.alignerSamOut = None # The sam output file by an aligner
- self.filteredSam = None # The filtered sam file.
+ self.alignerSamOut = None # The sam/bam output file by an aligner
+ self.filteredSam = None # The filtered sam/bam file.
+
+ self.outBamFileName = None # filtered, sorted bam
+ self.outBaiFileName = None # output *.bai bam index
+ self.outPbiFileName = None # output *.pbi pacbio bam index
# There might be an adapter file in the reference repository in
# directory 'annotations', which can be used by the
@@ -99,7 +104,7 @@ class PBAlignFiles:
# the absolute and expanded path. Validate file format.
if inputFileName is not None and inputFileName != "":
self.inputFileName = checkInputFile(inputFileName)
- self.inputFileFormat = getRealFileFormat(inputFileName)
+ self.inputFileFormat = getRealFileFormat(self.inputFileName)
def SetPulseFileName(self, inputFileName, pulseFileName):
"""Verify and assign the pulse file from which pulses can be
@@ -133,10 +138,17 @@ class PBAlignFiles:
def SetOutputFileName(self, outputFileName):
"""Validate the user-specified output file and get the absolute and
- expanded path.
+ expanded path. If output file format is XML or BAM, set output BAM
+ filename, BAM index bai file and PacBio BAM index pbi file.
"""
if outputFileName is not None and outputFileName != "":
self.outputFileName = checkOutputFile(outputFileName)
+ if getFileFormat(self.outputFileName) in [FILE_FORMATS.BAM,
+ FILE_FORMATS.XML]:
+ prefix = str(self.outputFileName[0:-3])
+ self.outBamFileName = prefix + "bam"
+ self.outBaiFileName = self.outBamFileName + ".bai"
+ self.outPbiFileName = self.outBamFileName + ".pbi"
def SetRegionTable(self, regionTable):
"""Validate the user-specified region table and get the absolute and
@@ -167,16 +179,25 @@ class PBAlignFiles:
desc += "Output file: {o}\n".format(o=self.outputFileName)
desc += "Query file : {q}\n".format(q=self.queryFileName)
desc += "Target file: {t}\n".format(t=self.targetFileName)
- desc += "Suffix array file: {s}\n".format(s=self.sawriterFileName)
- desc += "regionTable:{s}\n".format(s=self.regionTable)
+ if self.sawriterFileName is not None:
+ desc += "Suffix array file: {s}\n".format(s=self.sawriterFileName)
+ if self.regionTable is not None:
+ desc += "Region table:{s}\n".format(s=self.regionTable)
if self.pulseFileName is not None:
desc += "Pulse files: {s}\n".format(s=self.pulseFileName)
- desc += "Aligner's SAM out: {t}\n".format(t=self.alignerSamOut)
- desc += "Filtered SAM file: {t}\n".format(t=self.filteredSam)
+ desc += "Aligner's SAM/BAM out: {t}\n".format(t=self.alignerSamOut)
+ desc += "Filtered SAM/BAM file: {t}\n".format(t=self.filteredSam)
if self.adapterGffFileName is not None:
desc += "Adapter GFF file: {t}\n".format(
t=self.adapterGffFileName)
+ if self.outBamFileName is not None:
+ desc += "Out bam file: {b}\n".format(b=self.outBamFileName)
+ if self.outBaiFileName is not None:
+ desc += "Bam index file: {b}\n".format(b=self.outBaiFileName)
+ if self.outPbiFileName is not None:
+ desc += "PacBio Bam index file: {p}\n".format(p=self.outPbiFileName)
+
return desc
-#if __name__ == "__main__":
+# if __name__ == "__main__":
# p = PBAlignFiles("lambda.fasta", "lambda_ref.fasta", "tmp.sam")
diff --git a/pbalign/pbalignrunner.py b/pbalign/pbalignrunner.py
index afd2a18..9583287 100755
--- a/pbalign/pbalignrunner.py
+++ b/pbalign/pbalignrunner.py
@@ -34,19 +34,26 @@
PBAlignRunner uses AlignService to align PacBio reads in FASTA/BASE/PULSE/FOFN
formats to reference sequences, then uses FilterServices to filter out
alignments that do not satisfy filtering criteria, and finally generates a SAM
-or CMP.H5 file.
+or BAM file.
"""
# Author: Yuan Li
+import functools
import logging
import time
import sys
import shutil
+from pbcommand.cli import pbparser_runner
+from pbcommand.utils import setup_log
+from pbcore.util.ToolRunner import PBToolRunner
+from pbcore.io import (AlignmentSet, ConsensusAlignmentSet)
+
from pbalign.__init__ import get_version
-from pbalign.options import parseOptions, ALGORITHM_CANDIDATES
+from pbalign.options import (ALGORITHM_CANDIDATES, get_contract_parser,
+ resolved_tool_contract_to_args)
from pbalign.alignservice.blasr import BlasrService
from pbalign.alignservice.bowtie import BowtieService
from pbalign.alignservice.gmap import GMAPService
@@ -54,35 +61,40 @@ from pbalign.utils.fileutil import getFileFormat, FILE_FORMATS, real_ppath
from pbalign.utils.tempfileutil import TempFileManager
from pbalign.pbalignfiles import PBAlignFiles
from pbalign.filterservice import FilterService
-from pbalign.forquiverservice.forquiver import ForQuiverService
-from pbcore.util.Process import backticks
-from pbcore.util.ToolRunner import PBToolRunner
-
+from pbalign.bampostservice import BamPostService
class PBAlignRunner(PBToolRunner):
+
"""Tool runner."""
- def __init__(self, argumentList):
+ def __init__(self, args=None, argumentList=(),
+ output_dataset_type=AlignmentSet):
"""Initialize a PBAlignRunner object.
argumentList is a list of arguments, such as:
['--debug', '--maxHits', '10', 'in.fasta', 'ref.fasta', 'out.sam']
"""
desc = "Utilities for aligning PacBio reads to reference sequences."
+ if args is None: # FIXME unit testing hack
+ args = get_contract_parser().arg_parser.parser.parse_args(argumentList)
+ self.args = args
+ # args.verbosity is computed by counting # of 'v's in '-vv...'.
+ # However in parseOptions, arguments are parsed twice to import config
+ # options and then overwrite them with argumentList (e.g. command-line)
+ # options.
+ #self.args.verbosity = 1 if (self.args.verbosity is None) else \
+ # (int(self.args.verbosity) / 2 + 1)
super(PBAlignRunner, self).__init__(desc)
- self._argumentList = argumentList
+ self._output_dataset_type = output_dataset_type
self._alnService = None
self._filterService = None
self.fileNames = PBAlignFiles()
self._tempFileManager = TempFileManager()
- self.parser, self.args, _infoMsg = parseOptions(
- argumentList=self._argumentList, parser=self.parser)
- # args.verbosity is computed by counting # of 'v's in '-vv...'.
- # However in parseOptions, arguments are parsed twice to import config
- # options and then overwrite them with argumentList (e.g. command-line)
- # options.
- self.args.verbosity = 0 if (self.args.verbosity is None) else \
- int(self.args.verbosity) / 2
+ def _setupParsers(self, description):
+ pass
+
+ def _addStandardArguments(self):
+ pass
def getVersion(self):
"""Return version."""
@@ -132,24 +144,20 @@ class PBAlignRunner(PBToolRunner):
args.readType = "CCS"
if args.forQuiver:
- if args.useccs is not None:
- errMsg = "Options --forQuiver and --useccs should not " + \
- "be used together, since Quiver is not designed to " + \
- "polish ccs reads. if you want to align ccs reads" + \
- "in cmp.h5 format with pulse QVs loaded, use " + \
- "--loadQVs with --useccs instead."
+ logging.warning("Option --forQuiver has been deprecated in 3.0")
+
+ outFormat = getFileFormat(fileNames.outputFileName)
+
+ if outFormat == FILE_FORMATS.CMP:
+ errMsg = "pbalign no longer supports CMP.H5 Output in 3.0."
+ raise IOError(errMsg)
+
+ if outFormat == FILE_FORMATS.BAM or outFormat == FILE_FORMATS.XML:
+ if args.algorithm != "blasr":
+ errMsg = "Must choose blasr in order to output a bam file."
raise ValueError(errMsg)
- args.loadQVs = True
-
- if args.loadQVs:
- if fileNames.pulseFileName is None:
- errMsg = "The input file has to be in bas/pls/ccs.h5 " + \
- "format, or --pulseFile needs to be specified, "
- if getFileFormat(fileNames.outputFileName) != FILE_FORMATS.CMP:
- errMsg = "The output file has to be in cmp.h5 format, "
- if errMsg != "":
- errMsg += "in order to load pulse QVs."
- logging.error(errMsg)
+ if args.filterAdapterOnly:
+ errMsg = "-filterAdapter does not work when out format is BAM."
raise ValueError(errMsg)
def _parseArgs(self):
@@ -159,12 +167,12 @@ class PBAlignRunner(PBToolRunner):
"""
pass
- def _output(self, inSam, refFile, outFile, readType=None, smrtTitle=False):
- """Generate a sam or a cmp.h5 file.
+ def _output(self, inSam, refFile, outFile, readType=None):
+        """Generate a SAM or BAM file.
Input:
- inSam : an input SAM file. (e.g. fileName.filteredSam)
+ inSam : an input SAM/BAM file. (e.g. fileName.filteredSam)
refFile : the reference file. (e.g. fileName.targetFileName)
- outFile : the output SAM or CMP.H5 file.
+ outFile : the output SAM/BAM file
(i.e. fileName.outputFileName)
readType: standard or cDNA or CCS (can be None if not specified)
Output:
@@ -172,34 +180,37 @@ class PBAlignRunner(PBToolRunner):
"""
output, errCode, errMsg = "", 0, ""
- if getFileFormat(outFile) == FILE_FORMATS.SAM:
- #`mv inSam outFile`
+ outFormat = getFileFormat(outFile)
+
+ if outFormat == FILE_FORMATS.BAM:
+ pass # Nothing to be done
+ if outFormat == FILE_FORMATS.SAM:
logging.info("OutputService: Genearte the output SAM file.")
- logging.debug("OutputService: Move {src} as {dst}".format(
- src=inSam, dst=outFile))
+ logging.debug("OutputService: Move %s as %s", inSam, outFile)
try:
shutil.move(real_ppath(inSam), real_ppath(outFile))
except shutil.Error as e:
- output, errCode, errMsg = "", 1, str(e)
- elif getFileFormat(outFile) == FILE_FORMATS.CMP:
- #`samtoh5 inSam outFile -readType readType
- logging.info("OutputService: Genearte the output CMP.H5 " +
- "file using samtoh5.")
- prog = "samtoh5"
- cmd = "samtoh5 {samFile} {refFile} {outFile}".format(
- samFile=inSam, refFile=refFile, outFile=outFile)
- if readType is not None:
- cmd += " -readType {0} ".format(readType)
- if smrtTitle:
- cmd += " -smrtTitle "
- # Execute the command line
- logging.debug("OutputService: Call \"{0}\"".format(cmd))
- output, errCode, errMsg = backticks(cmd)
-
- if errCode != 0:
- errMsg = prog + " returned a non-zero exit status." + errMsg
+ output, errCode, errMsg = "", 1, "Exited with error: " + str(e)
+ logging.error(errMsg)
+ raise RuntimeError(errMsg)
+ elif outFormat == FILE_FORMATS.CMP:
+ errMsg = "pbalign no longer supports CMP.H5 Output in 3.0."
logging.error(errMsg)
- raise RuntimeError(errMsg)
+ raise IOError(errMsg)
+ elif outFormat == FILE_FORMATS.XML:
+ logging.info("OutputService: Generating the output XML file %s %s",
+ inSam, outFile)
+ # Create {out}.xml, given {out}.bam
+ outBam = str(outFile[0:-3]) + "bam"
+ aln = None
+ # FIXME This should really be more automatic
+ if readType == "CCS":
+ self._output_dataset_type = ConsensusAlignmentSet
+ aln = self._output_dataset_type(real_ppath(outBam))
+ for res in aln.externalResources:
+ res.reference = refFile
+ aln.write(outFile)
+
return output, errCode, errMsg
def _cleanUp(self, realDelete=False):
@@ -207,26 +218,13 @@ class PBAlignRunner(PBToolRunner):
logging.debug("Clean up temporary files and directories.")
self._tempFileManager.CleanUp(realDelete)
-# def _setupLogging(self):
-# LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
-# if self.args.verbosity >= 2:
-# print "logLevel = debug"
-# logLevel = logging.DEBUG
-# elif self.args.verbosity == 1:
-# print "logLevel = info"
-# logLevel = logging.INFO
-# else:
-# print "logLevel = warn"
-# logLevel = logging.WARN
-# logging.basicConfig(level=logLevel, format=LOG_FORMAT)
-
def run(self):
"""
The main function, it is called by PBToolRunner.start().
"""
startTime = time.time()
- logging.info("pbalign version: {version}".format(version=get_version()))
- logging.debug("Original arguments: " + str(self._argumentList))
+ logging.info("pbalign version: %s", get_version())
+ #logging.debug("Original arguments: " + str(self._argumentList))
# Create an AlignService by algorithm name.
self._alnService = self._createAlignService(self.args.algorithm,
@@ -238,69 +236,76 @@ class PBAlignRunner(PBToolRunner):
self._makeSane(self.args, self.fileNames)
# Run align service.
- try:
- self._alnService.run()
- except RuntimeError:
- return 1
+ self._alnService.run()
- # Create a temporary filtered SAM file as output for FilterService.
+ # Create a temporary filtered SAM/BAM file as output for FilterService.
+ outFormat = getFileFormat(self.fileNames.outputFileName)
+ suffix = ".bam" if outFormat in \
+ [FILE_FORMATS.BAM, FILE_FORMATS.XML] else ".sam"
self.fileNames.filteredSam = self._tempFileManager.\
- RegisterNewTmpFile(suffix=".sam")
+ RegisterNewTmpFile(suffix=suffix)
- # Call filter service.
+ # Call filter service on SAM or BAM file.
self._filterService = FilterService(self.fileNames.alignerSamOut,
self.fileNames.targetFileName,
self.fileNames.filteredSam,
- self._alnService.name,
+ self.args.algorithm,
+ #self._alnService.name,
self._alnService.scoreSign,
self.args,
self.fileNames.adapterGffFileName)
- try:
- self._filterService.run()
- except RuntimeError:
- return 1
-
- # Output all hits either in SAM or CMP.H5.
- try:
- useSmrtTitle = False
- if (self.args.algorithm != "blasr" or
- self.fileNames.inputFileFormat == FILE_FORMATS.FASTA):
- useSmrtTitle = True
-
- self._output(
- self.fileNames.filteredSam,
- self.fileNames.targetFileName,
- self.fileNames.outputFileName,
- self.args.readType,
- useSmrtTitle)
- except RuntimeError:
- return 1
-
- # Call post service for quiver.
- if self.args.forQuiver or self.args.loadQVs:
- postService = ForQuiverService(self.fileNames,
- self.args)
- try:
- postService.run()
- except RuntimeError:
- return 1
+ self._filterService.run()
+
+ # Sort bam before output
+ if outFormat in [FILE_FORMATS.BAM, FILE_FORMATS.XML]:
+ # Sort/make index for BAM output.
+ BamPostService(self.fileNames).run()
+
+        # Output all hits in SAM or BAM format.
+ self._output(
+ inSam=self.fileNames.filteredSam,
+ refFile=self.fileNames.targetFileName,
+ outFile=self.fileNames.outputFileName,
+ readType=self.args.readType)
# Delete temporay files anyway to make
self._cleanUp(False if (hasattr(self.args, "keepTmpFiles") and
- self.args.keepTmpFiles is True) else True)
+ self.args.keepTmpFiles is True) else True)
endTime = time.time()
logging.info("Total time: {:.2f} s.".format(float(endTime - startTime)))
return 0
-
-def main():
- pbobj = PBAlignRunner(sys.argv[1:])
- return pbobj.start()
-
-
-if __name__ == "__main__":
- # For testing PBAlignRunner.
+def args_runner(args, output_dataset_type=AlignmentSet):
+ """args runner"""
# PBAlignRunner inherits PBToolRunner. So PBAlignRunner.start() parses args,
# sets up logging and finally returns run().
+ return PBAlignRunner(args, output_dataset_type=output_dataset_type).start()
+
+def _resolved_tool_contract_runner(output_dataset_type,
+ resolved_tool_contract):
+ """
+ Template function for running from a tool contract with an explicitly
+ specified output dataset type.
+ """
+ args = resolved_tool_contract_to_args(resolved_tool_contract)
+ return args_runner(args, output_dataset_type=output_dataset_type)
+
+resolved_tool_contract_runner = functools.partial(
+ _resolved_tool_contract_runner, AlignmentSet)
+resolved_tool_contract_runner_ccs = functools.partial(
+ _resolved_tool_contract_runner, ConsensusAlignmentSet)
+
+def main(argv=sys.argv, get_parser_func=get_contract_parser,
+ contract_runner_func=resolved_tool_contract_runner):
+ """Main, supporting both args runner and tool contract runner."""
+ return pbparser_runner(
+ argv=argv[1:],
+ parser=get_parser_func(),
+ args_runner_func=args_runner,
+ contract_runner_func=contract_runner_func,
+ alog=logging.getLogger(__name__),
+ setup_log_func=setup_log)
+
+if __name__ == "__main__":
sys.exit(main())
diff --git a/pbalign/tasks/__init__.py b/pbalign/tasks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/pbalign/tasks/consolidate_alignments.py b/pbalign/tasks/consolidate_alignments.py
new file mode 100644
index 0000000..59f74ff
--- /dev/null
+++ b/pbalign/tasks/consolidate_alignments.py
@@ -0,0 +1,96 @@
+
+# FIXME this should probably live somewhere more general, e.g. pbdataset?
+
+"""
+Consolidate AlignmentSet .bam files
+"""
+
+import functools
+import tempfile
+import logging
+import os.path as op
+import os
+import sys
+
+from pbcommand.models import get_pbparser, FileTypes, ResourceTypes
+from pbcommand.cli import pbparser_runner
+from pbcommand.utils import setup_log
+from pbcore.io import openDataSet
+
+
+class Constants(object):
+ TOOL_ID = "pbalign.tasks.consolidate_alignments"
+ VERSION = "0.1.0"
+ DRIVER = "python -m pbalign.tasks.consolidate_alignments --resolved-tool-contract "
+ CONSOLIDATE_ID = "pbalign.task_options.consolidate_aligned_bam"
+ N_FILES_ID = "pbalign.task_options.consolidate_n_files"
+
+
+def get_parser():
+ p = get_pbparser(Constants.TOOL_ID,
+ Constants.VERSION,
+ "AlignmentSet consolidate",
+ __doc__,
+ Constants.DRIVER,
+ is_distributed=True,
+ resource_types=(ResourceTypes.TMP_DIR,))
+
+ p.add_input_file_type(FileTypes.DS_ALIGN, "align_in", "Input AlignmentSet",
+ "Gathered AlignmentSet to consolidate")
+ p.add_output_file_type(FileTypes.DS_ALIGN,
+ "ds_out",
+ "Alignments",
+ description="Alignment results dataset",
+ default_name="combined")
+ p.add_boolean(Constants.CONSOLIDATE_ID, "consolidate",
+ default=False,
+ name="Consolidate .bam",
+ description="Merge chunked/gathered .bam files")
+ p.add_int(Constants.N_FILES_ID, "consolidate_n_files",
+ default=1,
+ name="Number of .bam files",
+ description="Number of .bam files to create in consolidate mode")
+ return p
+
+
+def run_consolidate(dataset_file, output_file, consolidate, n_files):
+ with openDataSet(dataset_file) as ds_in:
+ # XXX shouldn't the file count check be done elsewhere?
+ if consolidate and len(ds_in.toExternalFiles()) != 1:
+ new_resource_file = op.splitext(output_file)[0] + ".bam" # .fasta?
+ ds_in.consolidate(new_resource_file, numFiles=n_files)
+ ds_in.newUuid()
+ ds_in.write(output_file)
+ return 0
+
+
+def args_runner(args):
+ return run_consolidate(
+ dataset_file=args.align_in,
+ output_file=args.ds_out,
+ consolidate=args.consolidate,
+ n_files=args.consolidate_n_files)
+
+
+def rtc_runner(rtc):
+ tempfile.tempdir = rtc.task.tmpdir_resources[0].path
+ return run_consolidate(
+ dataset_file=rtc.task.input_files[0],
+ output_file=rtc.task.output_files[0],
+ consolidate=rtc.task.options[Constants.CONSOLIDATE_ID],
+ n_files=rtc.task.options[Constants.N_FILES_ID])
+
+
+def main(argv=sys.argv):
+ logging.basicConfig(level=logging.DEBUG)
+ log = logging.getLogger()
+ return pbparser_runner(argv[1:],
+ get_parser(),
+ args_runner,
+ rtc_runner,
+ log,
+ setup_log)
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/pbalign/tools/createChemistryHeader.py b/pbalign/tools/createChemistryHeader.py
index 434d98c..ee71da7 100755
--- a/pbalign/tools/createChemistryHeader.py
+++ b/pbalign/tools/createChemistryHeader.py
@@ -28,8 +28,8 @@ def format_rgds_entries(rgds_entries):
rgds_strings = {}
for rg_id in rgds_entries:
- rgds_string = ("BINDINGKIT:{b};SEQUENCINGKIT:{s};"
- "SOFTWAREVERSION:{v}"
+ rgds_string = ("BINDINGKIT={b};SEQUENCINGKIT={s};"
+ "SOFTWAREVERSION={v}"
.format(b=rgds_entries[rg_id][0],
s=rgds_entries[rg_id][1],
v=rgds_entries[rg_id][2]))
diff --git a/pbalign/utils/fileutil.py b/pbalign/utils/fileutil.py
index 02afbb4..0ff5245 100755
--- a/pbalign/utils/fileutil.py
+++ b/pbalign/utils/fileutil.py
@@ -39,6 +39,8 @@ import os.path as op
import logging
from xml.etree import ElementTree as ET
from pbcore.util.Process import backticks
+from pbcore.io import DataSet, ReferenceSet
+
def enum(**enums):
"""Simulate enum."""
@@ -48,16 +50,19 @@ FILE_FORMATS = enum(FASTA="FASTA", PLS="PLS_H5", PLX="PLX_H5",
BAS="BAS_H5", BAX="BAX_H5", FOFN="FOFN",
SAM="SAM", CMP="CMP_H5", RGN="RGN_H5",
SA="SA", XML="XML", UNKNOWN="UNKNOWN",
- CCS="CCS_H5")
+ CCS="CCS_H5", BAM="BAM")
VALID_INPUT_FORMATS = (FILE_FORMATS.FASTA, FILE_FORMATS.PLS,
FILE_FORMATS.PLX, FILE_FORMATS.BAS,
FILE_FORMATS.BAX, FILE_FORMATS.FOFN,
- FILE_FORMATS.CCS)
+ FILE_FORMATS.CCS, FILE_FORMATS.BAM,
+ FILE_FORMATS.XML)
VALID_REGIONTABLE_FORMATS = (FILE_FORMATS.RGN, FILE_FORMATS.FOFN)
-VALID_OUTPUT_FORMATS = (FILE_FORMATS.CMP, FILE_FORMATS.SAM)
+VALID_OUTPUT_FORMATS = (FILE_FORMATS.CMP, FILE_FORMATS.SAM,
+ FILE_FORMATS.BAM, FILE_FORMATS.XML)
+
def real_ppath(fn):
"""Return real 'python-style' path of a file.
@@ -99,6 +104,7 @@ def isExist(ff):
_output, errCode, _errMsg = backticks(cmd)
return (errCode == 0)
+
def isValidInputFormat(ff):
"""Return True if ff is a valid input file format."""
return ff in VALID_INPUT_FORMATS
@@ -127,6 +133,8 @@ def getFileFormat(filename):
return FILE_FORMATS.FASTA
elif ext in [".sam"]:
return FILE_FORMATS.SAM
+ elif ext in [".bam"]:
+ return FILE_FORMATS.BAM
elif ext in [".sa"]:
return FILE_FORMATS.SA
elif ext in [".fofn"]:
@@ -173,7 +181,6 @@ def getFileFormatsFromFOFN(fofnname):
fs = getFilesFromFOFN(fofnname)
return [getFileFormat(f) for f in fs]
-
def checkInputFile(filename, validFormats=VALID_INPUT_FORMATS):
"""
Check whether an input file has the valid file format and exists.
@@ -207,6 +214,7 @@ def checkInputFile(filename, validFormats=VALID_INPUT_FORMATS):
raise IOError(errMsg)
else:
fileListRet.append(f)
+
return real_upath(filename)
@@ -238,7 +246,7 @@ def checkOutputFile(filename):
"""
filename = real_ppath(filename)
if not isValidOutputFormat(getFileFormat(filename)):
- errMsg = "The output file format can only be SAM or CMP.H5."
+ errMsg = "The output file format can only be CMP.H5, SAM, BAM or XML."
logging.error(errMsg)
raise ValueError(errMsg)
try:
@@ -252,7 +260,9 @@ def checkOutputFile(filename):
class ReferenceInfo:
+
"""Parse reference.info.xml in reference path."""
+
def __init__(self, fileName):
fileName = real_ppath(fileName)
if getFileFormat(fileName) != FILE_FORMATS.XML:
@@ -325,11 +335,12 @@ class ReferenceInfo:
def checkReferencePath(inRefpath):
"""Validate input reference path.
Check whether the input reference path exists or not.
- Input : can be a FASTA file or a reference repository.
+    Input : can be a FASTA file, an XML file or a reference repository.
Output: [refpath, FASTA_file, None, False, gff], if input is a FASTA file,
and it is not located within a reference repository.
[refpath, FASTA_file, SA_file, True, gff], if input is a FASTA
file, and it is located within a reference repository.
+            [refpath, FASTA_file, None, False, None], if input is an XML
[refpath, FASTA_file, SA_file, True, gff], if input is a reference
repository produced by PacBio referenceUploader.
"""
@@ -348,6 +359,16 @@ def checkReferencePath(inRefpath):
# reference repository
refinfoxml = op.join(op.split(op.dirname(refpath))[0],
"reference.info.xml")
+ elif getFileFormat(refpath) == FILE_FORMATS.XML:
+ fastaFiles = ReferenceSet(refpath).toFofn()
+ if len(fastaFiles) != 1:
+ errMsg = refpath + " must contain exactly one reference"
+ logging.error(errMsg)
+ raise Exception(errMsg)
+ fastaFile = fastaFiles[0][5:] if fastaFiles[0].startswith('file:') \
+ else fastaFiles[0]
+ refinfoxml = op.join(op.split(op.dirname(refpath))[0],
+ "reference.info.xml")
else:
refinfoxml = op.join(refpath, "reference.info.xml")
@@ -387,7 +408,7 @@ def checkReferencePath(inRefpath):
return real_upath(refpath), real_upath(fastaFile), sawriterFile, \
isWithinRepository, adapterGffFile
-#if __name__ == "__main__":
+# if __name__ == "__main__":
# refPath = "/opt/smrtanalysis" + \
# "/common/references/lambda/"
# refpath, faFile, saFile, isWithinRepository = checkReferencePath(refPath)
diff --git a/pbalign/utils/progutil.py b/pbalign/utils/progutil.py
index 3e6f0f7..7ad523b 100755
--- a/pbalign/utils/progutil.py
+++ b/pbalign/utils/progutil.py
@@ -60,7 +60,7 @@ def Execute(name, cmd):
errCode: the error code (zero means normal exit)
errMsg : the error message
"""
- logging.debug(name + ": Call \"{0}\"".format(cmd))
+ logging.info(name + ": Call \"{0}\"".format(cmd))
output, errCode, errMsg = backticks(cmd)
if errCode != 0:
errMsg = name + " returned a non-zero exit status. " + errMsg
diff --git a/setup.py b/setup.py
index 340abed..ffd0d5e 100755
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@ import sys
setup(
name = 'pbalign',
- version='0.2.0',
+ version='0.3.0',
author='Pacific Biosciences',
author_email='devnet at pacificbiosciences.com',
license='LICENSE.txt',
@@ -14,8 +14,10 @@ setup(
zip_safe = False,
install_requires=[
'pbcore >= 0.8.5',
- 'pysam'
+ 'pbcommand >= 0.2.0',
+ 'pysam',
],
+ test_requires=['pbtestdata'],
entry_points={'console_scripts': [
'pbalign=pbalign.pbalignrunner:main',
'maskAlignedReads.py = pbalign.tools.mask_aligned_reads:main',
diff --git a/tests/cram/bam.t b/tests/cram/bam.t
new file mode 100644
index 0000000..5d54f4f
--- /dev/null
+++ b/tests/cram/bam.t
@@ -0,0 +1,14 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with bam in bam out
+ $ Q=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/test_bam/tiny_bam.fofn
+ $ T=/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta
+ $ O=tiny_bam.bam
+ $ pbalign $Q $T $O >/dev/null
+
+#Call samtools index to check whether out.bam is sorted or not
+ $ samtools index $O $TMP1.bai && ls $TMP1.bai >/dev/null && echo $?
+ 0
+
+
diff --git a/tests/cram/ccs.t b/tests/cram/ccs.t
new file mode 100644
index 0000000..bba0bed
--- /dev/null
+++ b/tests/cram/ccs.t
@@ -0,0 +1,15 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with bam in bam out
+ $ Q=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/all4mer-ccs/tiny_ccs.bam
+ $ T=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/all4mer-ccs/all4mer_v2_30_1.fasta
+ $ O=$OUTDIR/tiny_ccs.bam
+ $ pbalign $Q $T $O >/dev/null
+
+#Call samtools index to check whether out.bam is sorted or not
+ $ samtools index $O $TMP1.bai && ls $TMP1.bai >/dev/null && echo $?
+ 0
+
+ $ samtools view $O |wc -l
+ 8
diff --git a/tests/cram/dataset_tiny.t b/tests/cram/dataset_tiny.t
new file mode 100644
index 0000000..be6f70b
--- /dev/null
+++ b/tests/cram/dataset_tiny.t
@@ -0,0 +1,38 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with dataset in and out
+ $ D=$TESTDATASETS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.all.subreadset.xml
+ $ T=$REFDIR/lambda/reference.dataset.xml
+ $ O=$OUTDIR/tiny_bam.bam
+ $ rm -f $O
+ $ pbalign $D $T $O --algorithmOptions=" --holeNumbers 1-1000,30000-30500,60000-60600,100000-100500" >/dev/null
+
+Try feeding an aligned bam back in...
+ $ RA=$OUTDIR/tiny_bam_realigned.bam
+ $ pbalign $O $T $RA >/dev/null
+
+Call samtools index to check whether out.bam is sorted or not and coverage is sufficient and basic mapped stats
+ $ samtools index $O $TMP1.bai && ls $TMP1.bai >/dev/null && echo $?
+ 0
+
+Sum of depth > 197740
+ $ samtools depth $O | awk '{sum+=$3} END {if (sum >= 197740) print "TRUE"; else print "FALSE"}'
+ TRUE
+
+ $ samtools flagstat $O
+ 248 + 0 in total (QC-passed reads + QC-failed reads)
+ 0 + 0 secondary
+ 0 + 0 supplementary
+ 0 + 0 duplicates
+ 248 + 0 mapped * (glob)
+ 0 + 0 paired in sequencing
+ 0 + 0 read1
+ 0 + 0 read2
+ 0 + 0 properly paired * (glob)
+ 0 + 0 with itself and mate mapped
+ 0 + 0 singletons * (glob)
+ 0 + 0 with mate mapped to a different chr
+ 0 + 0 with mate mapped to a different chr (mapQ>=5)
+
+
diff --git a/tests/cram/filtercriteria_hitpolicy.t b/tests/cram/filtercriteria_hitpolicy.t
new file mode 100644
index 0000000..208f739
--- /dev/null
+++ b/tests/cram/filtercriteria_hitpolicy.t
@@ -0,0 +1,78 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+##Test --maxDivergence --minAnchorSize --minAccuracy
+# $ Q=$DATDIR/lambda_query.fasta
+# $ T="/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+#
+# $ NAME=lambda5
+# $ SAMOUT=$OUTDIR/$NAME.sam
+# $ M4OUT=$OUTDIR/$NAME.m4
+# $ M4STDOUT=$STDSIVDIR/$NAME.m4
+#
+# $ rm -f $SAMOUT $M4OUT
+# $ pbalign --maxDivergence 40 --minAnchorSize 20 --minAccuracy 80 $Q $T $SAMOUT 2>/dev/null
+# $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+# $ diff $M4OUT $M4STDOUT
+#
+##Test whether pbalign interprets minAccuracy and maxDivergence correctly.
+# $ rm -f $SAMOUT $M4OUT
+# $ pbalign --maxDivergence 0.4 --minAnchorSize 20 --minAccuracy 0.8 $Q $T $SAMOUT 2>/dev/null
+# $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+# $ diff $M4OUT $M4STDOUT
+#
+##Test --hitPolicy random
+# $ NAME=lambda_hitPolicy_random
+# $ SAMOUT=$OUTDIR/$NAME.sam
+# $ M4OUT=$OUTDIR/$NAME.m4
+# $ M4STDOUT=$STDSIVDIR/$NAME.m4
+#
+# $ rm -f $SAMOUT $M4OUT
+# $ pbalign --hitPolicy random --seed 1 $Q $T $SAMOUT 2>/dev/null
+# $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+# $ diff $M4OUT $M4STDOUT
+#
+##Test --hitPolicy all
+# $ NAME=lambda_hitPolicy_all
+# $ SAMOUT=$OUTDIR/$NAME.sam
+# $ M4OUT=$OUTDIR/$NAME.m4
+# $ M4STDOUT=$STDSIVDIR/$NAME.m4
+#
+# $ rm -f $SAMOUT $M4OUT
+# $ pbalign --hitPolicy all $Q $T $SAMOUT 2>/dev/null
+# $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+# $ diff $M4OUT $M4STDOUT
+#
+#
+##Test --hitPolicy randombest
+# $ NAME=lambda_hitPolicy_randombest
+# $ SAMOUT=$OUTDIR/$NAME.sam
+# $ M4OUT=$OUTDIR/$NAME.m4
+# $ M4STDOUT=$STDSIVDIR/$NAME.m4
+#
+# $ rm -f $SAMOUT $M4OUT
+# $ pbalign --hitPolicy randombest --seed 1 $Q $T $SAMOUT 2>/dev/null
+# $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+# $ diff $M4OUT $M4STDOUT
+
+#Test --hitPolicy allbest
+ $ Q=$DATDIR/example_read.fasta
+ $ T=$DATDIR/example_ref.fasta
+ $ SAMOUT=$OUTDIR/hitPolicy_allbest.sam
+
+ $ rm -f $SAMOUT
+ $ pbalign --hitPolicy allbest $Q $T $SAMOUT >/dev/null
+
+
+
+#Test pbalign with -hitPolicy leftmost
+ $ Q=$DATDIR/test_leftmost_query.fasta
+ $ T=$DATDIR/test_leftmost_target.fasta
+ $ O=$OUTDIR/test_leftmost_out.sam
+ $ pbalign $Q $T $O --hitPolicy leftmost >/dev/null
+ $ echo $?
+ 0
+ $ grep -v '@' $O | cut -f 4
+ 1
+
+
diff --git a/tests/cram/path_with_space.t b/tests/cram/path_with_space.t
new file mode 100644
index 0000000..e2a6a9d
--- /dev/null
+++ b/tests/cram/path_with_space.t
@@ -0,0 +1,9 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with space in file names.
+ $ FA=$DATDIR/dir\ with\ spaces/reads.fasta
+ $ pbalign "$FA" "$FA" $OUTDIR/with_space.sam >/dev/null
+ $ echo $?
+ 0
+
diff --git a/tests/cram/setup.sh b/tests/cram/setup.sh
new file mode 100755
index 0000000..1b3e12c
--- /dev/null
+++ b/tests/cram/setup.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/bash
+
+#Note:
+#Program name has been changed from `pbalign.py` in version 0.1.0
+#to `pbalign` in 0.2.0, pseudo namespace pbtools has been removed also.
+
+CURDIR=$TESTDIR
+DATDIR=$CURDIR/../data
+OUTDIR=$CURDIR/../out
+STDDIR=$CURDIR/../stdout
+SIVDIR=/pbi/dept/secondary/siv/testdata/pbalign-unittest
+DATSIVDIR=$SIVDIR/data/
+STDSIVDIR=$SIVDIR/stdout/
+
+TESTDATASETS=/pbi/dept/secondary/siv/testdata/SA3-DS
+REFDIR=/pbi/dept/secondary/siv/references
+
+TMP1=$$.tmp.out
+TMP2=$$.tmp.stdout
+SAMTOM4=samtom4
+CMPH5TOOLS="/mnt/secondary/Smrtanalysis/smrtcmds/bin/smrtwrap cmph5tools.py "
diff --git a/tests/cram/test_pbalign.t b/tests/cram/test_pbalign.t
deleted file mode 100644
index 9e3b91c..0000000
--- a/tests/cram/test_pbalign.t
+++ /dev/null
@@ -1,276 +0,0 @@
-Note:
-Program name has been changed from `pbalign.py` in version 0.1.0
-to `pbalign` in 0.2.0, pseudo namespace pbtools has been removed also.
-
-Test pbalign
- $ CURDIR=$TESTDIR
- $ DATDIR=$CURDIR/../data
- $ OUTDIR=$CURDIR/../out
- $ STDDIR=$CURDIR/../stdout
-
-#Test pbalign with all combinations of input & output formats
-#input, reference and output formats are: fasta, fasta, and sam/cmp.h5
- $ READFILE=$DATDIR/lambda_query.fasta
- $ REFFILE="/mnt/secondary-siv/references/lambda/sequence/lambda.fasta"
-
- $ SAMOUT=$OUTDIR/lambda.sam
- $ CMPOUT=$OUTDIR/lambda.cmp.h5
-
- $ rm -f $SAMOUT $CMPOUT
- $ pbalign $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- ea31763bc847a6c75d3ddb5fb6036489 -
-
- $ pbalign $READFILE $REFFILE $CMPOUT
- $ h5dump -g /ref000001 $CMPOUT > tmpfile
- $ sed -n '2,11p' tmpfile
- GROUP "/ref000001" {
- GROUP "m120619_015854_42161_c100392070070000001523040811021231_s1_p0" {
- DATASET "AlnArray" {
- DATATYPE H5T_STD_U8LE
- DATASPACE SIMPLE { ( 48428 ) / ( H5S_UNLIMITED ) }
- DATA {
- (0): 34, 136, 17, 17, 34, 17, 136, 136, 136, 17, 136, 34, 136, 68,
- (14): 128, 34, 17, 136, 34, 17, 136, 17, 2, 34, 136, 136, 34, 34,
- (28): 68, 17, 68, 34, 17, 136, 136, 136, 17, 136, 136, 1, 17, 68,
- (42): 34, 17, 136, 136, 136, 34, 68, 34, 136, 17, 136, 17, 17, 68,
- $ rm tmpfile
-
-
-
-#input, reference and output formats are: fasta, folder and sam/cmp.h5
- $ READFILE=$DATDIR/lambda_query.fasta
- $ REFFILE=/mnt/secondary-siv/references/lambda/
-
- $ SAMOUT=$OUTDIR/lambda2.sam
- $ CMPOUT=$OUTDIR/lambda2.cmp.h5
-
- $ rm -f $SAMOUT $CMPOUT
- $ pbalign $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- ea31763bc847a6c75d3ddb5fb6036489 -
-
- $ pbalign $READFILE $REFFILE $CMPOUT
- $ h5dump -g /ref000001 $CMPOUT > tmpfile
- $ sed -n '2,11p' tmpfile
- GROUP "/ref000001" {
- GROUP "m120619_015854_42161_c100392070070000001523040811021231_s1_p0" {
- DATASET "AlnArray" {
- DATATYPE H5T_STD_U8LE
- DATASPACE SIMPLE { ( 48428 ) / ( H5S_UNLIMITED ) }
- DATA {
- (0): 34, 136, 17, 17, 34, 17, 136, 136, 136, 17, 136, 34, 136, 68,
- (14): 128, 34, 17, 136, 34, 17, 136, 17, 2, 34, 136, 136, 34, 34,
- (28): 68, 17, 68, 34, 17, 136, 136, 136, 17, 136, 136, 1, 17, 68,
- (42): 34, 17, 136, 136, 136, 34, 68, 34, 136, 17, 136, 17, 17, 68,
- $ rm -f tmpfile
-
-
-#input, reference and output formats are: fofn, fasta and sam/cmp.h5
- $ READFILE=$DATDIR/lambda_bax.fofn
- $ REFFILE="/mnt/secondary-siv/references/lambda/sequence/lambda.fasta"
-
- $ SAMOUT=$OUTDIR/lambda3.sam
- $ CMPOUT=$OUTDIR/lambda3.cmp.h5
-
- $ rm -f $SAMOUT $CMPOUT
- $ pbalign $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- e5c29fba1efbbfbe164fa2797408dbf6 -
-
- $ pbalign $READFILE $REFFILE $CMPOUT
- $ h5dump -g /ref000001 $CMPOUT > tmpfile
- $ sed -n '2,11p' tmpfile
- GROUP "/ref000001" {
- GROUP "m130220_114643_42129_c100471902550000001823071906131347_s1_p0" {
- DATASET "AlnArray" {
- DATATYPE H5T_STD_U8LE
- DATASPACE SIMPLE { ( 79904 ) / ( H5S_UNLIMITED ) }
- DATA {
- (0): 136, 34, 136, 34, 136, 68, 34, 68, 68, 68, 17, 68, 136, 68,
- (14): 136, 2, 34, 68, 68, 68, 17, 17, 136, 17, 17, 136, 136, 1, 17,
- (29): 17, 17, 34, 68, 17, 136, 68, 2, 17, 34, 17, 34, 17, 68, 68,
- (44): 68, 8, 136, 136, 17, 68, 34, 68, 34, 68, 136, 17, 2, 17, 130,
- $ rm -f tmpfile
-
-
-#input, reference and output formats are: fofn, folder and sam/cmp.h5
- $ READFILE=$DATDIR/lambda_bax.fofn
- $ REFFILE=/mnt/secondary-siv/references/lambda/
-
- $ SAMOUT=$OUTDIR/lambda4.sam
- $ CMPOUT=$OUTDIR/lambda4.cmp.h5
-
- $ rm -f $SAMOUT $CMPOUT
- $ pbalign $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- e5c29fba1efbbfbe164fa2797408dbf6 -
-
- $ pbalign $READFILE $REFFILE $CMPOUT
- $ h5dump -g /ref000001 $CMPOUT > tmpfile
- $ sed -n '2,11p' tmpfile
- GROUP "/ref000001" {
- GROUP "m130220_114643_42129_c100471902550000001823071906131347_s1_p0" {
- DATASET "AlnArray" {
- DATATYPE H5T_STD_U8LE
- DATASPACE SIMPLE { ( 79904 ) / ( H5S_UNLIMITED ) }
- DATA {
- (0): 136, 34, 136, 34, 136, 68, 34, 68, 68, 68, 17, 68, 136, 68,
- (14): 136, 2, 34, 68, 68, 68, 17, 17, 136, 17, 17, 136, 136, 1, 17,
- (29): 17, 17, 34, 68, 17, 136, 68, 2, 17, 34, 17, 34, 17, 68, 68,
- (44): 68, 8, 136, 136, 17, 68, 34, 68, 34, 68, 136, 17, 2, 17, 130,
- $ rm tmpfile
-
-
-#Test --maxDivergence --minAnchorSize --minAccuracy
- $ READFILE=$DATDIR/lambda_query.fasta
- $ REFFILE="/mnt/secondary-siv/references/lambda/sequence/lambda.fasta"
-
- $ SAMOUT=$OUTDIR/lambda5.sam
-
- $ rm -f $SAMOUT
- $ pbalign --maxDivergence 40 --minAnchorSize 20 --minAccuracy 80 $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- 29f8897b8ee6d4b7fff126d374edb306 -
-
-#Test whether pbalign interprets minAccuracy and maxDivergence correclty.
- $ rm -f $SAMOUT
- $ pbalign --maxDivergence 0.4 --minAnchorSize 20 --minAccuracy 0.8 $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- 29f8897b8ee6d4b7fff126d374edb306 -
-
-#Test --hitPolicy random
- $ SAMOUT=$OUTDIR/lambda_hitPolicy_random.sam
-
- $ rm -f $SAMOUT
- $ pbalign --hitPolicy random --seed 1 $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- ea31763bc847a6c75d3ddb5fb6036489 -
-
-#Test --hitPolicy all
- $ SAMOUT=$OUTDIR/lambda_hitPolicy_all.sam
-
- $ rm -f $SAMOUT
- $ pbalign --hitPolicy all $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- 2022614eb99fe3288c332aadcfefe739 -
-
-
-#Test --hitPolicy randombest
- $ SAMOUT=$OUTDIR/lambda_hitPolicy_randombest.sam
-
- $ rm -f $SAMOUT
- $ pbalign --hitPolicy randombest --seed 1 $READFILE $REFFILE $SAMOUT
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- ea31763bc847a6c75d3ddb5fb6036489 -
-
-#Test --scoreFunction
- $ SAMOUT=$OUTDIR/lambda_scoreFunction_editdist.sam
-
- $ rm -f $SAMOUT
- $ pbalign $READFILE $REFFILE $SAMOUT --scoreFunction editdist
- $ tail -n+6 $SAMOUT | cut -f 1-11 | sort | md5sum
- ea31763bc847a6c75d3ddb5fb6036489 -
-
-
-#Test --hitPolicy allbest
- $ READFILE=$DATDIR/example_read.fasta
- $ REFFILE=$DATDIR/example_ref.fasta
- $ SAMOUT=$OUTDIR/hitPolicy_allbest.sam
-
- $ rm -f $SAMOUT
- $ pbalign --hitPolicy allbest $READFILE $REFFILE $SAMOUT
- $ tail -n+8 $SAMOUT | cut -f 1-11 | sort | md5sum
- 6e68a0902f282c25526e14e5516e663b -
-
-#Test --useccs=useccsdenovo, whether attribute /ReadType is 'CCS'
- $ READFILE=$DATDIR/lambda_bax.fofn
- $ REFFILE="/mnt/secondary-siv/references/lambda/sequence/lambda.fasta"
- $ CMPOUT=$OUTDIR/lambda_denovo.cmp.h5
-
- $ rm -f $CMPOUT
- $ pbalign $READFILE $REFFILE $CMPOUT --useccs=useccsdenovo --algorithmOptions=" -holeNumbers 0-100"
- $ h5dump -a /ReadType $CMPOUT | grep "CCS"
- (0): "CCS"
-
-#Test --forQuiver can not be used together with --useccs
- $ pbalign $READFILE $REFFILE $CMPOUT --useccs=useccsdenovo --algorithmOptions=" -holeNumbers 0-100" --forQuiver 1>/dev/null 2>/dev/null || echo 'fail as expected'
- fail as expected
-
-
-#Test whether pbalign can produce sam output for non-PacBio reads
-# $ READFILE=$DATDIR/notSMRT.fasta
-# $ REFFILE="/mnt/secondary-siv/references/lambda/sequence/lambda.fasta"
-# $ SAMOUT=$OUTDIR/notSMRT.sam
-#
-# $ rm -f $SAMOUT $CMPOUT
-# $ pbalign $READFILE $REFFILE $SAMOUT
-
-
-# Test whether (ccs.h5) produces
-# identical results as (bas.h5 and --useccs=useccsdenovo).
- $ READFILE=$DATDIR/test_ccs.fofn
- $ REFFILE=/mnt/secondary-siv/references/ecoli_k12_MG1655/sequence/ecoli_k12_MG1655.fasta
- $ CCS_CMPOUT=$OUTDIR/test_ccs.cmp.h5
-
- $ rm -f $CCS_CMPOUT
- $ pbalign $READFILE $REFFILE $CCS_CMPOUT
-
- $ READFILE=$DATDIR/test_bas.fofn
- $ BAS_CMPOUT=$OUTDIR/test_bas.cmp.h5
-
- $ rm -f $BAS_CMPOUT
- $ pbalign $READFILE $REFFILE $BAS_CMPOUT --useccs=useccsdenovo
-
- $ h5diff $CCS_CMPOUT $BAS_CMPOUT /AlnGroup /AlnGroup
- $ h5diff $CCS_CMPOUT $BAS_CMPOUT /AlnInfo /AlnInfo
- $ h5diff $CCS_CMPOUT $BAS_CMPOUT /MovieInfo /MovieInfo
- $ h5diff $CCS_CMPOUT $BAS_CMPOUT /RefInfo /RefInfo
- $ h5diff $CCS_CMPOUT $BAS_CMPOUT /ref000001 /ref000001
-
-
-#Test pbalign with -filterAdapterOnly
- $ READFILE=$DATDIR/test_filterAdapterOnly.fofn
- $ REFDIR=/mnt/secondary-siv/testdata/BlasrTestData/pbalign/data/references/H1_6_Scal_6x/
- $ OUTPUT=$OUTDIR/test_filterAdapterOnly.sam
- $ rm -f $OUTPUT
- $ pbalign $READFILE $REFDIR $OUTPUT --filterAdapterOnly --algorithmOptions=" -holeNumbers 10817,14760" --seed=1
- $ tail -n+6 $OUTPUT | cut -f 1-4
-
-# Test pbalign with --pulseFile
-# This is an experimental option which goes only with gmap,
-# it enables users to bypass the pls2fasta step and use their own fasta
-# file instead, while keep the ability of generating cmp.h5 files with pulses
-# (i.e., --forQuiver). Eventually, we need to support --algorithm=blasr.
- $ OUTFILE=$OUTDIR/test_pulseFile.cmp.h5
- $ REFPATH=/mnt/secondary-siv/references/Ecoli_K12_DH10B/
- $ REFFILE=/mnt/secondary-siv/references/Ecoli_K12_DH10B/sequence/Ecoli_K12_DH10B.fasta
- $ pbalign $DATDIR/test_pulseFile.fasta $REFPATH $OUTFILE --pulseFile $DATDIR/test_pulseFile.fofn --forQuiver --algorithm gmap --byread
- $ echo $?
- 0
-
- $ OUTFILE=$OUTDIR/test_pulseFile.cmp.h5
- $ REFPATH=/mnt/secondary-siv/references/Ecoli_K12_DH10B/
- $ REFFILE=/mnt/secondary-siv/references/Ecoli_K12_DH10B/sequence/Ecoli_K12_DH10B.fasta
- $ rm -f $OUTFILE
- $ pbalign $DATDIR/test_pulseFile.fasta $REFPATH $OUTFILE --pulseFile $DATDIR/test_pulseFile.fofn --forQuiver --algorithm blasr --byread
- $ echo $?
- 0
-
-#Test pbalign with space in file names.
- $ FA=$DATDIR/dir\ with\ spaces/reads.fasta
- $ pbalign "$FA" "$FA" $OUTDIR/with_space.sam
- $ echo $?
- 0
-
-#Test pbalign with -hitPolicy leftmost
- $ Q=$DATDIR/test_leftmost_query.fasta
- $ T=$DATDIR/test_leftmost_target.fasta
- $ O=$OUTDIR/test_leftmost_out.sam
- $ pbalign $Q $T $O --hitPolicy leftmost
- $ echo $?
- 0
- $ tail -n+6 $O | cut -f 4
- 1
-
-
diff --git a/tests/cram/xml.t b/tests/cram/xml.t
new file mode 100644
index 0000000..8839141
--- /dev/null
+++ b/tests/cram/xml.t
@@ -0,0 +1,38 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with xml in bam out
+ $ Q=$DATDIR/subreads_dataset1.xml
+ $ T=$DATDIR/reference_lambda.xml
+ $ O=$OUTDIR/xml_in_bam_out.bam
+ $ rm -f $O
+ $ pbalign $Q $T $O --algorithmOptions=" --holeNumbers 1-2000" >/dev/null 2>/dev/null
+ $ echo $?
+ 0
+
+#Test pbalign with xml in xml out
+ $ Q=$DATDIR/subreads_dataset1.xml
+ $ T=$DATDIR/reference_lambda.xml
+ $ O=$OUTDIR/xml_in_xml_out.xml
+ $ rm -f $O
+ $ pbalign $Q $T $O --algorithmOptions=" --holeNumbers 1-2000" >/dev/null 2>/dev/null
+ $ echo $?
+ 0
+
+ $ ls $OUTDIR/xml_in_xml_out.bam.bai && echo $? # bam index exists
+ *.bai (glob)
+ 0
+
+ $ ls $OUTDIR/xml_in_xml_out.bam.pbi && echo $? # pbi index exists
+ *.pbi (glob)
+ 0
+
+#Test pbalign with up-to-date xml inputs
+ $ Q=$DATDIR/subreads_dataset2.xml
+ $ T=$DATSIVDIR/ecoli.fasta
+ $ O=$OUTDIR/xml_in_bam_out_2.bam
+ $ rm -f $O
+ $ pbalign $Q $T $O --algorithmOptions=" --holeNumbers 1-2000" >/dev/null 2>/dev/null
+ $ echo $?
+ 0
+
diff --git a/tests/cram_h5/ccs.t b/tests/cram_h5/ccs.t
new file mode 100644
index 0000000..7665c7a
--- /dev/null
+++ b/tests/cram_h5/ccs.t
@@ -0,0 +1,51 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test --useccs=useccsdenovo, whether attribute /ReadType is 'CCS'
+ $ Q=$DATDIR/lambda_bax.fofn
+ $ T="/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+ $ CMPOUT=$OUTDIR/lambda_denovo.cmp.h5
+
+ $ rm -f $CMPOUT
+ $ pbalign $Q $T $CMPOUT --useccs=useccsdenovo --algorithmOptions=" -holeNumbers 0-100" 2>/dev/null
+ $ h5dump -a /ReadType $CMPOUT | grep "CCS"
+ (0): "CCS"
+
+#Test --forQuiver can not be used together with --useccs
+ $ pbalign $Q $T $CMPOUT --useccs=useccsdenovo --algorithmOptions=" -holeNumbers 0-100" --forQuiver 1>/dev/null 2>/dev/null || echo 'fail as expected'
+ fail as expected
+
+
+#Test whether pbalign can produce sam output for non-PacBio reads
+# $ Q=$DATDIR/notSMRT.fasta
+# $ T="/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+# $ SAMOUT=$OUTDIR/notSMRT.sam
+#
+# $ rm -f $SAMOUT $CMPOUT
+# $ pbalign $Q $T $SAMOUT 2>/dev/null
+
+
+# Test whether (ccs.h5) produces
+# identical results as (bas.h5 and --useccs=useccsdenovo).
+ $ Q=$DATDIR/test_ccs.fofn
+ $ T=/pbi/dept/secondary/siv/references/ecoli_k12_MG1655/sequence/ecoli_k12_MG1655.fasta
+ $ CCS_CMPOUT=$OUTDIR/test_ccs.cmp.h5
+
+ $ rm -f $CCS_CMPOUT
+ $ pbalign $Q $T $CCS_CMPOUT 2>/dev/null
+
+ $ Q=$DATDIR/test_bas.fofn
+ $ BAS_CMPOUT=$OUTDIR/test_bas.cmp.h5
+
+ $ rm -f $BAS_CMPOUT
+ $ pbalign $Q $T $BAS_CMPOUT --useccs=useccsdenovo 2>/dev/null
+ $ /mnt/secondary/Smrtanalysis/smrtcmds/bin/smrtwrap cmph5tools.py sort $BAS_CMPOUT --deep --inPlace
+ $ /mnt/secondary/Smrtanalysis/smrtcmds/bin/smrtwrap cmph5tools.py sort $CCS_CMPOUT --deep --inPlace
+
+ $ h5diff $CCS_CMPOUT $BAS_CMPOUT /AlnGroup /AlnGroup
+ $ h5diff $CCS_CMPOUT $BAS_CMPOUT /AlnInfo /AlnInfo
+ $ h5diff $CCS_CMPOUT $BAS_CMPOUT /MovieInfo /MovieInfo
+ $ h5diff $CCS_CMPOUT $BAS_CMPOUT /RefInfo /RefInfo
+ $ h5diff $CCS_CMPOUT $BAS_CMPOUT /ref000001 /ref000001
+
+
diff --git a/tests/cram_h5/filter_adapter_only.t b/tests/cram_h5/filter_adapter_only.t
new file mode 100644
index 0000000..7e87cf6
--- /dev/null
+++ b/tests/cram_h5/filter_adapter_only.t
@@ -0,0 +1,11 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with -filterAdapterOnly
+ $ Q=$DATDIR/test_filterAdapterOnly.fofn
+ $ T=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/references/H1_6_Scal_6x/
+ $ O=$OUTDIR/test_filterAdapterOnly.sam
+ $ rm -f $O
+ $ pbalign $Q $T $O --filterAdapterOnly --algorithmOptions=" -holeNumbers 10817,14760" --seed=1 2>/dev/null
+ $ grep -v '@' $O | cut -f 1-4
+
diff --git a/tests/cram_h5/inputs_outputs.t b/tests/cram_h5/inputs_outputs.t
new file mode 100644
index 0000000..a2eb155
--- /dev/null
+++ b/tests/cram_h5/inputs_outputs.t
@@ -0,0 +1,130 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+#Test pbalign with all combinations of input & output formats
+#input, reference and output formats are: fasta, fasta, and sam/cmp.h5
+ $ Q=$DATDIR/lambda_query.fasta
+ $ T="/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+
+ $ NAME=lambda
+ $ SAMOUT=$OUTDIR/$NAME.sam
+ $ CMPOUT=$OUTDIR/$NAME.cmp.h5
+ $ M4OUT=$OUTDIR/$NAME.m4
+ $ M4STDOUT=$STDSIVDIR/$NAME.m4
+
+ $ rm -f $SAMOUT $CMPOUT $M4OUT
+ $ pbalign $Q $T $SAMOUT 2>/dev/null
+ $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+ $ diff $M4OUT $M4STDOUT
+
+ $ pbalign $Q $T $CMPOUT 2>/dev/null
+ $ $CMPH5TOOLS sort $CMPOUT --deep --inPlace
+ $ h5dump -g /ref000001 $CMPOUT > tmpfile
+ $ sed -n '2,11p' tmpfile
+ GROUP "/ref000001" {
+ GROUP "rg1-0" {
+ DATASET "AlnArray" {
+ DATATYPE H5T_STD_U8LE
+ DATASPACE SIMPLE { ( 48428 ) / ( H5S_UNLIMITED ) }
+ DATA {
+ (0): 34, 136, 68, 128, 136, 136, 136, 34, 136, 34, 34, 17, 68, 34,
+ (14): 68, 34, 17, 68, 34, 17, 34, 34, 68, 136, 16, 17, 17, 136, 136,
+ (29): 17, 34, 136, 68, 32, 136, 68, 17, 68, 34, 34, 17, 136, 34, 17,
+ (44): 136, 68, 17, 34, 68, 34, 34, 68, 17, 136, 68, 68, 17, 68, 34,
+ $ rm tmpfile
+
+#input, reference and output formats are: fasta, folder and sam/cmp.h5
+ $ Q=$DATDIR/lambda_query.fasta
+ $ T=/pbi/dept/secondary/siv/references/lambda/
+
+ $ NAME=lambda2
+ $ SAMOUT=$OUTDIR/$NAME.sam
+ $ CMPOUT=$OUTDIR/$NAME.cmp.h5
+ $ M4OUT=$OUTDIR/$NAME.m4
+ $ M4STDOUT=$STDSIVDIR/$NAME.m4
+
+ $ rm -f $SAMOUT $CMPOUT $M4OUT
+ $ pbalign $Q $T $SAMOUT 2>/dev/null
+ $ $SAMTOM4 $SAMOUT $T/sequence/lambda.fasta $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+ $ diff $M4OUT $M4STDOUT
+
+ $ pbalign $Q $T $CMPOUT 2>/dev/null
+ $ $CMPH5TOOLS sort $CMPOUT --deep --inPlace
+ $ h5dump -g /ref000001 $CMPOUT > tmpfile
+ $ sed -n '2,11p' tmpfile
+ GROUP "/ref000001" {
+ GROUP "rg1-0" {
+ DATASET "AlnArray" {
+ DATATYPE H5T_STD_U8LE
+ DATASPACE SIMPLE { ( 48428 ) / ( H5S_UNLIMITED ) }
+ DATA {
+ (0): 34, 136, 68, 128, 136, 136, 136, 34, 136, 34, 34, 17, 68, 34,
+ (14): 68, 34, 17, 68, 34, 17, 34, 34, 68, 136, 16, 17, 17, 136, 136,
+ (29): 17, 34, 136, 68, 32, 136, 68, 17, 68, 34, 34, 17, 136, 34, 17,
+ (44): 136, 68, 17, 34, 68, 34, 34, 68, 17, 136, 68, 68, 17, 68, 34,
+ $ rm tmpfile
+
+
+#input, reference and output formats are: fofn, fasta and sam/cmp.h5
+ $ Q=$DATDIR/lambda_bax.fofn
+ $ T="/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+
+ $ NAME=lambda3
+ $ SAMOUT=$OUTDIR/$NAME.sam
+ $ CMPOUT=$OUTDIR/$NAME.cmp.h5
+ $ M4OUT=$OUTDIR/$NAME.m4
+ $ M4STDOUT=$STDSIVDIR/$NAME.m4
+
+ $ rm -f $SAMOUT $CMPOUT $M4OUT
+ $ pbalign $Q $T $SAMOUT 2>/dev/null
+ $ $SAMTOM4 $SAMOUT $T $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+ $ diff $M4OUT $M4STDOUT
+
+ $ pbalign $Q $T $CMPOUT 2>/dev/null
+ $ $CMPH5TOOLS sort $CMPOUT --deep --inPlace
+ $ h5dump -g /ref000001 $CMPOUT > tmpfile
+ $ sed -n '2,11p' tmpfile
+ GROUP "/ref000001" {
+ GROUP "rg1-0" {
+ DATASET "AlnArray" {
+ DATATYPE H5T_STD_U8LE
+ DATASPACE SIMPLE { ( 79904 ) / ( H5S_UNLIMITED ) }
+ DATA {
+ (0): 68, 68, 68, 34, 68, 68, 34, 68, 17, 34, 34, 136, 32, 34, 68,
+ (15): 34, 68, 68, 68, 136, 136, 136, 136, 34, 68, 34, 128, 136, 17,
+ (29): 136, 136, 136, 17, 136, 68, 17, 17, 17, 17, 128, 136, 136,
+ (42): 136, 136, 34, 34, 68, 68, 128, 136, 136, 136, 16, 17, 17, 64,
+
+ $ rm tmpfile
+
+#input, reference and output formats are: fofn, folder and sam/cmp.h5
+ $ Q=$DATDIR/lambda_bax.fofn
+ $ T=/pbi/dept/secondary/siv/references/lambda/
+
+ $ NAME=lambda4
+ $ SAMOUT=$OUTDIR/$NAME.sam
+ $ CMPOUT=$OUTDIR/$NAME.cmp.h5
+ $ M4OUT=$OUTDIR/$NAME.m4
+ $ M4STDOUT=$STDSIVDIR/$NAME.m4
+
+ $ rm -f $SAMOUT $CMPOUT $M4OUT
+ $ pbalign $Q $T $SAMOUT 2>/dev/null
+ $ $SAMTOM4 $SAMOUT $T/sequence/lambda.fasta $TMP1 && sort $TMP1 > $M4OUT && rm $TMP1
+ $ diff $M4OUT $M4STDOUT
+
+ $ pbalign $Q $T $CMPOUT 2>/dev/null
+ $ $CMPH5TOOLS sort $CMPOUT --deep --inPlace
+ $ h5dump -g /ref000001 $CMPOUT > tmpfile
+ $ sed -n '2,11p' tmpfile
+ GROUP "/ref000001" {
+ GROUP "rg1-0" {
+ DATASET "AlnArray" {
+ DATATYPE H5T_STD_U8LE
+ DATASPACE SIMPLE { ( 79904 ) / ( H5S_UNLIMITED ) }
+ DATA {
+ (0): 68, 68, 68, 34, 68, 68, 34, 68, 17, 34, 34, 136, 32, 34, 68,
+ (15): 34, 68, 68, 68, 136, 136, 136, 136, 34, 68, 34, 128, 136, 17,
+ (29): 136, 136, 136, 17, 136, 68, 17, 17, 17, 17, 128, 136, 136,
+ (42): 136, 136, 34, 34, 68, 68, 128, 136, 136, 136, 16, 17, 17, 64,
+
+ $ rm tmpfile
diff --git a/tests/cram_h5/pulsefile.t b/tests/cram_h5/pulsefile.t
new file mode 100644
index 0000000..976fcb7
--- /dev/null
+++ b/tests/cram_h5/pulsefile.t
@@ -0,0 +1,23 @@
+Set up
+ $ . $TESTDIR/setup.sh
+
+# Test pbalign with --pulseFile
+# This is an experimental option which goes only with gmap,
+# it enables users to bypass the pls2fasta step and use their own fasta
+# file instead, while keeping the ability to generate cmp.h5 files with pulses
+# (i.e., --forQuiver).
+ $ O=$OUTDIR/test_pulseFile.cmp.h5
+ $ T=/pbi/dept/secondary/siv/references/Ecoli_K12_DH10B/
+ $ T=/pbi/dept/secondary/siv/references/Ecoli_K12_DH10B/sequence/Ecoli_K12_DH10B.fasta
+ $ pbalign $DATDIR/test_pulseFile.fasta $T $O --pulseFile $DATDIR/test_pulseFile.fofn --forQuiver --algorithm gmap --byread 2>/dev/null
+ $ echo $?
+ 0
+
+ $ O=$OUTDIR/test_pulseFile.cmp.h5
+ $ T=/pbi/dept/secondary/siv/references/Ecoli_K12_DH10B/
+ $ T=/pbi/dept/secondary/siv/references/Ecoli_K12_DH10B/sequence/Ecoli_K12_DH10B.fasta
+ $ rm -f $O
+ $ pbalign $DATDIR/test_pulseFile.fasta $T $O --pulseFile $DATDIR/test_pulseFile.fofn --forQuiver --algorithm blasr --byread 2>/dev/null
+ $ echo $?
+ 0
+
diff --git a/tests/cram_h5/setup.sh b/tests/cram_h5/setup.sh
new file mode 100755
index 0000000..391b51f
--- /dev/null
+++ b/tests/cram_h5/setup.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/bash
+
+#Note:
+#Program name has been changed from `pbalign.py` in version 0.1.0
+#to `pbalign` in 0.2.0, pseudo namespace pbtools has been removed also.
+
+CURDIR=$TESTDIR
+DATDIR=$CURDIR/../data
+OUTDIR=$CURDIR/../out
+STDDIR=$CURDIR/../stdout
+SIVDIR=/pbi/dept/secondary/siv/testdata/pbalign-unittest
+DATSIVDIR=$SIVDIR/data/
+STDSIVDIR=$SIVDIR/stdout/
+
+TESTDATASETS=/pbi/dept/secondary/siv/testdata/SA3-RS
+REFDIR=/pbi/dept/secondary/siv/references
+
+TMP1=$$.tmp.out
+TMP2=$$.tmp.stdout
+SAMTOM4=samtom4
+CMPH5TOOLS="/mnt/secondary/Smrtanalysis/smrtcmds/bin/smrtwrap cmph5tools.py "
+
diff --git a/tests/cram/test_mask_aligned_reads.t b/tests/cram_h5/test_mask_aligned_reads.t
similarity index 96%
rename from tests/cram/test_mask_aligned_reads.t
rename to tests/cram_h5/test_mask_aligned_reads.t
index 552a7bc..7311758 100644
--- a/tests/cram/test_mask_aligned_reads.t
+++ b/tests/cram_h5/test_mask_aligned_reads.t
@@ -1,6 +1,6 @@
Test mask_aligned_reads.py
$ CURDIR=$TESTDIR
- $ DATDIR=/mnt/secondary-siv/testdata/BlasrTestData/pbalign/data/test_mask_aligned_reads
+ $ DATDIR=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/test_mask_aligned_reads
$ OUTDIR=$CURDIR/../out
$ STDDIR=$DATDIR/../../stdout
diff --git a/tests/data/1.config b/tests/data/1.config
index 8d43a60..ab02ab6 100644
--- a/tests/data/1.config
+++ b/tests/data/1.config
@@ -14,10 +14,9 @@
--maxHits = 20
--minAnchorSize = 1
--minLength = 100
---algorithmOptions = "-noSplitSubreads -maxMatch 30 -nCandidates 30"
+--algorithmOptions = "--noSplitSubreads --maxMatch 30 --nCandidates 30"
# SamFilter's filtering criteria and hit policy.
---scoreFunction = blasr
--hitPolicy = randombest
--maxDivergence = 40
diff --git a/tests/data/ecoli_lp.fofn b/tests/data/ecoli_lp.fofn
index 06c29ac..cb6e86b 100644
--- a/tests/data/ecoli_lp.fofn
+++ b/tests/data/ecoli_lp.fofn
@@ -1,2 +1,2 @@
-/home/UNIXHOME/yli/yliWorkspace/private/yli/data/testLoadPulses/m121215_065521_richard_c100425710150000001823055001121371_s1_p0.pls.h5
-/home/UNIXHOME/yli/yliWorkspace/private/yli/data/testLoadPulses/m121215_065521_richard_c100425710150000001823055001121371_s2_p0.pls.h5
+/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/testLoadPulses/Analysis_Results/m121215_065521_richard_c100425710150000001823055001121371_s1_p0.pls.h5
+/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/testLoadPulses/Analysis_Results/m121215_065521_richard_c100425710150000001823055001121371_s2_p0.pls.h5
diff --git a/tests/data/lambda_bax.fofn b/tests/data/lambda_bax.fofn
index 80b3bd2..22aecb6 100644
--- a/tests/data/lambda_bax.fofn
+++ b/tests/data/lambda_bax.fofn
@@ -1 +1 @@
-/home/UNIXHOME/yli/yliWorkspace/private/yli/data/testLoadPulses/m130302_011223_pd1_c000000092559900001500000112311511_s1_p0.2.bax.h5
+/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/testLoadPulses/Analysis_Results/m130302_011223_pd1_c000000092559900001500000112311511_s1_p0.2.bax.h5
diff --git a/tests/data/reference_lambda.xml b/tests/data/reference_lambda.xml
new file mode 100644
index 0000000..288a913
--- /dev/null
+++ b/tests/data/reference_lambda.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" ?>
+<ReferenceSet CreatedAt="2015-07-16T10:48:02" MetaType="PacBio.DataSet.ReferenceSet" Name="" Tags="" UniqueId="1078368a-d5c5-433b-82b9-38ae124259c5" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <ExternalResources>
+ <ExternalResource MetaType="PacBio.ReferenceFile.ReferenceFastaFile" ResourceId="/pbi/dept/secondary/siv/references/lambdaNEB/sequence/lambdaNEB.fasta">
+ <FileIndices>
+ <FileIndex MetaType="PacBio.Index.SamIndex" ResourceId="/pbi/dept/secondary/siv/references/lambdaNEB/sequence/lambdaNEB.fasta.fai"/>
+ </FileIndices>
+ </ExternalResource>
+ </ExternalResources>
+ <DataSetMetadata>
+ <NumRecords>1</NumRecords>
+ <TotalLength>48502</TotalLength>
+ </DataSetMetadata>
+</ReferenceSet>
diff --git a/tests/data/subreads_dataset1.xml b/tests/data/subreads_dataset1.xml
new file mode 100644
index 0000000..82a9b6c
--- /dev/null
+++ b/tests/data/subreads_dataset1.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" ?>
+<AlignmentSet CreatedAt="2015-07-16T11:35:50" MetaType="PacBio.DataSet.AlignmentSet" Name="" Tags="" UniqueId="2e8eaedb-0928-2310-bffd-6d3b36062e53" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <ExternalResources>
+ <ExternalResource MetaType="PacBio.SubreadFile.SubreadBamFile" ResourceId="/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/test_bam/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam">
+ <FileIndices>
+ <FileIndex MetaType="PacBio.Index.PacBioIndex" ResourceId="/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/test_bam/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam.pbi"/>
+ <FileIndex MetaType="PacBio.Index.BamIndex" ResourceId="/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/test_bam/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam.bai"/>
+ </FileIndices>
+ </ExternalResource>
+ <ExternalResource MetaType="PacBio.SubreadFile.SubreadBamFile" ResourceId="/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/test_bam/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.subreads.bam">
+ <FileIndices>
+ <FileIndex MetaType="PacBio.Index.PacBioIndex" ResourceId="/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/test_bam/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.subreads.bam.pbi"/>
+ <FileIndex MetaType="PacBio.Index.BamIndex" ResourceId="/pbi/dept/secondary/siv/testdata/BlasrTestData/ctest/data/test_bam/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.subreads.bam.bai"/>
+ </FileIndices>
+ </ExternalResource>
+ </ExternalResources>
+ <DataSetMetadata>
+ <NumRecords>-1</NumRecords>
+ <TotalLength>-1</TotalLength>
+ </DataSetMetadata>
+</AlignmentSet>
diff --git a/tests/data/subreads_dataset2.xml b/tests/data/subreads_dataset2.xml
new file mode 100644
index 0000000..931dbef
--- /dev/null
+++ b/tests/data/subreads_dataset2.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" ?>
+<SubreadSet CreatedAt="2015-07-16T11:39:29" MetaType="PacBio.DataSet.SubreadSet" Name="" Tags="" UniqueId="a263117b-9f93-6ae5-9c11-7ac144df4ae5" Version="2.3.0" xmlns="http://pacificbiosciences.com/PacBioDataModel.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://pacificbiosciences.com/PacBioDataModel.xsd">
+ <ExternalResources>
+ <ExternalResource MetaType="PacBio.SubreadFile.SubreadBamFile" ResourceId="/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/test_bam/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.subreads.bam"/>
+ </ExternalResources>
+ <DataSetMetadata>
+ <NumRecords>-1</NumRecords>
+ <TotalLength>-1</TotalLength>
+ </DataSetMetadata>
+</SubreadSet>
diff --git a/tests/data/test_bas.fofn b/tests/data/test_bas.fofn
index dd962ae..99b60ff 100644
--- a/tests/data/test_bas.fofn
+++ b/tests/data/test_bas.fofn
@@ -1 +1 @@
-/mnt/secondary-siv/testdata/BlasrTestData/pbalign/data/ccs_P4-C2/m130328_211423_ethan_c100499512550000001823070408081371_s1_p0.bas.h5
+/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/ccs_P4-C2/m130328_211423_ethan_c100499512550000001823070408081371_s1_p0.bas.h5
diff --git a/tests/data/test_ccs.fofn b/tests/data/test_ccs.fofn
index 1352ec4..a744657 100644
--- a/tests/data/test_ccs.fofn
+++ b/tests/data/test_ccs.fofn
@@ -1 +1 @@
-/mnt/secondary-siv/testdata/BlasrTestData/pbalign/data/new.ccs.h5
+/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/new.ccs.h5
diff --git a/tests/data/test_filterAdapterOnly.fofn b/tests/data/test_filterAdapterOnly.fofn
index 62697c8..826ce4e 100755
--- a/tests/data/test_filterAdapterOnly.fofn
+++ b/tests/data/test_filterAdapterOnly.fofn
@@ -1 +1 @@
-/mnt/data3/vol57/2820011/0006/Analysis_Results/m130302_124313_42130_c100502672550000001523078308081365_s1_p0.bas.h5
+/pbi/dept/secondary/siv/testdata/LIMS/2820011/0006/Analysis_Results/m130302_124313_42130_c100502672550000001523078308081365_s1_p0.bas.h5
diff --git a/tests/data/test_leftmost_query.fasta b/tests/data/test_leftmost_query.fasta
index 520bad1..e507cbd 100644
--- a/tests/data/test_leftmost_query.fasta
+++ b/tests/data/test_leftmost_query.fasta
@@ -1,4 +1,4 @@
->ref000001|gi|49175990|ref|NC_000913.2| Escherichia coli str. K-12 substr. MG1655 chromosome, complete genome
+>fakemovie/0/0_2940
AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTC
TGATAGCAGCTTCTGAACTGGTTACCTGCCGTGAGTAAATTAAAATTTTATTGACTTAGG
TCACTAAATACTTTAACCAATATAGGCATAGCGCACAGACAGATAAAAATTACAGAGTAC
diff --git a/tests/data/test_pulseFile.fofn b/tests/data/test_pulseFile.fofn
index 7f4e3da..4adee68 100644
--- a/tests/data/test_pulseFile.fofn
+++ b/tests/data/test_pulseFile.fofn
@@ -1,4 +1,4 @@
-/mnt/data3/vol57/2650250/0001/Analysis_Results/m121004_000921_42130_c100440700060000001523060402151341_s1_p0.bas.h5
-/mnt/data3/vol53/2450496/0001/Analysis_Results/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.bax.h5
-/mnt/data3/vol53/2450496/0001/Analysis_Results/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.2.bax.h5
-/mnt/data3/vol53/2450496/0001/Analysis_Results/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.3.bax.h5
+/pbi/dept/secondary/siv/testdata/LIMS/2650250/0001/Analysis_Results/m121004_000921_42130_c100440700060000001523060402151341_s1_p0.bas.h5
+/pbi/dept/secondary/siv/testdata/LIMS/2450496/0001/Analysis_Results/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.bax.h5
+/pbi/dept/secondary/siv/testdata/LIMS/2450496/0001/Analysis_Results/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.2.bax.h5
+/pbi/dept/secondary/siv/testdata/LIMS/2450496/0001/Analysis_Results/m130406_011850_42141_c100513442550000001823074308221310_s1_p0.3.bax.h5
diff --git a/tests/out/readme b/tests/out/readme
index 8606a09..26ecd90 100644
--- a/tests/out/readme
+++ b/tests/out/readme
@@ -1,2 +1 @@
-# This directory saves output files generated by pbalign's
-# unit tests and cram tests.
+FIXME this directory needs to be retired
diff --git a/tests/unit/nose.cfg b/tests/unit/nose.cfg
new file mode 100644
index 0000000..5ed666c
--- /dev/null
+++ b/tests/unit/nose.cfg
@@ -0,0 +1,17 @@
+# pbalign unittest config, copied from pbreports.
+
+[DEFAULT]
+
+[log]
+level=debug
+file=pbalign-unittest.log
+
+[data]
+dataDir=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/
+stdDir=/pbi/dept/secondary/siv/testdata/pbalign-unittest/stdout/
+
+#This is for generating documentation of report attribute ids
+[doc]
+jinja-templates=doc/report-metrics
+html-dir=doc/_build/html
+
diff --git a/tests/unit/test_fileutil.py b/tests/unit/test_fileutil.py
index d6d89ff..cee5432 100755
--- a/tests/unit/test_fileutil.py
+++ b/tests/unit/test_fileutil.py
@@ -1,18 +1,31 @@
"""Test pbalign.util/fileutil.py"""
+import tempfile
import unittest
+import filecmp
+import shutil
from os import path
+
+from pbcore.io import DataSet
+
from pbalign.utils.fileutil import getFileFormat, \
isValidInputFormat, isValidOutputFormat, getFilesFromFOFN, \
checkInputFile, checkOutputFile, checkReferencePath, \
real_upath, real_ppath, isExist
-import filecmp
+
+from test_setpath import ROOT_DIR, DATA_DIR
class Test_fileutil(unittest.TestCase):
"""Test pbalign.util/fileutil.py"""
def setUp(self):
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
+ self.rootDir = ROOT_DIR
+ self.outDir = tempfile.mkdtemp()
+ self.dataDir = path.join(DATA_DIR,
+ "testLoadPulses/Analysis_Results/")
+
+ def tearDown(self):
+ shutil.rmtree(self.outDir)
def test_isValidInputFormat(self):
"""Test isValidInputFormat()."""
@@ -42,40 +55,43 @@ class Test_fileutil(unittest.TestCase):
def test_getFilesFromFOFN(self):
"""Test getFilesFromFOFN()."""
- fofnFN = "{0}/data/ecoli_lp.fofn".format(self.rootDir)
- fns = ["/home/UNIXHOME/yli/yliWorkspace/private/yli/data" + \
- "/testLoadPulses/m121215_065521_richard_c10042571" + \
- "0150000001823055001121371_s1_p0.pls.h5",
- "/home/UNIXHOME/yli/yliWorkspace/private/yli/data" + \
- "/testLoadPulses/m121215_065521_richard_c10042571" + \
- "0150000001823055001121371_s2_p0.pls.h5"]
+ fofnFN = path.join(self.rootDir, "data/ecoli_lp.fofn")
+ fns = [self.dataDir +
+ "m121215_065521_richard_c100425710150000001823055001121371_s1_p0.pls.h5",
+ self.dataDir +
+ "m121215_065521_richard_c100425710150000001823055001121371_s2_p0.pls.h5"]
self.assertEqual(fns, getFilesFromFOFN(fofnFN))
def test_checkInputFile(self):
"""Test checkInputFile()."""
- fastaFN = "{0}/data/ecoli.fasta".format(self.rootDir)
- plsFN = "/home/UNIXHOME/yli/yliWorkspace/private/yli/" + \
- "data/testLoadPulses/m121215_065521_richard_" + \
- "c100425710150000001823055001121371_s1_p0.pls.h5"
+ fastaFN = path.join(self.rootDir, "data/ecoli.fasta")
+ plsFN = self.dataDir + \
+ "m121215_065521_richard_c100425710150000001823055001121371_s1_p0.pls.h5"
self.assertTrue(filecmp.cmp(fastaFN, checkInputFile(fastaFN)))
self.assertTrue(filecmp.cmp(plsFN, checkInputFile(plsFN)))
- fofnFN = "{0}/data/ecoli_lp.fofn".format(self.rootDir)
+ fofnFN = path.join(self.rootDir, "data/ecoli_lp.fofn")
self.assertTrue(filecmp.cmp(fofnFN, checkInputFile(fofnFN)))
+ xmlFN = path.join(self.rootDir, "data/subreads_dataset1.xml")
+ ret = checkInputFile(xmlFN)
+ self.assertTrue(ret.endswith('.xml'))
+ fs = DataSet(ret).toExternalFiles()
+ self.assertTrue(fs[0].endswith("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.subreads.bam"))
+ self.assertTrue(fs[1].endswith("m130406_011850_42141_c100513442550000001823074308221310_s1_p0.1.subreads.bam"))
+
def test_checkOutputFile(self):
"""Test checkOutputFile()."""
- samFN = "{0}/out/lambda_out.sam".format(self.rootDir)
- cmpFN = "{0}/out/lambda_out.cmp.h5".format(self.rootDir)
+ samFN = path.join(self.outDir, "lambda_out.sam")
+ cmpFN = path.join(self.outDir, "lambda_out.cmp.h5")
self.assertTrue(filecmp.cmp(samFN, checkOutputFile(samFN)))
self.assertTrue(filecmp.cmp(cmpFN, checkOutputFile(cmpFN)))
def test_checkReferencePath(self):
"""Test checkReferencePath()."""
- refDir = "/mnt/secondary/Smrtanalysis/opt/smrtanalysis/common/" + \
- "references"
+ refDir = "/pbi/dept/secondary/siv/references/"
refPath = path.join(refDir, "lambda")
refPath, refFastaOut, refSaOut, isWithinRepository, annotation = \
checkReferencePath(refPath)
@@ -97,22 +113,27 @@ class Test_fileutil(unittest.TestCase):
"sequence/lambda.fasta.sa")))
self.assertTrue(isWithinRepository)
-
fastaFN = "{0}/data/ecoli.fasta".format(self.rootDir)
+
refpath, refFastaOut, refSaOut, isWithinRepository, annotation = \
checkReferencePath(fastaFN)
self.assertTrue(filecmp.cmp(refpath, refFastaOut))
self.assertIsNone(refSaOut)
self.assertFalse(isWithinRepository)
- refPathWithAnnotation = "/mnt/secondary-siv/" + \
- "testdata/BlasrTestData/pbalign/data/references/H1_6_Scal_6x/"
+ refPathWithAnnotation = path.join(DATA_DIR, "references/H1_6_Scal_6x/")
_refPath, _refFaOut, _refSaOut, _isWithinRepository, annotation = \
checkReferencePath(refPathWithAnnotation)
self.assertEqual(path.abspath(annotation),
path.abspath(path.join(refPathWithAnnotation,
"annotations/H1_6_Scal_6x_adapters.gff")))
+ xmlFN = path.join(self.rootDir, "data/reference_lambda.xml")
+ refpath, refFastaOut, refSaOut, isWithinRepository, annotation = \
+ checkReferencePath(xmlFN)
+ self.assertTrue(filecmp.cmp(refFastaOut,
+ "/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/lambda_ref.fasta"))
+
def test_isExist(self):
"""Test isExist(ff)."""
self.assertFalse(isExist(None))
diff --git a/tests/unit/test_options.py b/tests/unit/test_options.py
index 6b5c8e5..4136212 100755
--- a/tests/unit/test_options.py
+++ b/tests/unit/test_options.py
@@ -1,26 +1,46 @@
-from pbalign.options import *
+
+import tempfile
from argparse import *
from os import path
+import os
import filecmp
import unittest
-rootDir = path.dirname(path.dirname(path.abspath(__file__)))
-configFile = path.join(rootDir, "data/2.config")
-configFile3 = path.join(rootDir, "data/3.config")
+from pbalign.options import *
+
+
+def get_argument_parser():
+ return get_contract_parser().arg_parser.parser
def writeConfigFile(configFile, configOptions):
"""Write configs to a file."""
with open (configFile, 'w') as f:
f.write("\n".join(configOptions))
+def parseOptions(args):
+ return get_argument_parser().parse_args(args)
+
class Test_Options(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.infiles = ["tmp_readfile.bam", "tmp_reffile.fasta"]
+ for file_name in cls.infiles:
+ open(file_name, "w").write("\n")
+ cls.configFile = tempfile.NamedTemporaryFile(suffix=".config").name
+ cls.configFile3 = tempfile.NamedTemporaryFile(suffix=".config").name
+
+ @classmethod
+ def tearDownClass(cls):
+ for file_name in cls.infiles:
+ os.remove(file_name)
+
def test_importConfigOptions(self):
"""Test importConfigOptions()."""
configOptions = ("--minAccuracy = 40",
"--maxHits = 20")
- writeConfigFile(configFile, configOptions)
- options = Namespace(configFile=configFile,
+ writeConfigFile(self.configFile, configOptions)
+ options = Namespace(configFile=self.configFile,
minAccuracy=10,
maxHits=12)
@@ -29,51 +49,48 @@ class Test_Options(unittest.TestCase):
self.assertEqual(int(newOptions.maxHits), 20)
self.assertEqual(int(newOptions.minAccuracy), 40)
- def test_ConstructOptionParser(self):
- """Test constructOptionParser()."""
- ret = constructOptionParser()
- self.assertEqual(type(ret), argparse.ArgumentParser)
+ def test_get_argument_parser(self):
+ """Test get_argument_parser()."""
+ ret = get_argument_parser()
+ self.assertTrue(isinstance(ret, argparse.ArgumentParser))
def test_parseOptions(self):
- """Test parseOptions()."""
+ """Test parseOptions with a config file."""
configOptions = (
"--maxHits = 20",
"--minAnchorSize = 15",
"--minLength = 100",
- "--algorithmOptions = '-noSplitSubreads " + \
- "-maxMatch 30 -nCandidates 30'",
+ "--algorithmOptions = '--noSplitSubreads " + \
+ "--maxMatch 30 --nCandidates 30'",
"# Some comments",
- "--scoreFunction = blasr",
+ #"--scoreFunction = blasr",
"--hitPolicy = random",
"--maxDivergence = 40",
"--debug")
- writeConfigFile(configFile, configOptions)
-
- def test_parseOptions_with_config(self):
- """Test parseOptions with a config file."""
- # With the above config file
- argumentList = ['--configFile', configFile,
- '--maxHits', '30',
- '--minAccuracy', '50',
- 'readfile', 'reffile', 'outfile']
- parser, options, infoMsg = parseOptions(argumentList)
-
- self.assertTrue(filecmp.cmp(options.configFile, configFile))
+ writeConfigFile(self.configFile, configOptions)
+ argumentList = [
+ '--configFile', self.configFile,
+ '--maxHits', '30',
+ '--minAccuracy', '50',
+ ] + self.infiles + ['outfile']
+ options = parseOptions(argumentList)
+
+ self.assertTrue(filecmp.cmp(options.configFile, self.configFile))
self.assertEqual(int(options.maxHits), 30)
self.assertEqual(int(options.minAccuracy), 50)
self.assertEqual("".join(options.algorithmOptions),
- "-noSplitSubreads -maxMatch 30 -nCandidates 30")
- self.assertEqual(options.scoreFunction, "blasr")
+ "--noSplitSubreads --maxMatch 30 --nCandidates 30")
+ #self.assertEqual(options.scoreFunction, "blasr")
self.assertEqual(options.hitPolicy, "random")
self.assertEqual(int(options.maxDivergence), 40)
def test_parseOptions_without_config(self):
"""Test parseOptions without any config file."""
argumentList = ['--maxHits=30',
- '--minAccuracy=50',
- 'readfile', 'reffile', 'outfile']
- parser, options,infoMsg = parseOptions(argumentList)
+ '--minAccuracy=50'
+ ] + self.infiles + ['outfile']
+ options = parseOptions(argumentList)
self.assertIsNone(options.configFile)
self.assertEqual(int(options.maxHits), 30)
@@ -83,15 +100,15 @@ class Test_Options(unittest.TestCase):
def test_parseOptions_multi_algorithmOptions(self):
"""Test parseOptions with multiple algorithmOptions."""
- algo1 = " -holeNumbers 1"
- algo2 = " -nCandidate 25"
- algo3 = " ' -bestn 11 '"
- argumentList = ['--algorithmOptions=%s' % algo1,
- '--algorithmOptions=%s' % algo2,
- 'readfile', 'reffile', 'outfile']
-
+ algo1 = " --holeNumbers 1"
+ algo2 = " --nCandidate 25"
+ algo3 = " ' --bestn 11 '"
+ argumentList = [
+ '--algorithmOptions=%s' % algo1,
+ '--algorithmOptions=%s' % algo2,
+ ] + self.infiles + ["outfile"]
print argumentList
- parser, options, infoMsg = parseOptions(argumentList)
+ options = parseOptions(argumentList)
# Both algo1 and algo2 should be in algorithmOptions.
print options.algorithmOptions
#self.assertTrue(algo1 in options.algorithmOptions)
@@ -99,11 +116,11 @@ class Test_Options(unittest.TestCase):
# Construct a config file.
configOptions = ("--algorithmOptions = \"%s\"" % algo3)
- writeConfigFile(configFile3, [configOptions])
+ writeConfigFile(self.configFile3, [configOptions])
- argumentList.append("--configFile={0}".format(configFile3))
+ argumentList.append("--configFile={0}".format(self.configFile3))
print argumentList
- parser, options, infoMsg = parseOptions(argumentList)
+ options = parseOptions(argumentList)
# Make sure algo3 have been overwritten.
print options.algorithmOptions
self.assertTrue(algo1 in options.algorithmOptions)
@@ -114,16 +131,14 @@ class Test_Options(unittest.TestCase):
"""Test parseOptions without specifying maxHits and minAccuracy."""
# Test if maxHits and minAccuracy are not set,
# whether both options.minAnchorSize and maxHits are None
- argumentList = ["--minAccuracy", "50",
- 'readfile', 'reffile', 'outfile']
- parser, options,infoMsg = parseOptions(argumentList)
+ argumentList = ["--minAccuracy", "50"] + self.infiles + ["outfile.bam"]
+ options = parseOptions(argumentList)
self.assertIsNone(options.minAnchorSize)
self.assertIsNone(options.maxHits)
def test_importDefaultOptions(self):
"""Test importDefaultOptions"""
- options = Namespace(configFile=configFile,
- minAccuracy=10,
+ options = Namespace(minAccuracy=10,
maxHits=12)
defaultOptions = {"minAccuracy":30, "maxHits":14}
newOptions, infoMsg = importDefaultOptions(options, defaultOptions)
diff --git a/tests/unit/test_pbalign.py b/tests/unit/test_pbalign.py
index 369ae79..dca8cfc 100755
--- a/tests/unit/test_pbalign.py
+++ b/tests/unit/test_pbalign.py
@@ -1,40 +1,47 @@
+
import unittest
+import tempfile
+import shutil
from os import path
+
from pbalign.pbalignrunner import PBAlignRunner
+from test_setpath import ROOT_DIR, DATA_DIR
+
class Test_PBAlignRunner(unittest.TestCase):
def setUp(self):
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
- self.queryFile = path.join(self.rootDir, "data/lambda_query.fasta")
- self.referenceFile = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/" + \
- "lambda/sequence/lambda.fasta"
+ self.rootDir = ROOT_DIR
+ self.queryFile = path.join(self.rootDir, "data/subreads_dataset1.xml")
+ self.referenceFile = path.join(self.rootDir, "data/reference_lambda.xml")
self.configFile = path.join(self.rootDir, "data/1.config")
- self.samOut = path.join(self.rootDir, "out/lambda_out.sam")
- self.cmph5Out = path.join(self.rootDir, "out/lambda_out.cmp.h5")
+ self.OUT_DIR = tempfile.mkdtemp()
+ self.bamOut = path.join(self.OUT_DIR, "lambda_out.bam")
+
+ def tearDown(self):
+ shutil.rmtree(self.OUT_DIR)
def test_init(self):
"""Test PBAlignRunner.__init__()."""
argumentList = ['--minAccuracy', '70', '--maxDivergence', '30',
self.queryFile, self.referenceFile,
- self.samOut]
+ self.bamOut]
pbobj = PBAlignRunner(argumentList = argumentList)
self.assertEqual(pbobj.start(), 0)
def test_init_with_algorithmOptions(self):
"""Test PBAlignRunner.__init__() with --algorithmOptions."""
- argumentList = ['--algorithmOptions', '-minMatch 10 -useccsall',
+ argumentList = ['--algorithmOptions', '--minMatch 10 --useccsall',
self.queryFile, self.referenceFile,
- self.cmph5Out]
+ self.bamOut]
pbobj = PBAlignRunner(argumentList = argumentList)
self.assertEqual(pbobj.start(), 0)
def test_init_with_config_algorithmOptions(self):
"""Test PBAlignRunner.__init__() with a config file and --algorithmOptions."""
- argumentList = ['--algorithmOptions', '-maxMatch 20 -nCandidates 30',
+ argumentList = ['--algorithmOptions', '--maxMatch 20 --nCandidates 30',
'--configFile', self.configFile,
self.queryFile, self.referenceFile,
- self.cmph5Out]
+ self.bamOut]
pbobj = PBAlignRunner(argumentList = argumentList)
self.assertEqual(pbobj.start(), 0)
@@ -42,13 +49,13 @@ class Test_PBAlignRunner(unittest.TestCase):
def test_init_expect_conflicting_options(self):
"""Test PBAlignRunner.__init__() with a config file and --algorithmOptions
and expect a ValueError for conflicting options."""
- argumentList = ['--algorithmOptions', '-minMatch 10 -useccsall',
+ argumentList = ['--algorithmOptions', '--minMatch 10 --useccsall',
'--configFile', self.configFile,
self.queryFile, self.referenceFile,
- self.cmph5Out]
+ self.bamOut]
pbobj = PBAlignRunner(argumentList = argumentList)
with self.assertRaises(ValueError) as cm:
- # Expect a ValueError since -minMatch and --minAnchorSize conflicts.
+ # Expect a ValueError since --minMatch and --minAnchorSize conflicts.
pbobj.start()
diff --git a/tests/unit/test_pbalignfiles.py b/tests/unit/test_pbalignfiles.py
index 30a241b..34c999b 100755
--- a/tests/unit/test_pbalignfiles.py
+++ b/tests/unit/test_pbalignfiles.py
@@ -1,18 +1,27 @@
+
import unittest
+import tempfile
import filecmp
+import shutil
from os import path
+
from pbalign.pbalignfiles import PBAlignFiles
+from test_setpath import ROOT_DIR
+
class Test_PbAlignFiles_Ecoli(unittest.TestCase):
def setUp(self):
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
+ self.rootDir = ROOT_DIR
self.inputFileName = path.join(self.rootDir, "data/ecoli.fasta")
- self.referencePath = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/ecoli_K12_MG1655/"
+ self.referencePath = "/pbi/dept/secondary/siv/references/ecoli_k12_MG1655/"
self.targetFileName = path.join(self.referencePath,
- "sequence/ecoli_K12_MG1655.fasta")
+ "sequence/ecoli_k12_MG1655.fasta")
self.sawriterFileName = self.targetFileName + ".sa"
- self.outputFileName = path.join(self.rootDir, "out/tmp.sam")
+ self.OUT_DIR = tempfile.mkdtemp()
+ self.outputFileName = path.join(self.OUT_DIR, "tmp.sam")
+
+ def tearDown(self):
+ shutil.rmtree(self.OUT_DIR)
def test_init(self):
"""Test PBAlignFiles.__init__() with a reference repository."""
@@ -30,12 +39,14 @@ class Test_PbAlignFiles_Ecoli(unittest.TestCase):
class Test_PbAlignFiles(unittest.TestCase):
def setUp(self):
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
+ self.rootDir = ROOT_DIR
self.inputFileName = path.join(self.rootDir, "data/lambda_bax.fofn")
- self.referenceFile = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/" + \
- "lambda/sequence/lambda.fasta"
- self.outputFileName = path.join(self.rootDir, "out/tmp.sam")
+ self.referenceFile = "/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+ self.OUT_DIR = tempfile.mkdtemp()
+ self.outputFileName = path.join(self.OUT_DIR, "tmp.sam")
+
+ def tearDown(self):
+ shutil.rmtree(self.OUT_DIR)
def test_init(self):
"""Test PBAlignFiles.__init__()."""
diff --git a/tests/unit/test_referenceInfo.py b/tests/unit/test_referenceInfo.py
index c6f7fc2..9712ef8 100755
--- a/tests/unit/test_referenceInfo.py
+++ b/tests/unit/test_referenceInfo.py
@@ -5,8 +5,7 @@ class Test_ReferenceInfo(unittest.TestCase):
def test_init(self):
"""Test ReferenceInfo.__init__() with a valid reference.info.xml."""
- rootDir = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/lambda/"
+ rootDir = "/pbi/dept/secondary/siv/references/lambda/"
r = ReferenceInfo(rootDir + "reference.info.xml")
self.assertEqual(r.refFastaFile, rootDir + "sequence/lambda.fasta")
self.assertEqual(r.refSawriterFile, rootDir + "sequence/lambda.fasta.sa")
diff --git a/tests/unit/test_setpath.py b/tests/unit/test_setpath.py
new file mode 100755
index 0000000..fd71f17
--- /dev/null
+++ b/tests/unit/test_setpath.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+from os import path
+import ConfigParser
+
+"""Define test data path for pbalign."""
+
+THIS_DIR = path.dirname(path.abspath(__file__))
+ROOT_DIR = path.dirname(THIS_DIR)
+NOSE_CFG = path.join(THIS_DIR, "nose.cfg")
+
+def _get_data_std_dir():
+ """Get the data directory which contains all the unittests files.
+ """
+ nosecfg = ConfigParser.SafeConfigParser()
+ nosecfg.readfp(open(NOSE_CFG), 'r')
+ if nosecfg.has_section('data'):
+ data_dir = path.abspath(nosecfg.get('data', 'dataDir'))
+ std_dir = path.abspath(nosecfg.get('data', 'stdDir'))
+ return data_dir, std_dir
+ else:
+ msg = "Unable to find section [DATA] option [dataDir]" + \
+ "and [stdDir] in config file {f}.".format(f=NOSE_CFG)
+ raise KeyError(msg)
+
+DATA_DIR, STD_DIR = _get_data_std_dir()
diff --git a/tests/unit/test_tool_contract.py b/tests/unit/test_tool_contract.py
new file mode 100755
index 0000000..0a41d2a
--- /dev/null
+++ b/tests/unit/test_tool_contract.py
@@ -0,0 +1,91 @@
+
+import subprocess
+import tempfile
+import unittest
+import os.path
+import sys
+
+import pbcommand.testkit
+from pbcore.io import AlignmentSet, ConsensusAlignmentSet, openDataSet
+
+import pbtestdata
+
+DATA_DIR = "/pbi/dept/secondary/siv/testdata/SA3-DS"
+DATA2 = "/pbi/dept/secondary/siv/testdata/pbalign-unittest2/data"
+REF_DIR = "/pbi/dept/secondary/siv/references"
+
+
+class TestPbalign(pbcommand.testkit.PbTestApp):
+ DRIVER_BASE = "pbalign "
+ REQUIRES_PBCORE = True
+ INPUT_FILES = [
+ pbtestdata.get_file("subreads-xml"),
+ pbtestdata.get_file("lambdaNEB")
+ ]
+ TASK_OPTIONS = {
+ "pbalign.task_options.algorithm_options": "--holeNumbers 1-1000,30000-30500,60000-60600,100000-100500",
+ }
+
+ def run_after(self, rtc, output_dir):
+ ds_out = openDataSet(rtc.task.output_files[0])
+ self.assertTrue(isinstance(ds_out, AlignmentSet),
+ type(ds_out).__name__)
+
+
+class TestPbalignCCS(pbcommand.testkit.PbTestApp):
+ DRIVER_BASE = "python -m pbalign.ccs"
+ INPUT_FILES = [
+ pbtestdata.get_file("rsii-ccs"),
+ pbtestdata.get_file("lambdaNEB")
+ ]
+
+ def run_after(self, rtc, output_dir):
+ ds_out = openDataSet(rtc.task.output_files[0])
+ self.assertTrue(isinstance(ds_out, ConsensusAlignmentSet),
+ type(ds_out).__name__)
+
+
+HAVE_BAMTOOLS = False
+try:
+ with tempfile.TemporaryFile() as O, \
+ tempfile.TemporaryFile() as E:
+ assert subprocess.call(["bamtools", "--help"], stdout=O, stderr=E) == 0
+except Exception as e:
+ sys.stderr.write(str(e)+"\n")
+ sys.stderr.write("bamtools missing, skipping test\n")
+else:
+ HAVE_BAMTOOLS = True
+
+
+ at unittest.skipUnless(HAVE_BAMTOOLS, "bamtools not installed")
+class TestConsolidateBam(pbcommand.testkit.PbTestApp):
+ DRIVER_BASE = "python -m pbalign.tasks.consolidate_alignments"
+ INPUT_FILES = [pbtestdata.get_file("aligned-ds-2")]
+ TASK_OPTIONS = {
+ "pbalign.task_options.consolidate_aligned_bam": True,
+ }
+
+ def run_after(self, rtc, output_dir):
+ with AlignmentSet(rtc.task.output_files[0]) as f:
+ f.assertIndexed()
+ self.assertEqual(len(f.toExternalFiles()), 1)
+ # test for bug 33778
+ qnames = set()
+ for rec in f:
+ qnames.add(rec.qName)
+ self.assertEqual(len(qnames), len(f))
+
+
+ at unittest.skipUnless(HAVE_BAMTOOLS, "bamtools not installed")
+class TestConsolidateBamDisabled(TestConsolidateBam):
+ TASK_OPTIONS = {
+ "pbalign.task_options.consolidate_aligned_bam": False,
+ }
+
+ def run_after(self, rtc, output_dir):
+ with AlignmentSet(rtc.task.output_files[0]) as f:
+ self.assertEqual(len(f.toExternalFiles()), 2)
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit/test_unrolled.py b/tests/unit/test_unrolled.py
new file mode 100755
index 0000000..dbfdc94
--- /dev/null
+++ b/tests/unit/test_unrolled.py
@@ -0,0 +1,87 @@
+
+"""
+Test unrolled alignment support for SubreadSet input. The output should be
+identical to the same command using pseudo-polymerase reads reconstructed by
+bam2bam as input.
+"""
+
+import subprocess
+import tempfile
+import unittest
+import os.path as op
+
+from pbcore.io import AlignmentSet
+import pbcommand.testkit
+
+DATA = "/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/unrolled"
+REFERENCE = "/pbi/analysis/smrtportal/internal/userdata/references/All4mer_V2_11_V2_13_V2_15_V2_44_circular_72x_l50256/sequence/All4mer_V2_11_V2_13_V2_15_V2_44_circular_72x_l50256.fasta"
+BASE_ARGS = [
+ "pbalign",
+ "--nproc", "8",
+ "--hitPolicy=leftmost",
+ "--algorithmOptions", "--bestn 1 --forwardOnly --fastMaxInterval --maxAnchorsPerPosition 30000 --ignoreHQRegions --minPctIdentity 60",
+]
+
+skip_unless_files_present = unittest.skipUnless(
+ op.isdir(DATA) and op.isfile(REFERENCE),
+ "missing {d} or {r}".format(r=REFERENCE, d=DATA))
+
+def _verify_alignment(self, aln_file):
+ with AlignmentSet(aln_file) as ds:
+ self.assertEqual(len(ds), 1)
+ bam = ds.resourceReaders()[0]
+ qlen = bam[0].qEnd - bam[0].qStart
+ alen = bam[0].aEnd - bam[0].aStart
+ # length of unrolled polymerase read (unaligned)
+ self.assertEqual(qlen, 22395)
+ self.assertTrue(alen > 21000,
+ "alignment length is only {l}".format(l=alen))
+
+
+ at skip_unless_files_present
+class TestUnrolledBAM(unittest.TestCase):
+
+ def _run_args(self, args, aln_file):
+ self.assertEqual(subprocess.call(args), 0)
+ _verify_alignment(self, aln_file)
+
+ def test_polymerase_bam(self):
+ aln_file = tempfile.NamedTemporaryFile(suffix=".polymerase.bam").name
+ args = BASE_ARGS + [
+ op.join(DATA, "m54006_151021_185942.polymerase.bam"),
+ REFERENCE,
+ aln_file,
+ ]
+ self._run_args(args, aln_file)
+
+ def test_subreadset(self):
+ aln_file = tempfile.NamedTemporaryFile(suffix=".unrolled.bam").name
+ args = BASE_ARGS + [
+ "--noSplitSubreads",
+ op.join(DATA, "m54006_151021_185942.subreadset.xml"),
+ REFERENCE,
+ aln_file,
+ ]
+ self._run_args(args, aln_file)
+
+
+ at skip_unless_files_present
+class TestToolContract(pbcommand.testkit.PbTestApp):
+ DRIVER_BASE = "pbalign"
+ INPUT_FILES = [
+ op.join(DATA, "m54006_151021_185942.subreadset.xml"),
+ REFERENCE
+ ]
+ TASK_OPTIONS = {
+ "pbalign.task_options.no_split_subreads": True,
+ "pbalign.task_options.hit_policy": "leftmost",
+ "pbalign.task_options.concordant": False,
+ "pbalign.task_options.algorithm_options": "--bestn 1 --forwardOnly --fastMaxInterval --maxAnchorsPerPosition 30000 --ignoreHQRegions --minPctIdentity 60"
+ }
+
+ def run_after(self, rtc, output_dir):
+ _verify_alignment(self, rtc.task.output_files[0])
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit/test_xmlout.py b/tests/unit/test_xmlout.py
new file mode 100755
index 0000000..25c81be
--- /dev/null
+++ b/tests/unit/test_xmlout.py
@@ -0,0 +1,38 @@
+
+import unittest
+import tempfile
+import shutil
+from os import path
+
+from pbcore.io import AlignmentSet, ReferenceSet
+
+from pbalign.pbalignrunner import PBAlignRunner
+
+from test_setpath import ROOT_DIR
+
+class Test_PBAlignRunner(unittest.TestCase):
+ def setUp(self):
+ self.rootDir = ROOT_DIR
+ self.queryFile = path.join(self.rootDir, "data/subreads_dataset1.xml")
+ self.referenceFile = path.join(self.rootDir, "data/reference_lambda.xml")
+ self.configFile = path.join(self.rootDir, "data/1.config")
+ self.OUT_DIR = tempfile.mkdtemp()
+ self.bamOut = path.join(self.OUT_DIR, "lambda_out.bam")
+ self.xmlOut = path.join(self.OUT_DIR, "lambda_out.xml")
+
+ def tearDown(self):
+ shutil.rmtree(self.OUT_DIR)
+
+ def test_init_xml(self):
+ """Test PBAlignRunner.__init__() to XML."""
+ argumentList = ['--minAccuracy', '70', '--maxDivergence', '30',
+ self.queryFile, self.referenceFile,
+ self.xmlOut]
+ pbobj = PBAlignRunner(argumentList=argumentList)
+ self.assertEqual(pbobj.start(), 0)
+ aln = AlignmentSet(self.xmlOut)
+ self.assertEqual(aln.externalResources[0].reference,
+ ReferenceSet(self.referenceFile).toExternalFiles()[0])
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/unit_h5/nose.cfg b/tests/unit_h5/nose.cfg
new file mode 100644
index 0000000..5ed666c
--- /dev/null
+++ b/tests/unit_h5/nose.cfg
@@ -0,0 +1,17 @@
+# pbalign unittest config, copied from pbreports.
+
+[DEFAULT]
+
+[log]
+level=debug
+file=pbalign-unittest.log
+
+[data]
+dataDir=/pbi/dept/secondary/siv/testdata/pbalign-unittest/data/
+stdDir=/pbi/dept/secondary/siv/testdata/pbalign-unittest/stdout/
+
+#This is for generating documentation of report attribute ids
+[doc]
+jinja-templates=doc/report-metrics
+html-dir=doc/_build/html
+
diff --git a/tests/unit/test_filterservice.py b/tests/unit_h5/test_filterservice.py
similarity index 85%
rename from tests/unit/test_filterservice.py
rename to tests/unit_h5/test_filterservice.py
index 02aaca4..c754281 100755
--- a/tests/unit/test_filterservice.py
+++ b/tests/unit_h5/test_filterservice.py
@@ -2,6 +2,7 @@
from pbalign.filterservice import FilterService
from os import path
import unittest
+from test_setpath import ROOT_DIR, DATA_DIR, OUT_DIR, STD_DIR
class Opt(object):
"""The Option class."""
@@ -18,14 +19,12 @@ class Opt(object):
class Test_FilterService(unittest.TestCase):
"""Test pbalign.filterservice."""
def setUp(self):
- self.testDir = path.dirname(path.dirname(path.abspath(__file__)))
+ self.testDir = ROOT_DIR
self.alignedSam = path.join(self.testDir,
"data/lambda.sam")
- self.targetFileName = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/" + \
- "lambda/sequence/lambda.fasta"
- self.filteredSam = path.join(self.testDir,
- "out/lambda_filtered.sam")
+ self.targetFileName = "/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
+ self.filteredSam = path.join(OUT_DIR,
+ "lambda_filtered.sam")
def test_init(self):
"""Test FilterService.__init__()."""
diff --git a/tests/unit/test_forquiverservice.py b/tests/unit_h5/test_forquiverservice.py
similarity index 72%
rename from tests/unit/test_forquiverservice.py
rename to tests/unit_h5/test_forquiverservice.py
index ccaf9d2..ac407eb 100755
--- a/tests/unit/test_forquiverservice.py
+++ b/tests/unit_h5/test_forquiverservice.py
@@ -5,6 +5,7 @@ from shutil import copyfile
from pbalign.forquiverservice.forquiver import ForQuiverService
from pbalign.pbalignfiles import PBAlignFiles
from tempfile import mkstemp
+from test_setpath import DATA_DIR
class Opt(object):
"""Simulate PBAlign options."""
@@ -18,17 +19,13 @@ class Opt(object):
class Test_ForQuiverService(unittest.TestCase):
"""Test pbalign.forquiverservice.forquiver."""
def setUp(self):
- self.rootDir = "/mnt/secondary-siv/" + \
- "testdata/BlasrTestData/pbalign"
- self.inCmpFile = path.join(self.rootDir, "data/testforquiver.cmp.h5")
- #self.outCmpFile = path.join(self.rootDir, "out/testforquiver.cmp.h5")
+ self.inCmpFile = path.join(DATA_DIR, "testforquiver.cmp.h5")
self.outCmpFile = mkstemp(suffix=".cmp.h5")[1]
copyfile(self.inCmpFile, self.outCmpFile)
- self.basFile = path.join(self.rootDir, "data/lambda_bax.fofn")
+ self.basFile = path.join(DATA_DIR, "lambda_bax.fofn")
- refpath = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/lambda/"
+ refpath = "/pbi/dept/secondary/siv/references/lambda/"
self.fileNames = PBAlignFiles()
self.fileNames.SetInOutFiles(self.basFile, refpath,
diff --git a/tests/unit/test_gmap.py b/tests/unit_h5/test_gmap.py
similarity index 90%
rename from tests/unit/test_gmap.py
rename to tests/unit_h5/test_gmap.py
index 68c5013..4155b5c 100755
--- a/tests/unit/test_gmap.py
+++ b/tests/unit_h5/test_gmap.py
@@ -5,20 +5,20 @@ from pbalign.alignservice.gmap import GMAPService
from pbalign.pbalignfiles import PBAlignFiles
from pbalign.options import parseOptions
import argparse
+from test_setpath import ROOT_DIR, OUT_DIR
class Test_GMAPService(unittest.TestCase):
"""Test pbalign.alignservices.gmap."""
def setUp(self):
"""Set up test data."""
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
- self.outDir = path.join(self.rootDir, "out/")
+ self.rootDir = ROOT_DIR
+ self.outDir = OUT_DIR
self.outSam = path.join(self.outDir, "test_gmap_01.sam")
self.dataDir = path.join(self.rootDir, "data/")
self.queryFofn = path.join(self.dataDir, "ecoli_lp.fofn")
self.refFa = path.join(self.dataDir, "ecoli.fasta")
- self.repoPath = "/mnt/secondary/Smrtanalysis/opt/smrtanalysis/" + \
- "common/references/ecoli/"
+ self.repoPath = "/pbi/dept/secondary/siv/references/ecoli/"
def test_gmapCreateDB_case1(self):
"""Test _gmapCreateDB(refFile, isWithinRepository, tempRootDir).
diff --git a/tests/unit/test_loadpulsesservice.py b/tests/unit_h5/test_loadpulsesservice.py
similarity index 73%
rename from tests/unit/test_loadpulsesservice.py
rename to tests/unit_h5/test_loadpulsesservice.py
index 6a094ba..a78ee8c 100755
--- a/tests/unit/test_loadpulsesservice.py
+++ b/tests/unit_h5/test_loadpulsesservice.py
@@ -5,19 +5,17 @@ from shutil import copyfile
from pbalign.forquiverservice.loadpulses import LoadPulsesService
from argparse import Namespace
from tempfile import mkstemp
+from test_setpath import ROOT_DIR, DATA_DIR
class Test_LoadPulsesService(unittest.TestCase):
"""Test pbalign.forquiverservice.loadpulses."""
def setUp(self):
"""Set up tests."""
- self.rootDir = "/mnt/secondary-siv/" + \
- "testdata/BlasrTestData/pbalign"
- self.inCmpFile = path.join(self.rootDir, "data/testloadpulses.cmp.h5")
- #self.outCmpFile = path.join(self.rootDir, "out/testloadpulses.cmp.h5")
+ self.inCmpFile = path.join(DATA_DIR, "testloadpulses.cmp.h5")
self.outCmpFile = mkstemp(suffix=".cmp.h5")[1]
- self.basFile = path.join(self.rootDir, "data/lambda_bax.fofn")
+ self.basFile = path.join(ROOT_DIR, "data/lambda_bax.fofn")
copyfile(self.inCmpFile, self.outCmpFile)
self.options = Namespace(metrics="DeletionQV", byread=False)
self.obj = LoadPulsesService(self.basFile,
diff --git a/tests/unit/test_pbalign.py b/tests/unit_h5/test_pbalign.py
similarity index 84%
copy from tests/unit/test_pbalign.py
copy to tests/unit_h5/test_pbalign.py
index 369ae79..eef6262 100755
--- a/tests/unit/test_pbalign.py
+++ b/tests/unit_h5/test_pbalign.py
@@ -1,17 +1,16 @@
import unittest
from os import path
from pbalign.pbalignrunner import PBAlignRunner
+from test_setpath import ROOT_DIR, DATA_DIR, OUT_DIR
class Test_PBAlignRunner(unittest.TestCase):
def setUp(self):
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
+ self.rootDir = ROOT_DIR
self.queryFile = path.join(self.rootDir, "data/lambda_query.fasta")
- self.referenceFile = "/mnt/secondary/Smrtanalysis/opt/" + \
- "smrtanalysis/common/references/" + \
- "lambda/sequence/lambda.fasta"
+ self.referenceFile = "/pbi/dept/secondary/siv/references/lambda/sequence/lambda.fasta"
self.configFile = path.join(self.rootDir, "data/1.config")
- self.samOut = path.join(self.rootDir, "out/lambda_out.sam")
- self.cmph5Out = path.join(self.rootDir, "out/lambda_out.cmp.h5")
+ self.samOut = path.join(OUT_DIR, "lambda_out.sam")
+ self.cmph5Out = path.join(OUT_DIR, "lambda_out.cmp.h5")
def test_init(self):
"""Test PBAlignRunner.__init__()."""
diff --git a/tests/unit/test_repackservice.py b/tests/unit_h5/test_repackservice.py
similarity index 72%
rename from tests/unit/test_repackservice.py
rename to tests/unit_h5/test_repackservice.py
index 19d9555..0050c8f 100755
--- a/tests/unit/test_repackservice.py
+++ b/tests/unit_h5/test_repackservice.py
@@ -4,20 +4,17 @@ from os import path, remove
from shutil import copyfile
from pbalign.forquiverservice.repack import RepackService
from tempfile import mkstemp
+from test_setpath import DATA_DIR
class Test_RepackService(unittest.TestCase):
"""Test pbalign.forquiverservice.repack."""
def setUp(self):
"""Set up the tests."""
- self.rootDir = "/mnt/secondary-siv/" + \
- "testdata/BlasrTestData/pbalign"
- self.inCmpFile = path.join(self.rootDir, "data/testrepack.cmp.h5")
- #self.outCmpFile = path.join(self.rootDir, "out/testrepack.cmp.h5")
+ self.inCmpFile = path.join(DATA_DIR, "testrepack.cmp.h5")
self.outCmpFile = mkstemp(suffix=".cmp.h5")[1]
self.tmpCmpFile = self.outCmpFile + ".tmp"
- #self.tmpCmpFile = path.join(self.rootDir, "out/testrepack.cmp.h5.tmp")
copyfile(self.inCmpFile, self.outCmpFile)
self.options = {}
self.obj = RepackService(self.outCmpFile, self.tmpCmpFile)
diff --git a/tests/unit/test_rgnh5io.py b/tests/unit_h5/test_rgnh5io.py
similarity index 92%
rename from tests/unit/test_rgnh5io.py
rename to tests/unit_h5/test_rgnh5io.py
index 6083bb2..c09208f 100755
--- a/tests/unit/test_rgnh5io.py
+++ b/tests/unit_h5/test_rgnh5io.py
@@ -6,6 +6,7 @@ from pbalign.utils.RgnH5IO import Region, RegionTable, RgnH5Reader, \
RgnH5Writer, addStrListAttr
from os import path
import h5py
+from test_setpath import ROOT_DIR, DATA_DIR, OUT_DIR
class Test_RgnH5IO(unittest.TestCase):
@@ -13,11 +14,10 @@ class Test_RgnH5IO(unittest.TestCase):
def setUp(self):
"""Set up test data."""
- self.rootDir = path.dirname(path.dirname(path.abspath(__file__)))
- self.inRgnFN = "/mnt/secondary-siv/testdata/" + \
- "BlasrTestData/pbalign/data/test_rgnh5io.rgn.h5"
- self.outRgnFN = path.join(self.rootDir, "out/test_rgnh5io_out.rgn.h5")
- self.outTmpFN = path.join(self.rootDir, "out/test_rgnh5io_tmp.h5")
+ self.rootDir = ROOT_DIR
+ self.inRgnFN = path.join(DATA_DIR, "test_rgnh5io.rgn.h5")
+ self.outRgnFN = path.join(OUT_DIR, "test_rgnh5io_out.rgn.h5")
+ self.outTmpFN = path.join(OUT_DIR, "test_rgnh5io_tmp.h5")
self.movieName = "m130427_152935_42178_" + \
"c100518602550000001823079209281316_s1_p0"
diff --git a/tests/unit_h5/test_setpath.py b/tests/unit_h5/test_setpath.py
new file mode 100755
index 0000000..c7c9e67
--- /dev/null
+++ b/tests/unit_h5/test_setpath.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+from os import path
+import ConfigParser
+
+"""Define test data path for pbalign."""
+
+THIS_DIR = path.dirname(path.abspath(__file__))
+ROOT_DIR = path.dirname(THIS_DIR)
+NOSE_CFG = path.join(THIS_DIR, "nose.cfg")
+
+def _get_data_std_dir():
+ """Get the data directory which contains all the unittests files.
+ """
+ nosecfg = ConfigParser.SafeConfigParser()
+ nosecfg.readfp(open(NOSE_CFG), 'r')
+ if nosecfg.has_section('data'):
+ data_dir = path.abspath(nosecfg.get('data', 'dataDir'))
+ std_dir = path.abspath(nosecfg.get('data', 'stdDir'))
+ return data_dir, std_dir
+ else:
+ msg = "Unable to find section [DATA] option [dataDir]" + \
+ "and [stdDir] in config file {f}.".format(f=NOSE_CFG)
+ raise KeyError(msg)
+
+OUT_DIR = path.join(ROOT_DIR, "out")
+DATA_DIR, STD_DIR = _get_data_std_dir()
+
diff --git a/tests/unit/test_sortservice.py b/tests/unit_h5/test_sortservice.py
similarity index 84%
rename from tests/unit/test_sortservice.py
rename to tests/unit_h5/test_sortservice.py
index dad10a4..0eb27c3 100755
--- a/tests/unit/test_sortservice.py
+++ b/tests/unit_h5/test_sortservice.py
@@ -4,6 +4,7 @@ from os import path, remove
from shutil import copyfile
from pbalign.forquiverservice.sort import SortService
from tempfile import mkstemp
+from test_setpath import DATA_DIR
class Opt(object):
@@ -16,9 +17,7 @@ class Test_SortService(unittest.TestCase):
"""Test pbalign.forquiverservice.sort."""
def setUp(self):
"""Set up tests."""
- self.rootDir = "/mnt/secondary-siv/" + \
- "testdata/BlasrTestData/pbalign"
- self.inCmpFile = path.join(self.rootDir, "data/testsort.cmp.h5")
+ self.inCmpFile = path.join(DATA_DIR, "testsort.cmp.h5")
self.outCmpFile = mkstemp(suffix=".cmp.h5")[1]
copyfile(self.inCmpFile, self.outCmpFile)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbalign.git
More information about the debian-med-commit
mailing list