[med-svn] [pbgenomicconsensus] 01/03: Imported Upstream version 2.0.0+20151210

Wed Feb 3 08:56:20 UTC 2016

This is an automated email from the git hooks/post-receive script.

afif-guest pushed a commit to branch master
in repository pbgenomicconsensus.

commit da58626ead20f88776bbbbc16823eb667134ea85
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Wed Feb 3 00:51:14 2016 -0800

    Imported Upstream version 2.0.0+20151210
---
 GenomicConsensus/ResultCollector.py               |   6 +-
 GenomicConsensus/Worker.py                        |   2 +-
 GenomicConsensus/consensus.py                     |   6 +-
 GenomicConsensus/io/utils.py                      |   3 +-
 GenomicConsensus/main.py                          | 121 ++++++---
 GenomicConsensus/options.py                       | 145 +++++++----
 GenomicConsensus/plurality/plurality.py           |   2 +-
 GenomicConsensus/quiver/model.py                  |   2 +-
 GenomicConsensus/quiver/quiver.py                 |   7 +-
 GenomicConsensus/quiver/utils.py                  |   2 +-
 GenomicConsensus/reference.py                     |  53 ++--
 GenomicConsensus/utils.py                         |  61 ++++-
 README.md                                         |  24 +-
 bin/gffToBed                                      | 209 ++++++++++++++++
 bin/gffToBed.py                                   | 141 -----------
 bin/{gffToVcf.py => gffToVcf}                     |  94 +++++--
 bin/makePbi.py                                    | 115 ---------
 bin/plurality                                     |   2 +-
 bin/quiver                                        |   2 +-
 bin/{summarizeConsensus.py => summarizeConsensus} | 114 +++++++--
 bin/{variantCaller.py => variantCaller}           |   0
 doc/HowToQuiver.rst                               |  79 +++---
 setup.py                                          |  15 +-
 tests/cram/bad_input.t                            |   5 +
 tests/cram/extra/convert-to-bed.t                 |   4 +-
 tests/cram/extra/convert-to-vcf.t                 |   2 +-
 tests/cram/extra/coverage-bed.t                   |   4 +-
 tests/cram/extra/plurality-compressed.t           |   2 +-
 tests/cram/extra/plurality-fluidigm.t             |   2 +-
 tests/cram/internal/alignment_summary.t           |   4 +-
 tests/cram/internal/quiver-compatibility.t        |   4 +-
 tests/cram/internal/quiver-staph.t                |  21 +-
 tests/cram/makePbi.t.off                          | 284 ----------------------
 tests/cram/plurality-hcv.t                        |   2 +-
 tests/cram/version.t                              |  11 +-
 tests/unit/test_tool_contract.py                  |  82 +++++++
 36 files changed, 829 insertions(+), 803 deletions(-)

diff --git a/GenomicConsensus/ResultCollector.py b/GenomicConsensus/ResultCollector.py
index 41f26ba..45b3692 100644
--- a/GenomicConsensus/ResultCollector.py
+++ b/GenomicConsensus/ResultCollector.py
@@ -77,7 +77,7 @@ class ResultCollector(object):
 
     def onStart(self):
         self.referenceBasesProcessedById = OrderedDict()
-        for refId in reference.byId:
+        for refId in reference.byName:
             self.referenceBasesProcessedById[refId] = 0
         self.variantsByRefId             = defaultdict(list)
         self.consensusChunksByRefId      = defaultdict(list)
@@ -91,7 +91,7 @@ class ResultCollector(object):
         if options.gffOutputFilename:
             self.gffWriter = VariantsGffWriter(options.gffOutputFilename,
                                                vars(options),
-                                               reference.byId.values())
+                                               reference.byName.values())
 
     def onResult(self, result):
         window, cssAndVariants = result
@@ -114,7 +114,7 @@ class ResultCollector(object):
 
     def _flushContigIfCompleted(self, window):
         refId, _, _ = window
-        refEntry = reference.byId[refId]
+        refEntry = reference.byName[refId]
         refName = refEntry.fullName
         basesProcessed = self.referenceBasesProcessedById[refId]
         requiredBases = reference.numReferenceBases(refId, options.referenceWindows)
diff --git a/GenomicConsensus/Worker.py b/GenomicConsensus/Worker.py
index 0006caa..37e6c03 100644
--- a/GenomicConsensus/Worker.py
+++ b/GenomicConsensus/Worker.py
@@ -81,7 +81,7 @@ class Worker(object):
 
 
     def run(self):
-        if options.doDebugging:
+        if options.debug:
             import ipdb
             with ipdb.launch_ipdb_on_exception():
                 self._run()
diff --git a/GenomicConsensus/consensus.py b/GenomicConsensus/consensus.py
index 2699dca..9a2a6a4 100644
--- a/GenomicConsensus/consensus.py
+++ b/GenomicConsensus/consensus.py
@@ -63,17 +63,17 @@ class Consensus(object):
         length = len(referenceSequence)
         seq = np.empty(length, dtype="S1")
         seq.fill("N")
-        conf = np.zeros(length, dtype=np.uint)
+        conf = np.zeros(length, dtype=np.uint8)
         return cls(refWin, seq.tostring(), conf)
 
     @classmethod
     def referenceAsConsensus(cls, refWin, referenceSequence):
-        conf = np.zeros(len(referenceSequence), dtype=np.uint)
+        conf = np.zeros(len(referenceSequence), dtype=np.uint8)
         return cls(refWin, referenceSequence, conf)
 
     @classmethod
     def lowercaseReferenceAsConsensus(cls, refWin, referenceSequence):
-        conf = np.zeros(len(referenceSequence), dtype=np.uint)
+        conf = np.zeros(len(referenceSequence), dtype=np.uint8)
         return cls(refWin, referenceSequence.lower(), conf)
 
     @classmethod
diff --git a/GenomicConsensus/io/utils.py b/GenomicConsensus/io/utils.py
index 8e9928e..c974553 100644
--- a/GenomicConsensus/io/utils.py
+++ b/GenomicConsensus/io/utils.py
@@ -45,6 +45,5 @@ def loadCmpH5(filename, referenceFname, disableChunkCache=False):
 
 def loadBam(filename, referenceFname):
     filename = os.path.abspath(os.path.expanduser(filename))
-    aln = AlignmentSet(filename)
-    aln.addReference(referenceFname)
+    aln = AlignmentSet(filename, referenceFastaFname=referenceFname)
     return aln
diff --git a/GenomicConsensus/main.py b/GenomicConsensus/main.py
index 09b2d8b..988220e 100644
--- a/GenomicConsensus/main.py
+++ b/GenomicConsensus/main.py
@@ -35,12 +35,19 @@ from __future__ import absolute_import
 
 import argparse, atexit, cProfile, gc, glob, h5py, logging, multiprocessing
 import os, pstats, random, shutil, tempfile, time, threading, Queue, traceback
+import re
+import sys
 
-from pbcore.io import AlignmentSet
+import pysam
+
+from pbcommand.utils import setup_log
+from pbcommand.cli import pbparser_runner
+from pbcore.io import AlignmentSet, ContigSet
 
 from GenomicConsensus import reference
-from GenomicConsensus.options import (options,
-                                      parseOptions,
+from GenomicConsensus.options import (options, Constants,
+                                      get_parser,
+                                      processOptions,
                                       resolveOptions,
                                       consensusCoreVersion)
 from GenomicConsensus.utils import (IncompatibleDataException,
@@ -52,7 +59,9 @@ from GenomicConsensus.plurality import plurality
 
 class ToolRunner(object):
     """
-    The main driver class for the GenomicConsensus tool.
+    The main driver class for the GenomicConsensus tool.  It is assumed that
+    arguments have already been parsed and used to populate the global
+    'options' namespace before instantiating this class.
     """
     def __init__(self):
         self._inCmpH5 = None
@@ -72,8 +81,8 @@ class ToolRunner(object):
             logLevel = logging.INFO
         else:
             logLevel = logging.WARNING
-        logFormat = '[%(levelname)s] %(message)s'
-        logging.basicConfig(level=logLevel, format=logFormat)
+        log = logging.getLogger()
+        log.setLevel(logLevel)
 
     def _makeTemporaryDirectory(self):
         """
@@ -178,7 +187,7 @@ class ToolRunner(object):
         #    return datasetCountExceedsThreshold(cmpH5, threshold)
         #else:
         #    return False
-        return False
+        return True
 
     def _configureAlgorithm(self, options, cmpH5):
         assert self._algorithm != None
@@ -257,7 +266,6 @@ class ToolRunner(object):
         # essentially harmless.
         gc.disable()
 
-        parseOptions()
         self._algorithm = self._algorithmByName(options.algorithm)
         self._setupLogging()
         random.seed(42)
@@ -272,35 +280,21 @@ class ToolRunner(object):
         if options.doProfiling:
             self._makeTemporaryDirectory()
 
-        if options.usingBam:
-            logging.warn("'fancyChunking' not yet available for BAM, disabling")
-            options.fancyChunking = False
-
-            # Peek at the bam file to build tables
-            with AlignmentSet(options.inputFilename) as peekCmpH5:
-                logging.info("Peeking at BAM file %s" % options.inputFilename)
-                logging.info("Input BAM data: numAlnHits=%d" % len(peekCmpH5))
-                resolveOptions(peekCmpH5)
-                self._loadReference(peekCmpH5)
-                self._checkFileCompatibility(peekCmpH5)
-                self._configureAlgorithm(options, peekCmpH5)
-        else:
-            # We need to peek at the cmp.h5 file to build the The
-            # refGroupId<->refGroupFullName mapping, and to determine
-            # whether the selected algorithm parameters (Quiver) are
-            # compatible with the data.  But we then have to close the
-            # file, and let the "real" open happen after the fork.
-            with AlignmentSet(options.inputFilename) as peekCmpH5:
-                logging.info("Peeking at CmpH5 file %s" % options.inputFilename)
-                logging.info("Input CmpH5 data: numAlnHits=%d" % len(peekCmpH5))
-                resolveOptions(peekCmpH5)
-                self._loadReference(peekCmpH5)
-                self._checkFileCompatibility(peekCmpH5)
-                self._configureAlgorithm(options, peekCmpH5)
-                options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekCmpH5)
-                if options.disableHdf5ChunkCache:
-                    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
-            logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())
+        with AlignmentSet(options.inputFilename) as peekFile:
+            if not peekFile.isCmpH5 and not peekFile.hasPbi:
+                die("Genomic Consensus only works with cmp.h5 files and BAM "
+                    "files with accompanying .pbi files")
+            logging.info("Peeking at file %s" % options.inputFilename)
+            logging.info("Input data: numAlnHits=%d" % len(peekFile))
+            resolveOptions(peekFile)
+            self._loadReference(peekFile)
+            self._checkFileCompatibility(peekFile)
+            self._configureAlgorithm(options, peekFile)
+            options.disableHdf5ChunkCache = True
+            #options.disableHdf5ChunkCache = self._shouldDisableChunkCache(peekFile)
+            #if options.disableHdf5ChunkCache:
+            #    logging.info("Will disable HDF5 chunk cache (large number of datasets)")
+            #logging.debug("After peek, # hdf5 objects open: %d" % h5py.h5f.get_obj_count())
 
         if options.dumpEvidence:
             self._setupEvidenceDumpDirectory(options.evidenceDirectory)
@@ -319,7 +313,7 @@ class ToolRunner(object):
                                 filename=os.path.join(options.temporaryDirectory,
                                                       "profile-main.out"))
 
-            elif options.doDebugging:
+            elif options.debug:
                 if not options.threaded:
                     die("Debugging only works with -T (threaded) mode")
                 logging.info("PID: %d", os.getpid())
@@ -367,6 +361,55 @@ def monitorSlaves(driver):
             return 0
         time.sleep(1)
 
-def main():
+def args_runner(args):
+    options.__dict__.update(args.__dict__)
+    processOptions()
     tr = ToolRunner()
     return tr.main()
+
+def resolved_tool_contract_runner(resolved_contract):
+    rc = resolved_contract
+    alignment_path = rc.task.input_files[0]
+    reference_path = rc.task.input_files[1]
+    gff_path = rc.task.output_files[0]
+    dataset_path = rc.task.output_files[1]
+    fasta_path = re.sub(".contigset.xml", ".fasta", dataset_path)
+    fastq_path = rc.task.output_files[2]
+    args = [
+        alignment_path,
+        "--reference", reference_path,
+        "--outputFilename", gff_path,
+        "--outputFilename", fasta_path,
+        "--outputFilename", fastq_path,
+        "--numWorkers", str(rc.task.nproc),
+        "--minCoverage", str(rc.task.options[Constants.MIN_COVERAGE_ID]),
+        "--minConfidence", str(rc.task.options[Constants.MIN_CONFIDENCE_ID]),
+        "--algorithm", rc.task.options[Constants.ALGORITHM_ID],
+        "--alignmentSetRefWindows",
+    ]
+    if rc.task.options[Constants.DIPLOID_MODE_ID]:
+        args.append("--diploid")
+    args_ = get_parser().arg_parser.parser.parse_args(args)
+    rc = args_runner(args_)
+    if rc == 0:
+        pysam.faidx(fasta_path)
+        ds = ContigSet(fasta_path, strict=True)
+        ds.write(dataset_path)
+    return rc
+
+def main(argv=sys.argv):
+    logFormat = '[%(levelname)s] %(message)s'
+    logging.basicConfig(level=logging.WARN, format=logFormat)
+    log = logging.getLogger()
+    def dummy_setup(*args, **kwargs):
+        pass
+    return pbparser_runner(
+        argv=argv[1:],
+        parser=get_parser(),
+        args_runner_func=args_runner,
+        contract_runner_func=resolved_tool_contract_runner,
+        alog=log,
+        setup_log_func=dummy_setup)
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/GenomicConsensus/options.py b/GenomicConsensus/options.py
index e063575..ed6672e 100644
--- a/GenomicConsensus/options.py
+++ b/GenomicConsensus/options.py
@@ -48,7 +48,13 @@
 #  and get the loaded options dictionary.
 #
 from __future__ import absolute_import
-import argparse, h5py, os, os.path, sys
+import argparse, h5py, os, os.path, sys, json
+
+from pbcommand.models import FileTypes, SymbolTypes, get_pbparser
+from pbcommand.common_options import (add_resolved_tool_contract_option,)
+# FIXME                                     add_subcomponent_versions_option)
+from pbcommand.cli import get_default_argparser
+
 from .utils import fileFormat
 from . import __VERSION__
 
@@ -61,28 +67,85 @@ def consensusCoreVersion():
     except:
         return None
 
-def parseOptions():
+class Constants(object):
+    TOOL_ID = "genomic_consensus.tasks.variantcaller"
+    DRIVER_EXE = "variantCaller --resolved-tool-contract "
+    ALGORITHM_ID = "genomic_consensus.task_options.algorithm"
+    MIN_CONFIDENCE_ID = "genomic_consensus.task_options.min_confidence"
+    MIN_COVERAGE_ID = "genomic_consensus.task_options.min_coverage"
+    DIPLOID_MODE_ID = "genomic_consensus.task_options.diploid"
+
+    DEFAULT_ALGORITHM = "quiver"
+    DEFAULT_MIN_CONFIDENCE = 40
+    DEFAULT_MIN_COVERAGE = 5
+    DEFAULT_MAX_COVERAGE = 100
+    DEFAULT_MIN_MAPQV = 10
+
+def get_parser():
     """
-    Parse the options and perform some due diligence on them
+    Construct a hybrid PbParser with most tool contract parameters defined
+    separately from argparser parameters.
     """
-    desc = "Compute genomic consensus and call variants relative to the reference."
-    parser = argparse.ArgumentParser(description=desc, add_help=False)
+    p = get_pbparser(
+        tool_id=Constants.TOOL_ID,
+        version=__VERSION__,
+        name="variantCaller",
+        description="Compute genomic consensus and call variants relative to the reference.",
+        driver_exe=Constants.DRIVER_EXE,
+        nproc=SymbolTypes.MAX_NPROC,
+        resource_types=())
+    tcp = p.tool_contract_parser
+    tcp.add_input_file_type(FileTypes.DS_ALIGN, "infile",
+        "Alignment DataSet", "BAM or Alignment DataSet")
+    tcp.add_input_file_type(FileTypes.DS_REF, "reference",
+        "Reference DataSet", "Fasta or Reference DataSet")
+    tcp.add_output_file_type(FileTypes.GFF, "variants",
+        name="Consensus GFF",
+        description="Consensus GFF",
+        default_name="variants.gff")
+    tcp.add_output_file_type(FileTypes.DS_CONTIG, "consensus",
+        name="Consensus ContigSet",
+        description="Consensus sequence in Fasta format",
+        default_name="consensus.contigset.xml")
+    tcp.add_output_file_type(FileTypes.FASTQ, "consensus_fastq",
+        name="Consensus fastq",
+        description="Consensus fastq",
+        default_name="consensus.fastq")
+    tcp.add_str(
+        option_id=Constants.ALGORITHM_ID,
+        option_str="algorithm",
+        default=Constants.DEFAULT_ALGORITHM,
+        name="Algorithm",
+        description="Algorithm name") # FIXME
+    tcp.add_int(
+        option_id=Constants.MIN_CONFIDENCE_ID,
+        option_str="minConfidence",
+        default=Constants.DEFAULT_MIN_CONFIDENCE,
+        name="Minimum confidence",
+        description="The minimum confidence for a variant call to be output "+\
+                    "to variants.gff")
+    tcp.add_int(
+        option_id=Constants.MIN_COVERAGE_ID,
+        option_str="minCoverage",
+        default=Constants.DEFAULT_MIN_COVERAGE,
+        name="Minimum coverage",
+        description="The minimum site coverage that must be achieved for " +\
+                    "variant calls and consensus to be calculated for a site.")
+
+    tcp.add_boolean(
+        option_id=Constants.DIPLOID_MODE_ID,
+        option_str="diploid",
+        default=True, # XXX i.e. --diploid=True, not the actual default!
+        name="Diploid mode (experimental)",
+        description="Enable detection of heterozygous variants (experimental)")
+    add_options_to_argument_parser(p.arg_parser.parser)
+    return p
+
+def add_options_to_argument_parser(parser):
 
     def canonicalizedFilePath(path):
         return os.path.abspath(os.path.expanduser(path))
 
-    def checkInputFile(path):
-        if not os.path.isfile(path):
-            parser.error("Input file %s not found." % (path,))
-
-    def checkOutputFile(path):
-        try:
-            f = open(path, "a")
-            f.close()
-        except:
-            parser.error("Output file %s cannot be written." % (path,))
-
-
     basics = parser.add_argument_group("Basic required options")
     basics.add_argument(
         "inputFilename",
@@ -119,13 +182,13 @@ def parseOptions():
         action="store",
         dest="minConfidence",
         type=int,
-        default=40,
+        default=Constants.DEFAULT_MIN_CONFIDENCE,
         help="The minimum confidence for a variant call to be output to variants.gff")
     filtering.add_argument(
         "--minCoverage", "-x",
         action="store",
         dest="minCoverage",
-        default=5,
+        default=Constants.DEFAULT_MIN_COVERAGE,
         type=int,
         help="The minimum site coverage that must be achieved for variant calls and " + \
              "consensus to be calculated for a site.")
@@ -143,7 +206,7 @@ def parseOptions():
         action="store",
         dest="coverage",
         type=int,
-        default=100,
+        default=Constants.DEFAULT_MAX_COVERAGE,
         help="A designation of the maximum coverage level to be used for analysis." + \
              " Exact interpretation is algorithm-specific.")
     readSelection.add_argument(
@@ -151,7 +214,7 @@ def parseOptions():
         action="store",
         dest="minMapQV",
         type=float,
-        default=10,
+        default=Constants.DEFAULT_MIN_MAPQV,
         help="The minimum MapQV for reads that will be used for analysis.")
     # Since the reference isn't loaded at options processing time, we
     # can't grok the referenceWindow specified until later.  We store
@@ -235,21 +298,8 @@ def parseOptions():
              "which selects the best parameter set from the cmp.h5")
 
     debugging = parser.add_argument_group("Verbosity and debugging/profiling")
-    debugging.add_argument("--help", "-h",
-                           action="help")
-    class PrintVersionAction(argparse.Action):
-        def __call__(self, parser, namespace, values, option_string=None):
-            print "  GenomicConsensus version: %s" % __VERSION__
-            print "  ConsensusCore version: %s" % \
-                (consensusCoreVersion() or "ConsensusCore unavailable")
-            print "  h5py version: %s" % h5py.version.version
-            print "  hdf5 version: %s" % h5py.version.hdf5_version
-            sys.exit(0)
-    debugging.add_argument("--version",
-                           nargs=0,
-                           action=PrintVersionAction)
     debugging.add_argument(
-        "--verbose", "-v",
+        "--verbose",
         dest="verbosity",
         action="count",
         help="Set the verbosity level.")
@@ -259,12 +309,6 @@ def parseOptions():
         action="store_true",
         help="Turn off all logging, including warnings")
     debugging.add_argument(
-        "--debug",
-        action="store_true",
-        dest="doDebugging",
-        default=False,
-        help="Enable Python-level debugging (using pdb).")
-    debugging.add_argument(
         "--profile",
         action="store_true",
         dest="doProfiling",
@@ -363,7 +407,24 @@ def parseOptions():
              "that has no aligned coverage.  Outputs emptyish files if there are no remaining "    \
              "non-degenerate windows.  Only intended for use by smrtpipe scatter/gather.")
 
-    parser.parse_args(namespace=options)
+    return parser
+
+def processOptions():
+    """
+    Various additions to the global 'options' object, assuming that the
+    command-line arguments have already been processed.
+    """
+    parser = get_parser().arg_parser.parser
+    def checkInputFile(path):
+        if not os.path.isfile(path):
+            parser.error("Input file %s not found." % (path,))
+
+    def checkOutputFile(path):
+        try:
+            f = open(path, "a")
+            f.close()
+        except:
+            parser.error("Output file %s cannot be written." % (path,))
 
     options.gffOutputFilename   = None
     options.fastaOutputFilename = None
diff --git a/GenomicConsensus/plurality/plurality.py b/GenomicConsensus/plurality/plurality.py
index f275315..5051683 100644
--- a/GenomicConsensus/plurality/plurality.py
+++ b/GenomicConsensus/plurality/plurality.py
@@ -388,7 +388,7 @@ class PluralityWorker(object):
         alnHits = readsInWindow(self._inCmpH5, referenceWindow,
                                    depthLimit=options.coverage,
                                    minMapQV=options.minMapQV,
-                                   strategy="longest",
+                                   strategy="long-and-strand-balanced",
                                    stratum=options.readStratum,
                                    barcode=options.barcode)
         return (referenceWindow,
diff --git a/GenomicConsensus/quiver/model.py b/GenomicConsensus/quiver/model.py
index 0c9c234..f12453c 100644
--- a/GenomicConsensus/quiver/model.py
+++ b/GenomicConsensus/quiver/model.py
@@ -388,7 +388,7 @@ def loadParameterSets(parametersFile=None, spec=None, cmpH5=None):
         chemistryName, modelName = spec.split(".")
     else:
         chemistryName, modelName = spec, None
-    assert cmpH5 or (chemistryName and modelName)
+    assert (cmpH5 is not None) or (chemistryName and modelName)
 
     parametersFile = _findParametersFile(parametersFile)
     logging.info("Using Quiver parameters file %s" % parametersFile)
diff --git a/GenomicConsensus/quiver/quiver.py b/GenomicConsensus/quiver/quiver.py
index 2aca757..1e2fea5 100644
--- a/GenomicConsensus/quiver/quiver.py
+++ b/GenomicConsensus/quiver/quiver.py
@@ -66,8 +66,9 @@ def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
         # 1) identify the intervals with adequate coverage for quiver
         #    consensus; restrict to intervals of length > 10
         alnHits = U.readsInWindow(cmpH5, refWindow,
+                                  depthLimit=20000,
                                   minMapQV=quiverConfig.minMapQV,
-                                  strategy="longest",
+                                  strategy="long-and-strand-balanced",
                                   stratum=options.readStratum,
                                   barcode=options.barcode)
         starts = np.fromiter((hit.tStart for hit in alnHits), np.int)
@@ -97,7 +98,7 @@ def consensusAndVariantsForWindow(cmpH5, refWindow, referenceContig,
         alns = U.readsInWindow(cmpH5, subWin,
                                depthLimit=depthLimit,
                                minMapQV=quiverConfig.minMapQV,
-                               strategy="longest",
+                               strategy="long-and-strand-balanced",
                                stratum=options.readStratum,
                                barcode=options.barcode)
         clippedAlns_ = [ aln.clippedTo(*interval) for aln in alns ]
@@ -185,7 +186,7 @@ class QuiverWorker(object):
         # to the reference and clip the consensus at the implied
         # bounds.  This seems to be more reliable thank cutting the
         # consensus bluntly
-        refContig = reference.byId[refId].sequence
+        refContig = reference.byName[refId].sequence
         refSequenceInEnlargedWindow = refContig[eStart:eEnd]
 
         #
diff --git a/GenomicConsensus/quiver/utils.py b/GenomicConsensus/quiver/utils.py
index 8dcff0f..9d8470c 100644
--- a/GenomicConsensus/quiver/utils.py
+++ b/GenomicConsensus/quiver/utils.py
@@ -400,7 +400,7 @@ def coverageInWindow(refWin, hits):
     winId, winStart, winEnd = refWin
     a = np.array([(hit.referenceStart, hit.referenceEnd)
                   for hit in hits
-                  if hit.referenceId == winId])
+                  if hit.referenceName == winId])
     tStart = a[:,0]
     tEnd   = a[:,1]
     cov = projectIntoRange(tStart, tEnd, winStart, winEnd)
diff --git a/GenomicConsensus/reference.py b/GenomicConsensus/reference.py
index 62bc2ef..5c265d6 100644
--- a/GenomicConsensus/reference.py
+++ b/GenomicConsensus/reference.py
@@ -71,10 +71,12 @@ byId         = OrderedDict()   # CmpH5 local id (integer)          -> FastaRecor
 byPacBioName = OrderedDict()   # pacbio name ("ref000001")         -> FastaRecord
 
 def idToName(_id):
-    return byId[_id].name
+    # At this point ids should always be names
+    return byName[_id].name
 
 def idToFullName(_id):
-    return byId[_id].fullName
+    # At this point ids should always be names
+    return byName[_id].fullName
 
 # Interpret a string key (one of name, or id (as string))
 # and find the associated id.  Only to be used in interpretation of
@@ -82,18 +84,19 @@ def idToFullName(_id):
 def anyKeyToId(stringKey):
     assert isLoaded()
     if stringKey in byName:
-        return byName[stringKey].id
+        return byName[stringKey].name
     elif stringKey in byPacBioName:
-        return byPacBioName[stringKey].id
+        return byPacBioName[stringKey].name
     elif stringKey.isdigit():
         refId = int(stringKey)
-        return byId[refId].id
+        # at this  point, refId can still be the old numeric identifier
+        return byId[refId].name
     else:
         raise Exception, "Unknown reference name: %s" % stringKey
 
 def sequenceInWindow(window):
     refId, refStart, refEnd = window
-    return byId[refId].sequence[refStart:refEnd]
+    return byName[refId].sequence[refStart:refEnd]
 
 filename = None
 
@@ -144,9 +147,9 @@ def loadFromFile(filename_, cmpH5):
             byPacBioName[pacBioName] = contig
     loadedFastaContigNames = set(byName.keys())
     logging.info("Loaded %d of %d reference groups from %s " %
-                 (len(byId), len(loadedFastaContigNames), filename_))
+                 (len(byName), len(loadedFastaContigNames), filename_))
 
-    if len(byId) == 0:
+    if len(byName) == 0:
         die("No reference groups in the FASTA file were aligned against.  " \
             "Did you select the wrong reference FASTA file?")
     elif (cmpContigNames - loadedFastaContigNames):
@@ -167,11 +170,11 @@ def stringToWindow(s):
     if m:
         refId    = anyKeyToId(m.group(1))
         refStart = int(m.group(2))
-        refEnd   = min(int(m.group(3)), byId[refId].length)
+        refEnd   = min(int(m.group(3)), byName[refId].length)
     else:
         refId    = anyKeyToId(s)
         refStart = 0
-        refEnd   = byId[refId].length
+        refEnd   = byName[refId].length
     return (refId, refStart, refEnd)
 
 def windowToString(referenceWindow):
@@ -187,7 +190,7 @@ def enumerateSpans(refId, referenceWindows=()):
     are to be analyzed.
     """
     assert isLoaded()
-    referenceEntry = byId[refId]
+    referenceEntry = byName[refId]
     referenceEntrySpan = (refId, 0, referenceEntry.length)
 
     for refWin in (referenceWindows or [referenceEntrySpan]):
@@ -217,15 +220,25 @@ def fancyEnumerateChunks(cmpH5, refId, referenceStride,
     tStart = []
     tEnd = []
     for reader in cmpH5.resourceReaders(refId):
-        startRow = reader.referenceInfo(refId).StartRow
-        endRow = reader.referenceInfo(refId).EndRow
-        goodMapQVs = (reader.MapQV[startRow:endRow] >= minMapQV)
-        tStart.extend(
-            reader.tStart[startRow:endRow][goodMapQVs].view(np.int32))
-        tEnd.extend(reader.tEnd[startRow:endRow][goodMapQVs].view(np.int32))
+        if cmpH5.isCmpH5:
+            startRow = reader.referenceInfo(refId).StartRow
+            endRow = reader.referenceInfo(refId).EndRow
+            rows = slice(startRow, endRow)
+        else:
+            refLen = reader.referenceInfo(refId).Length
+            rows = reader.readsInRange(refId, 0, refLen, justIndices=True)
+        goodMapQVs = (reader.mapQV[rows] >= minMapQV)
+        tStart.extend(reader.tStart[rows][goodMapQVs].view(np.int32))
+        tEnd.extend(reader.tEnd[rows][goodMapQVs].view(np.int32))
+
     # Sort the intervals (not sure if it matters, but might as well be
     # consistent with the inputs:
-    tStart, tEnd = map(list, zip(*sorted(zip(tStart, tEnd))))
+    if tStart and tEnd:
+        tStart = np.array(tStart)
+        tEnd = np.array(tEnd)
+        sort_order = tStart.argsort()
+        tStart = tStart[sort_order]
+        tEnd = tEnd[sort_order]
 
     for span in enumerateSpans(refId, referenceWindows):
         _, spanStart, spanEnd = span
@@ -258,7 +271,7 @@ def enumerateIds(referenceWindows=()):
     """
     assert isLoaded()
     if referenceWindows == ():
-        for refId in byId:
+        for refId in byName:
             yield refId
     else:
         for refId in nub(refId for (refId, _, _) in referenceWindows):
@@ -267,7 +280,7 @@ def enumerateIds(referenceWindows=()):
 def enlargedReferenceWindow(refWin, overlap):
     assert isLoaded()
     refId, refStart, refEnd = refWin
-    contigLength = byId[refId].length
+    contigLength = byName[refId].length
     return (refId,
             max(0, refStart - overlap),
             min(refEnd + overlap, contigLength))
diff --git a/GenomicConsensus/utils.py b/GenomicConsensus/utils.py
index 0597dbd..5661c31 100644
--- a/GenomicConsensus/utils.py
+++ b/GenomicConsensus/utils.py
@@ -31,6 +31,7 @@
 # Author: David Alexander
 
 from __future__ import absolute_import
+import ast
 import math, numpy as np, os.path, sys, itertools
 
 def die(msg):
@@ -108,30 +109,40 @@ def readsInWindow(cmpH5, window, depthLimit=None,
       - "spanning" --- get only the reads spanning the window
       - "fileorder" --- get the reads in file order
     """
-    assert strategy in {"longest", "spanning", "fileorder"}
+    assert strategy in {"longest", "spanning", "fileorder",
+                        "long-and-strand-balanced"}
 
     if stratum is not None:
         raise ValueError, "stratum needs to be reimplemented"
 
     def depthCap(iter):
         if depthLimit is not None:
-            return list(itertools.islice(iter, 0, depthLimit))
+            return cmpH5[list(itertools.islice(iter, 0, depthLimit))]
         else:
-            return list(iter)
+            return cmpH5[list(iter)]
 
     def lengthInWindow(hit):
-        return min(hit.tEnd, winEnd) - max(hit.tStart, winStart)
+        return (min(cmpH5.index.tEnd[hit], winEnd) -
+                max(cmpH5.index.tStart[hit], winStart))
 
     winId, winStart, winEnd = window
+    alnHits = np.array(list(cmpH5.readsInRange(winId, winStart, winEnd,
+                                               justIndices=True)))
+    if len(alnHits) == 0:
+        return []
+
+    # Different naming between cmpH5 and pbi
+    mapQV = lambda x: x.mapQV
+    if cmpH5.isCmpH5:
+        mapQV = lambda x: x.MapQV
     if barcode == None:
-        alnHits = ( hit
-                    for hit in cmpH5.readsInRange(winId, winStart, winEnd)
-                    if hit.MapQV >= minMapQV )
+        alnHits = alnHits[mapQV(cmpH5.index)[alnHits] >= minMapQV]
     else:
-        alnHits = ( hit
-                    for hit in cmpH5.readsInRange(winId, winStart, winEnd)
-                    if ((hit.MapQV >= minMapQV) and
-                        (hit.barcode == barcode)) )
+        # this wont work with cmpH5 (no bc in index):
+        barcode = ast.literal_eval(barcode)
+        alnHits = alnHits[(mapQV(cmpH5.index)[alnHits] >= minMapQV) &
+                          (cmpH5.index.bcLeft[alnHits] == barcode[0]) &
+                          (cmpH5.index.bcRight[alnHits] == barcode[1])]
 
     if strategy == "fileorder":
         return depthCap(alnHits)
@@ -140,9 +151,33 @@ def readsInWindow(cmpH5, window, depthLimit=None,
         return depthCap( hit for hit in alnHits
                          if lengthInWindow(hit) == winLen )
     elif strategy == "longest":
-        # Well, this defeats the iterable/laziness, performs poorly if
-        # deep coverage.  sorted is stable.
         return depthCap(sorted(alnHits, key=lengthInWindow, reverse=True))
+    elif strategy == "long-and-strand-balanced":
+        # Longest (in window) is great, but bam sorts by tStart then strand.
+        # With high coverage, this bias resulted in variants. Here we lexsort
+        # by tStart and tEnd. Longest in window is the final criteria in
+        # either case.
+
+        # lexical sort:
+        ends = cmpH5.index.tEnd[alnHits]
+        starts = cmpH5.index.tStart[alnHits]
+        lex_sort = np.lexsort((ends, starts))
+
+        # reorder based on sort:
+        sorted_ends = ends[lex_sort]
+        sorted_starts = starts[lex_sort]
+        sorted_alnHits = alnHits[lex_sort]
+
+        # get lengths in window:
+        post = sorted_ends > winEnd
+        sorted_ends[post] = winEnd
+        pre = sorted_starts < winStart
+        sorted_starts[pre] = winStart
+        lens = sorted_ends - sorted_starts
+
+        # coerce a descending sort:
+        win_sort = ((winEnd - winStart) - lens).argsort(kind="mergesort")
+        return depthCap(sorted_alnHits[win_sort])
 
 
 def datasetCountExceedsThreshold(cmpH5, threshold):
diff --git a/README.md b/README.md
index 5fddad9..e756ad3 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,17 @@
 GenomicConsensus (quiver)
 -------------------------
 
-The ``GenomicConsensus`` package provides the ``quiver`` tool, PacBio's flagship consensus
-and variant caller.  The backend logic is provided by the
-``ConsensusCore`` library, which you must install first.
+The ``GenomicConsensus`` package provides the ``quiver`` tool,
+PacBio's flagship consensus and variant caller.  The backend logic is
+provided by the ``ConsensusCore`` library, which you must install
+first.
 
 
 Installing
 ----------
-Make sure you have set up and activated your virtualenv, and
-installed ``pbcore`` and ``ConsensusCore`` (which cannot be
-installed automatically by pip or setuptools).  Then:
+Make sure you have set up and activated your virtualenv, and installed
+``pbcore`` and ``ConsensusCore`` (which cannot be installed
+automatically by pip or setuptools at this time).  Then:
 
 ```sh
 % python setup.py install
@@ -21,10 +22,17 @@ Running
 Basic usage is as follows:
 
 ```sh
-% quiver aligned_reads.cmp.h5 -r reference.fasta -o variants.gff -o consensus.fasta -o consensus.fastq
+% quiver aligned_reads{.cmp.h5, .bam, .fofn, or .xml}    \
+>     -r reference{.fasta or .xml} -o variants.gff       \
+>     -o consensus.fasta -o consensus.fastq
 ```
 
-in this example we perform haploid consensus and variant calling on the mapped reads in the ``aligned_reads.cmp.h5`` which was aligned to ``reference.fasta``.  The ``reference.fasta`` is only used for designating variant calls, not for computing the consensus.  The consensus quality score for every position can be found in the output FASTQ file.
+in this example we perform haploid consensus and variant calling on
+the mapped reads in the ``aligned_reads.bam`` which was aligned to
+``reference.fasta``.  The ``reference.fasta`` is only used for
+designating variant calls, not for computing the consensus.  The
+consensus quality score for every position can be found in the output
+FASTQ file.
 
 
 Documentation
diff --git a/bin/gffToBed b/bin/gffToBed
new file mode 100755
index 0000000..b4911ac
--- /dev/null
+++ b/bin/gffToBed
@@ -0,0 +1,209 @@
+#!/usr/bin/env python
+
+"""
+Convert .gff to .bed format.
+"""
+
+import sys
+import os
+import time
+import json
+import logging
+import argparse
+import traceback
+
+from pbcommand.models import FileTypes, get_pbparser
+from pbcommand.cli import pbparser_runner
+from pbcommand.utils import setup_log
+from pbcore.io import GffReader, WriterBase
+
+__version__ = "3.0"
+
+class Constants(object):
+    TASK_ID = "genomic_consensus.tasks.gff2bed"
+    PURPOSE_ID = "genomic_consensus.task_options.gff2bed_purpose"
+    TRACK_NAME_ID = "genomic_consensus.task_options.track_name"
+    DESCRIPTION_ID = 'genomic_consensus.task_options.track_description'
+    USE_SCORE_ID = "genomic_consensus.task_options.use_score"
+    DRIVER_EXE = "gffToBed --resolved-tool-contract "
+
+#
+# (Ported from pbpy)
+#
+
+class BedRecord:
+    """Models a record in a BED file format"""
+    def __init__(self):
+        self.chrom=''
+        self.chromStart = 0
+        self.chromEnd = 0
+        self.name = ''
+        self.score = -1.00
+        self.strand = '+'
+
+    def __str__(self):
+        return '%s\t%d\t%d\t%s\t%.3f\t%s' % \
+            (self.chrom, self.chromStart, self.chromEnd, self.name, \
+              self.score, self.strand)
+
+class CoverageBedRecord(BedRecord):
+    @staticmethod
+    def fromAlignmentSummaryGffRecord(gff):
+        bed = CoverageBedRecord()
+        bed.chrom = gff.seqid
+        bed.chromStart = gff.start - 1
+        bed.chromEnd = gff.end
+        bed.name = 'meanCov'
+        bed.score = float(gff.cov2.split(',')[0])
+        bed.strand = gff.strand
+        return bed
+
+class VariantsBedRecord(BedRecord):
+    @staticmethod
+    def fromVariantGffRecord(gff):
+        bed = VariantsBedRecord()
+        bed.chrom = gff.seqid
+        bed.chromStart = gff.start - 1
+        bed.score = float(gff.confidence)
+        bed.strand = gff.strand
+
+        feature = gff.type
+        #GFF3 coordinates are 1-based and inclusive
+        #BED coordinates are 0-based and exclusive
+        if feature == 'insertion':
+            bed.chromEnd = bed.chromStart + 1
+            bed.name = '%d_%dins%s' % (bed.chromStart + 1,
+                                       bed.chromEnd + 1,
+                                       gff.variantSeq)
+        elif feature == 'deletion':
+            featureLen = len(gff.reference)
+            bed.chromEnd = bed.chromStart + featureLen
+            if featureLen == 1:
+                bed.name = "%ddel" % (bed.chromStart + 1)
+            else:
+                bed.name = '%d_%ddel' % (bed.chromStart + 1, bed.chromEnd)
+        elif feature == 'substitution':
+            bed.chromEnd = bed.chromStart + 1
+            bed.name = '%d%s>%s' % (bed.chromStart + 1,
+                                    gff.reference,
+                                    gff.variantSeq)
+        else:
+            print >> sys.stderr, 'Unsupported feature %s found in GFF3 file.' % feature
+
+        return bed
+
+class BedWriter(WriterBase):
+    """Outputs BED annotation track file"""
+    def __init__(self, outfile):
+        self._outfile = outfile
+
+    def close(self):
+        self._outfile.close()
+
+    def flush(self):
+        self._outfile.flush()
+
+    def writeHeader(self, name, description, useScore):
+        print >> self._outfile, 'track name=%s description="%s" useScore=%d' \
+            % (name, description, useScore)
+
+    def writeRecord(self, record):
+        print >> self._outfile, str(record)
+
+class GffToBed:
+    """
+    Utility for converting GFF3 to BED format. Currently supports
+    regional coverage or variant .bed output.
+    """
+    def __init__(self, args):
+        self.purpose = args.purpose
+        self.gffFile = args.gff
+        self.args = args
+
+        if self.purpose not in [ "variants", "coverage" ]:
+            raise ValueError(
+                "Purpose %s not supported. Must be one of: [variants|coverage]" % (self.purpose))
+
+
+    def run(self, out=sys.stdout):
+        with GffReader(self.gffFile) as reader, \
+             BedWriter(out)          as writer:
+
+            writer.writeHeader(self.args.name,
+                               self.args.description,
+                               self.args.useScore)
+            for gff in reader:
+                if self.purpose == 'coverage':
+                    bedRecord = CoverageBedRecord.fromAlignmentSummaryGffRecord(gff)
+                else:
+                    bedRecord = VariantsBedRecord.fromVariantGffRecord(gff)
+                writer.writeRecord(bedRecord)
+        return 0
+
+def args_runner(args, out=sys.stdout):
+    return GffToBed(args).run(out=out)
+
+def resolved_tool_contract_runner(resolved_tool_contract):
+    rtc = resolved_tool_contract
+    assert rtc.task.options[Constants.PURPOSE_ID] in ["coverage", "variants"]
+    args = [
+        rtc.task.options[Constants.PURPOSE_ID],
+        rtc.task.input_files[0],
+        "--useScore", str(rtc.task.options[Constants.USE_SCORE_ID]),
+   #     "--name", str(rtc.task.options[Constants.TRACK_NAME_ID]),
+   #     "--description", str(rtc.task.options[Constants.DESCRIPTION_ID]),
+    ]
+    # XXX HACK
+    args_ = get_contract_parser().arg_parser.parser.parse_args(args)
+    with open(rtc.task.output_files[0], "w") as f:
+        return args_runner(args_, out=f)
+
+def get_contract_parser():
+    p = get_pbparser(
+        tool_id=Constants.TASK_ID,
+        version=__version__,
+        name="gffToBed",
+        description=__doc__,
+        driver_exe=Constants.DRIVER_EXE)
+    ap = p.arg_parser.parser
+    tcp = p.tool_contract_parser
+    ap.add_argument("purpose", choices=["variants","coverage"],
+        help="Run purpose")
+    p.add_input_file_type(FileTypes.GFF, "gff",
+        "GFF file", "GFF file")
+    tcp.add_output_file_type(FileTypes.BED, "bed",
+        "BED file", "BED file", "output.bed")
+    tcp.add_str(Constants.PURPOSE_ID, "purpose",
+        default="",
+        name="Purpose",
+        description="Run mode ('variants' or 'coverage')")
+    p.add_str(Constants.TRACK_NAME_ID, "name",
+        default="variants",
+        name="Track name",
+        description="track name to display in header")
+    p.add_str(Constants.DESCRIPTION_ID, 'description',
+        default="PacBio: snps, insertions, and deletions derived from consensus calls against reference",
+        name="Track description",
+        description="track description to display in header")
+    p.add_int(Constants.USE_SCORE_ID, "useScore",
+        default=0,
+        name="Use score",
+        description="whether or not to use score for feature display")
+    return p
+
+def main(argv=sys.argv):
+    logging.basicConfig(level=logging.WARN)
+    log = logging.getLogger()
+    mp = get_contract_parser()
+    def dummy_log(*args, **kwargs):
+        pass
+    return pbparser_runner(
+        argv=argv[1:],
+        parser=mp,
+        args_runner_func=args_runner,
+        contract_runner_func=resolved_tool_contract_runner,
+        alog=log,
+        setup_log_func=dummy_log)
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/bin/gffToBed.py b/bin/gffToBed.py
deleted file mode 100755
index b4112fe..0000000
--- a/bin/gffToBed.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-import sys
-import os
-import time
-import traceback
-from optparse import OptionParser
-from pbcore.io import GffReader, WriterBase
-
-#
-# (Ported from pbpy)
-#
-
-class BedRecord:
-    """Models a record in a BED file format"""
-    def __init__(self):
-        self.chrom=''
-        self.chromStart = 0
-        self.chromEnd = 0
-        self.name = ''
-        self.score = -1.00
-        self.strand = '+'
-
-    def __str__(self):
-        return '%s\t%d\t%d\t%s\t%.3f\t%s' % \
-            (self.chrom, self.chromStart, self.chromEnd, self.name, \
-              self.score, self.strand)
-
-class CoverageBedRecord(BedRecord):
-    @staticmethod
-    def fromAlignmentSummaryGffRecord(gff):
-        bed = CoverageBedRecord()
-        bed.chrom = gff.seqid
-        bed.chromStart = gff.start - 1
-        bed.chromEnd = gff.end
-        bed.name = 'meanCov'
-        bed.score = float(gff.cov2.split(',')[0])
-        bed.strand = gff.strand
-        return bed
-
-class VariantsBedRecord(BedRecord):
-    @staticmethod
-    def fromVariantGffRecord(gff):
-        bed = VariantsBedRecord()
-        bed.chrom = gff.seqid
-        bed.chromStart = gff.start - 1
-        bed.score = float(gff.confidence)
-        bed.strand = gff.strand
-
-        feature = gff.type
-        #GFF3 coordinates are 1-based and inclusive
-        #BED coordinates are 0-based and exclusive
-        if feature == 'insertion':
-            bed.chromEnd = bed.chromStart + 1
-            bed.name = '%d_%dins%s' % (bed.chromStart + 1,
-                                       bed.chromEnd + 1,
-                                       gff.variantSeq)
-        elif feature == 'deletion':
-            featureLen = len(gff.reference)
-            bed.chromEnd = bed.chromStart + featureLen
-            if featureLen == 1:
-                bed.name = "%ddel" % (bed.chromStart + 1)
-            else:
-                bed.name = '%d_%ddel' % (bed.chromStart + 1, bed.chromEnd)
-        elif feature == 'substitution':
-            bed.chromEnd = bed.chromStart + 1
-            bed.name = '%d%s>%s' % (bed.chromStart + 1,
-                                    gff.reference,
-                                    gff.variantSeq)
-        else:
-            print >> sys.stderr, 'Unsupported feature %s found in GFF3 file.' % feature
-
-        return bed
-
-class BedWriter(WriterBase):
-    """Outputs BED annotation track file"""
-    def __init__(self, outfile):
-        self._outfile = outfile
-
-    def close(self):
-        self._outfile.close()
-
-    def flush(self):
-        self._outfile.flush()
-
-    def writeHeader(self, name, description, useScore):
-        print >> self._outfile, 'track name=%s description="%s" useScore=%d' \
-            % (name, description, useScore)
-
-    def writeRecord(self, record):
-        print >> self._outfile, str(record)
-
-class GffToBed:
-    """
-    Utility for converting GFF3 to BED format. Currently supports
-    regional coverage or variant .bed output.
-    """
-    def __init__(self, argv):
-        self.__parseOptions(argv)
-
-    def __parseOptions(self, argv):
-        usage = 'Usage: %prog [--help] [options] purpose[coverage|variants] input.gff > output.bed'
-        parser = OptionParser(usage=usage, description=GffToBed.__doc__)
-
-        parser.add_option('--name', type="string",
-                          help="track name to display in header")
-        parser.add_option('--description', type="string",
-                          help="track description to display in header")
-        parser.add_option('--useScore', type="int", default=0,
-                          help="whether or not to use score for feature display")
-
-        self.options, self.args=parser.parse_args(argv)
-        if len(self.args)!=3:
-            parser.error('Expected 2 argument')
-
-        self.purpose = self.args[1]
-
-        if self.purpose not in [ "variants", "coverage" ]:
-            print >> sys.stderr, \
-                "Purpose %s not supported. Must be one of: [variants|coverage]" % (self.purpose)
-            sys.exit(-1)
-
-        self.gffFile = self.args[2]
-
-    def run(self):
-        with GffReader(self.gffFile) as reader, \
-             BedWriter(sys.stdout)   as writer:
-
-            writer.writeHeader(self.options.name,
-                               self.options.description,
-                               self.options.useScore)
-            for gff in reader:
-                if self.purpose == 'coverage':
-                    bedRecord = CoverageBedRecord.fromAlignmentSummaryGffRecord(gff)
-                else:
-                    bedRecord = VariantsBedRecord.fromVariantGffRecord(gff)
-                writer.writeRecord(bedRecord)
-
-
-if __name__ == '__main__':
-    app = GffToBed(sys.argv)
-    sys.exit(app.run())
diff --git a/bin/gffToVcf.py b/bin/gffToVcf
similarity index 58%
rename from bin/gffToVcf.py
rename to bin/gffToVcf
index faefc6b..ed1d947 100755
--- a/bin/gffToVcf.py
+++ b/bin/gffToVcf
@@ -1,15 +1,31 @@
 #!/usr/bin/env python
+
+"""Utility for converting variant GFF3 files to 1000 Genomes VCF"""
+
 import sys
 import os
 import time
+import json
+import logging
+import argparse
 import traceback
-from optparse import OptionParser
+
+from pbcommand.models import FileTypes, get_pbparser
+from pbcommand.cli import pbparser_runner
+from pbcommand.utils import setup_log
 from pbcore.io import GffReader, WriterBase
 
 #
 # (Ported from pbpy)
 #
 
+__version__ = "3.0"
+
+class Constants(object):
+    TASK_ID = "genomic_consensus.tasks.gff2vcf"
+    DRIVER_EXE = "gffToVcf --resolved-tool-contract "
+    GLOBAL_REFERENCE_ID = "genomic_consensus.task_options.global_reference"
+
 class VcfRecord:
     """Models a record in a VCF3.3 file."""
     def __init__(self):
@@ -90,23 +106,12 @@ class VcfWriter(WriterBase):
     def writeRecord( self, record ):
         print >> self._outfile, str(record)
 
+
 class GffToVcf(object):
     """Utility for converting variant GFF3 files to 1000 Genomes VCF"""
-    def __init__(self, argv):
-        self.__parseOptions(argv)
-
-    def __parseOptions(self, argv):
-        usage = 'Usage: %prog [--help] [options] variants.gff > variants.vcf'
-        parser = OptionParser(usage=usage, description=GffToVcf.__doc__)
-        parser.add_option('--globalReference', type="string",
-                          help="Name of global reference to put in Meta field")
-        parser.set_defaults(globalReference=None)
-
-        self.options, self.args=parser.parse_args(argv)
-        if len(self.args)!=2:
-            parser.error('Expected 1 argument')
-
-        self.gffFile = self.args[1]
+    def __init__(self, gffFile, globalReference=None):
+        self.gffFile = gffFile
+        self.globalReference = globalReference
 
     def _writeMetaData(self, writer):
         currentTime = time.localtime()
@@ -115,22 +120,65 @@ class GffToVcf(object):
         writer.writeMetaData('fileDate', '%d%d%d' % \
                              (currentTime[0], currentTime[1], currentTime[2]))
         writer.writeMetaData('source', cmdLine)
-        if self.options.globalReference is not None:
-            writer.writeMetaData('reference', self.options.globalReference)
+        if self.globalReference is not None:
+            writer.writeMetaData('reference', self.globalReference)
         writer.writeMetaData('INFO', 'NS,1,Integer,"Number of Samples with Data"')
         writer.writeMetaData('INFO', 'DP,1,Integer,"Total Depth of Coverage"')
 
         writer.writeHeader()
 
-    def run(self):
+    def run(self, out=sys.stdout):
         with GffReader(self.gffFile) as reader, \
-             VcfWriter(sys.stdout) as writer:
+             VcfWriter(out) as writer:
             self._writeMetaData(writer)
             for gff in reader:
                 vcf = VcfRecord.fromVariantGffRecord(gff)
                 writer.writeRecord(vcf)
-
+        return 0
+
+def args_runner(args, out=sys.stdout):
+    return GffToVcf(
+        gffFile=args.gffFile,
+        globalReference=args.globalReference).run(out=out)
+
+def resolved_tool_contract_runner(resolved_tool_contract):
+    rtc = resolved_tool_contract
+    with open(rtc.task.output_files[0], "w") as f:
+        gr = None #rtc.task.options[Constants.GLOBAL_REFERENCE_ID]
+        return GffToVcf(
+            gffFile=rtc.task.input_files[0],
+            globalReference=gr).run(out=f)
+
+def get_contract_parser():
+    p = get_pbparser(
+        tool_id=Constants.TASK_ID,
+        version=__version__,
+        name="gffToVcf",
+        description=__doc__,
+        driver_exe=Constants.DRIVER_EXE)
+    ap = p.arg_parser.parser
+    tcp = p.tool_contract_parser
+    p.add_input_file_type(FileTypes.GFF, "gffFile",
+        "GFF file", "GFF file")
+    tcp.add_output_file_type(FileTypes.VCF, "vcf", "VCF file", "VCF file",
+        default_name="output.vcf")
+    ap.add_argument("--globalReference", action="store", default=None,
+        help="Name of global reference to put in Meta field")
+    return p
+
+def main(argv=sys.argv):
+    logging.basicConfig(level=logging.WARN)
+    log = logging.getLogger()
+    p = get_contract_parser()
+    def dummy_log(*args, **kwargs):
+        pass
+    return pbparser_runner(
+        argv=argv[1:],
+        parser=p,
+        args_runner_func=args_runner,
+        contract_runner_func=resolved_tool_contract_runner,
+        alog=log,
+        setup_log_func=dummy_log)
 
 if __name__ == '__main__':
-    app = GffToVcf(sys.argv)
-    sys.exit(app.run())
+    sys.exit(main())
diff --git a/bin/makePbi.py b/bin/makePbi.py
deleted file mode 100755
index ac57182..0000000
--- a/bin/makePbi.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-
-import argparse
-import h5py
-import pysam
-import numpy as np
-from collections import Counter, OrderedDict
-
-from pbcore.io import BamReader, BamAlignment
-
-# Call: makePbi.py bamfile referenceFasta
-# Warning: this is a very inefficient reference implementation.
-
-PBI_VERSION = "3.0"
-
-PBI_COLUMNS_AND_TYPES = [ ("tId"               , np.int32),
-                          ("tStart"            , np.int32),
-                          ("tEnd"              , np.int32),
-                          ("qId"               , np.int32),
-                          ("qStart"            , np.int32),
-                          ("qEnd"              , np.int32),
-                          ("aStart"            , np.int32),
-                          ("aEnd"              , np.int32),
-                          ("holeNumber"        , np.int32),
-                          ("isReverseStrand"   , np.int8),
-                          ("nM"                , np.int32),
-                          ("nMM"               , np.int32),
-                          ("mapQV"             , np.uint8),
-                          ("virtualFileOffset" , np.uint64) ]
-
-def main():
-    parser = argparse.ArgumentParser(description="Build PacBio BAM index for a bam")
-    parser.add_argument("bamFile", type=argparse.FileType("r"))
-    parser.add_argument("--referenceFasta", type=argparse.FileType("r"))
-    parser.add_argument("--lite", action="store_true")
-
-    args = parser.parse_args()
-
-    bamFname = args.bamFile.name
-    if args.referenceFasta:
-        refFname = args.referenceFasta.name
-    else:
-        refFname = None
-
-    pbi = h5py.File(bamFname + ".pbi", "w")
-    p = pysam.Samfile(bamFname, check_sq=False)
-    B = BamReader(bamFname, refFname)
-
-    lsts = dict((columnName, [])
-                for (columnName, dtype_) in PBI_COLUMNS_AND_TYPES)
-    p.reset()
-    while True:
-        offset = p.tell()
-        try:
-            rawAln = next(p)
-        except StopIteration:
-            break
-
-        aln = BamAlignment(B, rawAln)
-
-        if aln.isMapped:
-            lsts["tId"               ].append(aln.tId)
-            lsts["tStart"            ].append(aln.tStart)
-            lsts["tEnd"              ].append(aln.tEnd)
-            lsts["qId"               ].append(aln.qId)
-            lsts["qStart"            ].append(aln.qStart)
-            lsts["qEnd"              ].append(aln.qEnd)
-            lsts["aStart"            ].append(aln.aStart)
-            lsts["aEnd"              ].append(aln.aEnd)
-            lsts["holeNumber"        ].append(aln.HoleNumber)
-            lsts["isReverseStrand"   ].append(aln.isReverseStrand)
-            lsts["mapQV"             ].append(aln.MapQV)
-            lsts["virtualFileOffset" ].append(offset)
-            if not args.lite:
-                transcript = aln.transcript()
-                moveCounts = Counter(transcript)
-                lsts["nM"            ].append(moveCounts["M"])
-                lsts["nMM"           ].append(moveCounts["R"])
-
-        else:
-            # Unmapped
-            lsts["tId"               ].append(-1)
-            lsts["tStart"            ].append(-1)
-            lsts["tEnd"              ].append(-1)
-            lsts["qId"               ].append(aln.qId)
-            lsts["qStart"            ].append(aln.qStart)
-            lsts["qEnd"              ].append(aln.qEnd)
-            lsts["aStart"            ].append(-1)
-            lsts["aEnd"              ].append(-1)
-            lsts["holeNumber"        ].append(aln.HoleNumber)
-            lsts["isReverseStrand"   ].append(-1)
-            lsts["mapQV"             ].append(-1)
-            lsts["virtualFileOffset" ].append(offset)
-            lsts["nM"                ].append(-1)
-            lsts["nMM"               ].append(-1)
-
-
-    # Lists to arrays
-    dsets = {}
-    for (name, dtype) in PBI_COLUMNS_AND_TYPES:
-        dsets[name] = np.fromiter(lsts[name], dtype)
-        del lsts[name]
-
-    # Write to file, gzipped
-    grp = pbi.create_group("PacBioBamIndex")
-    grp.attrs["Version"] = np.string_(PBI_VERSION)
-    columnsGrp = grp.create_group("Columns")
-    for (name, ds) in dsets.iteritems():
-        columnsGrp.create_dataset(name, data=ds, chunks=True, compression="gzip")
-    pbi.close()
-
-
-
-if __name__ == '__main__':
-    main()
diff --git a/bin/plurality b/bin/plurality
index e0e5d1b..993597d 100755
--- a/bin/plurality
+++ b/bin/plurality
@@ -1,2 +1,2 @@
 #!/bin/sh
-variantCaller.py --algorithm=plurality $*
+variantCaller --algorithm=plurality $*
diff --git a/bin/quiver b/bin/quiver
index 16a98a2..f9048a9 100755
--- a/bin/quiver
+++ b/bin/quiver
@@ -1,2 +1,2 @@
 #!/bin/sh
-variantCaller.py --algorithm=quiver $*
+variantCaller --algorithm=quiver $*
diff --git a/bin/summarizeConsensus.py b/bin/summarizeConsensus
similarity index 52%
rename from bin/summarizeConsensus.py
rename to bin/summarizeConsensus
index c0340bf..78fc434 100755
--- a/bin/summarizeConsensus.py
+++ b/bin/summarizeConsensus
@@ -1,9 +1,24 @@
 #!/usr/bin/env python
 
-import argparse, gzip, numpy as np, sys
-from collections import namedtuple
+"""
+Augment the alignment_summary.gff file with consensus and variants information.
+"""
 
+from collections import namedtuple
+import argparse
+import logging
+import json
+import gzip
+import sys
+
+import numpy as np
+
+from pbcommand.utils import setup_log
+from pbcommand.cli import pacbio_args_or_contract_runner, get_default_argparser
+from pbcommand.models import FileTypes, get_pbparser
+from pbcommand.common_options import add_resolved_tool_contract_option
 from pbcore.io import GffReader, GffWriter, Gff3Record
+
 from GenomicConsensus.utils import error_probability_to_qv
 from GenomicConsensus import __VERSION__
 
@@ -12,15 +27,16 @@ from GenomicConsensus import __VERSION__
 #
 Region = namedtuple("Region", ("seqid", "start", "end"))
 
-def main():
-    headers = [
-        ("source", "GenomicConsensus %s" % __VERSION__),
-        ("pacbio-alignment-summary-version", "0.6"),
-        ("source-commandline", " ".join(sys.argv)),
-        ]
+class Constants(object):
+    TOOL_ID = "genomic_consensus.tasks.summarize_consensus"
+    DRIVER_EXE = "summarizeConsensus --resolved-tool-contract "
 
-    desc = "Augment the alignment_summary.gff file with consensus and variants information."
-    parser = argparse.ArgumentParser(description=desc)
+def get_argument_parser():
+    parser = get_default_argparser(__VERSION__, __doc__)
+    parser.add_argument("inputAlignmentSummaryGff",
+                        type=str,
+                        help="Input alignment_summary.gff filename")
+    # FIXME not optional, should be positional
     parser.add_argument("--variantsGff",
                         type=str,
                         help="Input variants.gff or variants.gff.gz filename",
@@ -29,11 +45,56 @@ def main():
                         "-o",
                         type=str,
                         help="Output alignment_summary.gff filename")
-    parser.add_argument("inputAlignmentSummaryGff",
-                        type=str,
-                        help="Input alignment_summary.gff filename")
-
-    options = parser.parse_args()
+    add_resolved_tool_contract_option(parser)
+    # FIXME temporary workaround for parser chaos
+    class EmitToolContractAction(argparse.Action):
+        def __call__(self, parser, namespace, values, option_string=None):
+            parser2 = get_contract_parser()
+            sys.stdout.write(json.dumps(parser2.to_contract().to_dict(), indent=4)+'\n')
+            sys.exit(0)
+    parser.add_argument("--emit-tool-contract",
+                        nargs=0,
+                        action=EmitToolContractAction)
+    return parser
+
+def get_contract_parser():
+    """
+    Used to generate emitted tool contract, but not (yet) to actually process
+    command-line options.
+    """
+    driver_exe = "variantCaller --resolved-tool-contract "
+    p = get_pbparser(
+        "genomic_consensus.tasks.summarize_consensus",
+        __VERSION__,
+        "Summarize Consensus",
+        __doc__,
+        Constants.DRIVER_EXE)
+    p.add_input_file_type(FileTypes.GFF, "alignment_summary",
+        "Alignment summary GFF", "Alignment summary GFF file")
+    p.add_input_file_type(FileTypes.GFF, "variants",
+        "Variants GFF", "Variants GFF file")
+    p.add_output_file_type(FileTypes.GFF, "output",
+        name="Output GFF file",
+        description="New alignment summary GFF file",
+        default_name="alignment_summary_variants.gff")
+    return p
+
+def get_args_from_resolved_tool_contract(resolved_tool_contract):
+    rtc = resolved_tool_contract
+    p = get_argument_parser()
+    args = [
+        rtc.task.input_files[0],
+        "--variantsGff", rtc.task.input_files[1],
+        "--output", rtc.task.output_files[0],
+    ]
+    return p.parse_args(args)
+
+def run(options):
+    headers = [
+        ("source", "GenomicConsensus %s" % __VERSION__),
+        ("pacbio-alignment-summary-version", "0.6"),
+        ("source-commandline", " ".join(sys.argv)),
+        ]
 
     inputVariantsGff = GffReader(options.variantsGff)
     inputAlignmentSummaryGff = GffReader(options.inputAlignmentSummaryGff)
@@ -94,6 +155,27 @@ def main():
                 if counterName in summary:
                     line += ";%s=%d" % (counterName, summary[counterName])
             print >>outputAlignmentSummaryGff, line
+    return 0
+
+def args_runner(args):
+    return run(options=args)
+
+def resolved_tool_contract_runner(resolved_tool_contract):
+    args = get_args_from_resolved_tool_contract(resolved_tool_contract)
+    return run(options=args)
+
+def main(argv=sys.argv):
+    logging.basicConfig(level=logging.WARN)
+    mp = get_argument_parser()
+    log = logging.getLogger()
+    def dummy_log(*args, **kwargs):
+        pass
+    return pacbio_args_or_contract_runner(argv[1:],
+                                          mp,
+                                          args_runner,
+                                          resolved_tool_contract_runner,
+                                          log,
+                                          dummy_log)
 
 if __name__ == "__main__":
-    main()
+    sys.exit(main())
diff --git a/bin/variantCaller.py b/bin/variantCaller
similarity index 100%
rename from bin/variantCaller.py
rename to bin/variantCaller
diff --git a/doc/HowToQuiver.rst b/doc/HowToQuiver.rst
index 3ab76d7..136c3b9 100644
--- a/doc/HowToQuiver.rst
+++ b/doc/HowToQuiver.rst
@@ -66,22 +66,8 @@ If you are using an older version than SMRTportal/SMRTanalysis 1.3.3,
 please upgrade.
 
 
-Automatic installation instructions
------------------------------------
-If your system meets the installation requirements, you can perform an
-automatic installation of the PacBio software for Quiver by
-executing::
-
-    $ curl -L http://git.io/JR7TnQ | bash
-
-
-Manual installation instructions
---------------------------------
-If your SWIG or BOOST installations are in non-standard locations or
-you encounter a problem with the automatic installation script, you
-can follow these steps to install Quiver manually.
-
-
+Installation instructions
+-------------------------
 
 Step 1: Set up your Python environment
 ``````````````````````````````````````
@@ -95,18 +81,13 @@ and activate the virtualenv using ::
 
     $ source ~/VE-QUIVER/bin/activate
 
-There are some additional Python libraries required (NumPy and h5py),
-which can be installed via ::
-
-    $ pip install numpy==1.6.1
-    $ pip install h5py==2.0.1
-
 
 Step 2: Install PacBio libraries
 ````````````````````````````````
 To install the PacBio software, execute ::
 
-    $ pip install git+https://github.com/PacificBiosciences/pbcore
+    $ pip install pbcore
+
     $ git clone https://github.com/PacificBiosciences/ConsensusCore
     $ cd ConsensusCore; python setup.py install --swig=$SWIG --boost=$BOOST
     $ pip install git+https://github.com/PacificBiosciences/GenomicConsensus
@@ -126,20 +107,38 @@ virtualenv (from `GenomicConsensus`).
 
 For example, ::
 
-    $ quiver -j8 aligned_reads.cmp.h5           \
+    $ quiver -j8 aligned_reads.bam              \
     >        -r path/to/lambda.fasta            \
     >        -o variants.gff -o consensus.fasta
 
-will use 8 CPUs to run Quiver on ``aligned_reads.cmp.h5``, outputting
+will use 8 CPUs to run Quiver on ``aligned_reads.bam``, outputting
 the consensus sequence and variants.
 
-Note that if you have not used the `RS_Mapping_QVs` protocol to
-generate the cmp.h5 file---or if the source bas.h5 file was generated
-by pre-1.3.1 instrument software---the cmp.h5 will not contain the
-full battery of QV metrics required for optimal Quiver accuracy.  The
-command will still work, but it will give a warning that its accuracy
-will be suboptimal.
+Note that if you are using a cmp.h5 file and have not used the `RS_Mapping_QVs`
+protocol to generate that file---or if the source bas.h5 file was generated by
+pre-1.3.1 instrument software---the cmp.h5 will not contain the full battery of
+QV metrics required for optimal Quiver accuracy.  The command will still work,
+but it will give a warning that its accuracy will be suboptimal.
+
+Step 3.5: Run Quiver on Multiple Input Files
+````````````````````````````````````````````
+Multiple alignment files in a FOFN (File of File Names) can be quivered against
+a single reference (`GenomicConsensus` >= 1.1.0).
+
+An example input FOFN::
 
+    $ cat aligned_reads.fofn
+    /path/to/reads1.bam
+    /path/to/reads2.bam
+
+can be used instead of a reads file::
+
+    $ quiver -j8 aligned_reads.fofn             \
+    >        -r path/to/lambda.fasta            \
+    >        -o variants.gff -o consensus.fasta
+
+Quiver can also be used with DataSet XML files. See pbcore for details on
+generating new DataSet XML files for your alignment files.
 
 Step 4: Highly-accurate assembly consensus
 ``````````````````````````````````````````
@@ -148,22 +147,6 @@ approaching or even exceeding Q60 (one error per million bases).  If
 you use the HGAP assembly protocol in SMRTportal 2.0 or later, Quiver
 runs automatically as the final "assembly polishing" step.
 
-If you want to use Quiver to *manually* polish an assembly, you need to:
-
-- upload your draft assembly to SMRTportal as a new reference,
-- run the Resequencing protocol to call the consensus of your PacBio
-  reads as oriented by the draft assembly.  The variants output will
-  show the "corrections" made by Quiver, while the consensus
-  FASTA/FASTQ output contain the sequence and quality of the polished
-  assembly.
-
-
-Known issues
-------------
-There is a bug in the `multiprocessing` module in Python 2.7.2 and
-lower that causes the interpreter to crash during shutdown.  Use
-Python 2.7.3 or newer.
-
 
 Resources
 ---------
@@ -175,4 +158,4 @@ Methods* `HGAP paper`_
 
 
 .. _`FAQ document`: https://github.com/PacificBiosciences/GenomicConsensus/blob/master/doc/QuiverFAQ.rst
-.. _`HGAP paper`:
+.. _`HGAP paper`: http://www.nature.com/nmeth/journal/v10/n6/full/nmeth.2474.html
diff --git a/setup.py b/setup.py
index 8c0341c..28a734b 100755
--- a/setup.py
+++ b/setup.py
@@ -15,11 +15,10 @@ setup(
     author='Pacific Biosciences',
     author_email='devnet at pacificbiosciences.com',
     license=open('LICENSES').read(),
-    scripts = ['bin/variantCaller.py',
-               'bin/summarizeConsensus.py',
-               'bin/gffToVcf.py',
-               'bin/gffToBed.py',
-               'bin/makePbi.py',
+    scripts = ['bin/variantCaller',
+               'bin/summarizeConsensus',
+               'bin/gffToVcf',
+               'bin/gffToBed',
                'bin/plurality',
                'bin/quiver'],
     packages = find_packages(),
@@ -27,10 +26,10 @@ setup(
     include_package_data=True,
     zip_safe = False,
     install_requires=[
-        'pbcore >= 0.9.2',
+        'pbcore >= 1.2.2',
+        'pbcommand >= 0.2.0',
         'numpy >= 1.6.0',
         'h5py >= 2.0.1',
-        'ConsensusCore >= 0.9.1',
-        'pysam == 0.8.1'
+        'ConsensusCore >= 1.0.1',
         ]
     )
diff --git a/tests/cram/bad_input.t b/tests/cram/bad_input.t
new file mode 100644
index 0000000..58fdbe5
--- /dev/null
+++ b/tests/cram/bad_input.t
@@ -0,0 +1,5 @@
+
+Test for sane behavior in the presence of bogus arguments.
+
+  $ variantCaller fake.alignmentset.xml -r fake.referenceset.xml -o test.fasta 2>&1 | tail -1
+  variantCaller: error: Input file */fake.alignmentset.xml not found. (glob)
diff --git a/tests/cram/extra/convert-to-bed.t b/tests/cram/extra/convert-to-bed.t
index c128030..358abaa 100644
--- a/tests/cram/extra/convert-to-bed.t
+++ b/tests/cram/extra/convert-to-bed.t
@@ -4,8 +4,8 @@ Test conversion variants GFF -> BED.
   $ export DATA=$TESTDIR/../../data
   $ export INPUT=$DATA/converters/variants.gff.gz
 
-  $ gffToBed.py --name=variants \
-  >             --description="PacBio variant calls" \
+  $ gffToBed --name=variants \
+  >          --description="PacBio variant calls" \
   > variants $INPUT
   track name=variants description="PacBio variant calls" useScore=0
   gi|160367075|gb|CP000730.1| Staphylococcus aureus subsp. aureus USA300_TCH1516, complete genome\t701414\t701415\t701415del\t46.000\t. (esc)
diff --git a/tests/cram/extra/convert-to-vcf.t b/tests/cram/extra/convert-to-vcf.t
index b85eb4c..522ce33 100644
--- a/tests/cram/extra/convert-to-vcf.t
+++ b/tests/cram/extra/convert-to-vcf.t
@@ -4,7 +4,7 @@ Test conversion GFF -> VCF
   $ export DATA=$TESTDIR/../../data
   $ export INPUT=$DATA/converters/variants.gff.gz
 
-  $ gffToVcf.py --globalReference=Staphylococcus_aureus_USA300_TCH1516 $INPUT
+  $ gffToVcf --globalReference=Staphylococcus_aureus_USA300_TCH1516 $INPUT
   ##fileformat=VCFv3.3
   ##fileDate=* (glob)
   ##source=* (glob)
diff --git a/tests/cram/extra/coverage-bed.t b/tests/cram/extra/coverage-bed.t
index fab4c61..eaa4915 100644
--- a/tests/cram/extra/coverage-bed.t
+++ b/tests/cram/extra/coverage-bed.t
@@ -3,8 +3,8 @@ Test conversion of alignment summary GFF to coverage BED.
   $ export DATA=$TESTDIR/../../data
   $ export INPUT=$DATA/fluidigm_amplicons/alignment_summary.gff
 
-  $ gffToBed.py --name=coverage \
-  >             --description="PacBio coverage" \
+  $ gffToBed --name=coverage \
+  >          --description="PacBio coverage" \
   > coverage $INPUT > coverage.bed
   $ head -20 coverage.bed
   track name=coverage description="PacBio coverage" useScore=0
diff --git a/tests/cram/extra/plurality-compressed.t b/tests/cram/extra/plurality-compressed.t
index 79af7ac..3de32d4 100644
--- a/tests/cram/extra/plurality-compressed.t
+++ b/tests/cram/extra/plurality-compressed.t
@@ -4,7 +4,7 @@ output files are created correctly.
   $ export DATA=$TESTDIR/../../data
   $ export INPUT=$DATA/hcv/aligned_reads.cmp.h5
   $ export REFERENCE=$DATA/hcv/HCV_Ref_For_187140.fasta
-  $ variantCaller.py --algorithm=plurality -q 10 -r $REFERENCE -o variants.gff.gz -o consensus.fq.gz $INPUT
+  $ variantCaller --algorithm=plurality -q 10 -r $REFERENCE -o variants.gff.gz -o consensus.fq.gz $INPUT
 
 I like to show the head of the output files inline here so that glaringly obvious changes will
 pop right out, but I verify that the files are exactly correct by looking at the md5 sums.
diff --git a/tests/cram/extra/plurality-fluidigm.t b/tests/cram/extra/plurality-fluidigm.t
index 4437160..e664e95 100644
--- a/tests/cram/extra/plurality-fluidigm.t
+++ b/tests/cram/extra/plurality-fluidigm.t
@@ -7,7 +7,7 @@ Some tests of a "fluidigm amplicons" dataset
 
 Set the QV threshold to 10.
 
-  $ variantCaller.py --algorithm=plurality -r $REFERENCE -q 10 -o variants.gff -o consensus.csv -o consensus.fastq $INPUT
+  $ variantCaller --algorithm=plurality -r $REFERENCE -q 10 -o variants.gff -o consensus.csv -o consensus.fastq $INPUT
 
 There are two true SNVs (and one diploid SNV that we miss right now).
 
diff --git a/tests/cram/internal/alignment_summary.t b/tests/cram/internal/alignment_summary.t
index 336f2f7..c097cfc 100644
--- a/tests/cram/internal/alignment_summary.t
+++ b/tests/cram/internal/alignment_summary.t
@@ -1,11 +1,11 @@
 
-Test the (augmentation) of the alignment_summary.gff file by summarizeConsensus.py
+Test the (augmentation) of the alignment_summary.gff file by summarizeConsensus
 
   $ export DATA=/mnt/secondary/Share/Quiver/TestData/tinyLambda/
   $ export PATH=$TESTDIR/..:$PATH
   $ export VARIANTSGFF=$DATA/variants.gff.gz
   $ export ALIGNMENTSUMMARYGFF=$DATA/alignment_summary.gff
-  $ summarizeConsensus.py           \
+  $ summarizeConsensus              \
   >   --variantsGff $VARIANTSGFF    \
   >   $ALIGNMENTSUMMARYGFF          \
   >   -o alignment_summary.out.gff
diff --git a/tests/cram/internal/quiver-compatibility.t b/tests/cram/internal/quiver-compatibility.t
index 81db985..8e9ac39 100644
--- a/tests/cram/internal/quiver-compatibility.t
+++ b/tests/cram/internal/quiver-compatibility.t
@@ -22,12 +22,12 @@ Tiny lambda file.  Make sure it recognizes this cmp.h5 has an imcomplete set of
 
 It should handle the request of a parameter set by complete name:
 
-  $ quiver -v -p C2.NoQVsModel -r $REFERENCE -o variants.gff $INPUT 2>&1 | grep "Using Quiver parameter set"
+  $ quiver --verbose -p C2.NoQVsModel -r $REFERENCE -o variants.gff $INPUT 2>&1 | grep "Using Quiver parameter set"
   [INFO] Using Quiver parameter set(s): C2.NoQVsModel
 
 ... or by chemistry name:
 
-  $ quiver -v -p C2 -r $REFERENCE -o variants.gff $INPUT 2>&1 | grep "Using Quiver parameter set"
+  $ quiver --verbose -p C2 -r $REFERENCE -o variants.gff $INPUT 2>&1 | grep "Using Quiver parameter set"
   [INFO] Using Quiver parameter set(s): C2.NoQVsModel
 
 
diff --git a/tests/cram/internal/quiver-staph.t b/tests/cram/internal/quiver-staph.t
index deeaaaf..ece5239 100644
--- a/tests/cram/internal/quiver-staph.t
+++ b/tests/cram/internal/quiver-staph.t
@@ -1,32 +1,29 @@
 This input data is taken from the output of the mapping job in Pysiv:
 pysiv_jobs/jobs/BAMMapping/saureus_p6c4
 
-  $ export BAM=/mnt/secondary/Share/Quiver/TestData/staph/aligned_reads.bam
+# FIXME this file needs updating to the new BAM spec -Nat 2015-09-30
+#  $ export BAM=/mnt/secondary/Share/Quiver/TestData/staph/m140911_084715_42139_c100702390480000001823141103261514_s1_p0.aligned_subreads.bam
+  $ export BAM=/pbi/dept/secondary/siv/testdata/genomic_consensus-unittest/Quiver/staph/m140911_084715_42139_c100702390480000001823141103261514_s1_p0.aligned_subreads.bam
   $ export REFERENCE=/mnt/secondary/Share/Quiver/TestData/staph/S_aureus_USA300_TCH1516.fasta
+  $ export MASK=/mnt/secondary/Share/Quiver/GenomeMasks/S_aureus_USA300_TCH1516-mask.gff 
 
   $ quiver -j${JOBS-8} $BAM -r $REFERENCE -o variants.gff -o css.fasta -o css.fastq
-  [WARNING] 'fancyChunking' not yet available for BAM, disabling
 
 Inspect the variant calls.  The first variant call might be an error
 (follow up on this) but the latter is an error in the reference, it
 seems.
 
-  $ grep -v "#" variants.gff | sed 's/\t/ /g'
-  Staphylococcus_aureus_subsp_aureus_USA300_TCH1516 . deletion 1592553 1592553 . . . reference=T;variantSeq=.;coverage=100;confidence=50
-  Staphylococcus_aureus_subsp_aureus_USA300_TCH1516 . deletion 2179435 2179435 . . . reference=C;variantSeq=.;coverage=100;confidence=49
+  $ gffsubtract.pl variants.gff $MASK | grep -v "#" | sed 's/\t/ /g'
 
 One window is nocalled.  Follow up on this.
 
   $ fastacomposition css.fasta
-  css.fasta A 960146 C 470678 G 470218 T 971370 a 176 c 74 g 91 t 159
+  css.fasta A 960306 C 470724 G 470270 T 971459
 
-No gaps.
+One gap
 
   $ nucmer -mum $REFERENCE css.fasta 2>/dev/null
   $ show-diff -H out.delta | sed 's/\t/ /g'
+  Staphylococcus_aureus_subsp_aureus_USA300_TCH1516 GAP 2149110 2149328 219 67 152
 
-SNPs in consensus the same as called.
-
-  $ show-snps -H -C out.delta
-   1040006   A .   1040005   |   552550  1040005  |  1  1  Staphylococcus_aureus_subsp_aureus_USA300_TCH1516\tStaphylococcus_aureus_subsp_aureus_USA300_TCH1516|quiver (esc)
-   1592556   T .   1592554   |   552550  1280359  |  1  1  Staphylococcus_aureus_subsp_aureus_USA300_TCH1516\tStaphylococcus_aureus_subsp_aureus_USA300_TCH1516|quiver (esc)
+(Not showing the SNPs here as they just correspond to the masked-out region that we know doesn't match the reference)
diff --git a/tests/cram/makePbi.t.off b/tests/cram/makePbi.t.off
deleted file mode 100644
index d2e613a..0000000
--- a/tests/cram/makePbi.t.off
+++ /dev/null
@@ -1,284 +0,0 @@
-
-Run makePbi.py on the lil' BAM in pbcore, make sure output is as
-expected.  We have to copy it and the bam.bai locally because we can't
-write to the pbcore directory.
-
-  $ export REMOTE_BAM=`python -c "import pbcore.data as D; print D.getBamAndCmpH5()[0]"`
-  $ cp $REMOTE_BAM .
-  $ cp ${REMOTE_BAM}.bai .
-  $ export BAM=`echo *.bam`
-  $ export REF=`python -c "import pbcore.data as D; print D.getLambdaFasta()"`
-
-  $ makePbi.py $BAM $REF
-
-  $ h5dump *.bam.pbi
-  HDF5 "bam_mapping.bam.pbi" {
-  GROUP "/" {
-     GROUP "PacBioBamIndex" {
-        ATTRIBUTE "Version" {
-           DATATYPE  H5T_STRING {
-              STRSIZE 3;
-              STRPAD H5T_STR_NULLPAD;
-              CSET H5T_CSET_ASCII;
-              CTYPE H5T_C_S1;
-           }
-           DATASPACE  SCALAR
-           DATA {
-           (0): "0.1"
-           }
-        }
-        GROUP "Columns" {
-           DATASET "HoleNumber" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 49050, 32328, 32328, 6469, 6469, 30983, 13473, 13473, 19915,
-              (9): 30983, 19915, 7247, 7247, 38025, 13473, 36363, 36363, 31174,
-              (18): 31174, 38025, 50257, 50257, 14743, 14743, 39571, 39571,
-              (26): 39571, 32901, 24494, 24494, 24962, 14743, 14743, 42165,
-              (34): 35858, 35858, 42827, 35858, 42827, 32861, 32861, 32861,
-              (42): 32861, 32861, 32861, 12736, 12736, 38754, 32861, 32861,
-              (50): 45203, 2771, 7670, 2771, 36628, 45203, 49194, 49194, 47698,
-              (59): 47698, 47698, 44356, 19837, 50621, 50621, 20211, 20211,
-              (67): 32560, 32560, 32560, 7957, 7957, 26262, 52206, 6251, 6251,
-              (76): 6251, 6251, 6251, 6251, 6251, 6251, 6251, 6251, 54396,
-              (85): 16996, 16996, 46835, 46835, 49521, 46835, 16996, 1650,
-              (93): 1650, 37134, 37134, 37134, 37134, 37134, 37134, 37134,
-              (101): 37134, 37134, 37134, 37134, 37134, 37134, 37134, 51534,
-              (109): 29843, 23454, 23454, 19360, 15641, 49050
-              }
-           }
-           DATASET "MapQV" {
-              DATATYPE  H5T_STD_U8LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (12): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (24): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (36): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (48): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (60): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (72): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (84): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (96): 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
-              (108): 254, 254, 254, 254, 254, 254, 254
-              }
-           }
-           DATASET "ReadGroupID" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (5): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (10): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (15): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (20): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (25): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (30): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (35): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (40): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (45): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (50): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (55): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (60): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (65): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (70): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (75): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (80): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (85): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (90): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (95): 3771511568, 3771511568, 3771511568, 3771511568, 3771511568,
-              (100): 3771511568, 3771511568, 3771511568, 3771511568,
-              (104): 3771511568, 3771511568, 3771511568, 3771511568,
-              (108): 3771511568, 3771511568, 3771511568, 3771511568,
-              (112): 3771511568, 3771511568, 3771511568
-              }
-           }
-           DATASET "isReverseStrand" {
-              DATATYPE  H5T_STD_U8LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0,
-              (20): 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0,
-              (40): 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1,
-              (60): 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1,
-              (80): 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1,
-              (100): 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0
-              }
-           }
-           DATASET "nDel" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 11, 15, 12, 7, 11, 51, 38, 37, 29, 11, 13, 21, 20, 37, 5,
-              (15): 11, 9, 9, 16, 12, 15, 10, 41, 46, 13, 36, 26, 30, 11, 8,
-              (30): 12, 14, 13, 20, 36, 39, 32, 7, 30, 19, 14, 15, 8, 14, 10,
-              (45): 14, 0, 19, 6, 11, 4, 14, 24, 5, 9, 6, 19, 8, 5, 13, 5, 3,
-              (62): 1, 26, 16, 4, 3, 0, 26, 12, 1, 3, 16, 12, 6, 11, 9, 12, 5,
-              (79): 18, 13, 8, 10, 7, 21, 19, 12, 5, 15, 2, 4, 1, 25, 4, 10, 9,
-              (96): 16, 13, 8, 11, 10, 10, 7, 10, 7, 6, 8, 5, 2, 20, 10, 6, 3,
-              (113): 3, 13
-              }
-           }
-           DATASET "nIns" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 16, 37, 4, 9, 9, 46, 80, 56, 17, 10, 15, 16, 6, 63, 5, 5, 9,
-              (17): 6, 35, 46, 12, 13, 66, 67, 14, 72, 50, 9, 17, 9, 14, 25,
-              (32): 18, 21, 8, 23, 32, 11, 44, 26, 47, 35, 25, 40, 40, 26, 5,
-              (47): 19, 18, 25, 19, 22, 8, 2, 10, 28, 32, 33, 8, 10, 0, 19, 5,
-              (63): 17, 6, 3, 1, 2, 67, 27, 0, 2, 25, 11, 26, 25, 30, 32, 35,
-              (79): 23, 27, 21, 27, 6, 28, 25, 12, 7, 31, 16, 10, 1, 44, 15,
-              (94): 26, 27, 13, 14, 16, 8, 19, 17, 17, 17, 15, 28, 26, 14, 10,
-              (109): 41, 25, 16, 16, 11, 29
-              }
-           }
-           DATASET "nM" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 460, 699, 337, 151, 216, 1385, 1578, 1313, 580, 341, 359,
-              (11): 448, 233, 1574, 185, 281, 236, 181, 990, 419, 378, 341,
-              (22): 1405, 1398, 430, 1345, 1272, 399, 323, 173, 380, 582, 383,
-              (33): 636, 512, 954, 1019, 217, 937, 737, 738, 741, 748, 739,
-              (44): 741, 614, 74, 803, 454, 403, 325, 514, 269, 131, 210, 492,
-              (56): 755, 661, 222, 633, 146, 411, 71, 594, 258, 115, 62, 67,
-              (68): 1412, 656, 53, 56, 516, 366, 596, 591, 589, 586, 594, 582,
-              (80): 585, 593, 451, 130, 692, 535, 402, 129, 601, 113, 217, 88,
-              (92): 1046, 219, 432, 427, 425, 427, 432, 430, 431, 427, 425,
-              (103): 429, 427, 429, 428, 288, 189, 701, 281, 424, 354, 96, 569
-              }
-           }
-           DATASET "nMM" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 0, 3, 2, 0, 0, 7, 3, 3, 2, 1, 1, 8, 0, 2, 2, 1, 1, 0, 4, 3,
-              (20): 4, 3, 3, 4, 0, 3, 3, 0, 1, 0, 4, 1, 0, 3, 0, 1, 2, 0, 1, 0,
-              (40): 3, 0, 0, 2, 0, 2, 0, 4, 1, 2, 0, 2, 1, 0, 0, 0, 4, 2, 1, 1,
-              (60): 0, 0, 0, 4, 0, 1, 1, 0, 3, 0, 0, 0, 1, 0, 1, 0, 1, 2, 1, 2,
-              (80): 2, 0, 0, 1, 5, 1, 0, 0, 0, 1, 1, 0, 6, 1, 0, 0, 0, 0, 1, 0,
-              (100): 0, 0, 1, 0, 1, 1, 0, 1, 0, 5, 1, 1, 1, 3, 2
-              }
-           }
-           DATASET "rEnd" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 1129, 1134, 344, 10394, 10185, 8906, 7235, 8657, 1040, 7418,
-              (10): 382, 7820, 7290, 7894, 5505, 1194, 853, 1262, 1029, 6211,
-              (20): 7351, 6903, 4055, 5571, 445, 1918, 3290, 11588, 570, 185,
-              (30): 407, 6227, 2531, 661, 1822, 1259, 1053, 228, 2107, 2064,
-              (40): 3718, 1255, 2884, 4549, 5389, 763, 79, 827, 5908, 431, 357,
-              (51): 9480, 5699, 8874, 1925, 888, 798, 1541, 10609, 10332, 9628,
-              (61): 4803, 5653, 3208, 3516, 9443, 9552, 2356, 2213, 684, 9734,
-              (71): 9619, 546, 380, 3818, 1808, 4484, 1145, 2488, 5139, 3146,
-              (81): 5805, 479, 5993, 733, 11857, 11253, 4868, 4684, 134, 4005,
-              (91): 11992, 3296, 2155, 1318, 3325, 5293, 6271, 1816, 3812,
-              (100): 4805, 5785, 6766, 4306, 2823, 2330, 808, 308, 200, 756,
-              (110): 2211, 2705, 371, 3562, 653
-              }
-           }
-           DATASET "rStart" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 653, 395, 1, 10234, 9960, 7468, 5574, 7285, 441, 7066, 7,
-              (11): 7348, 7051, 6255, 5313, 907, 607, 1075, 0, 5743, 6957,
-              (21): 6546, 2581, 4102, 1, 498, 1965, 11180, 229, 3, 9, 5619,
-              (32): 2130, 1, 1302, 281, 0, 0, 1125, 1301, 2930, 479, 2111,
-              (43): 3768, 4608, 121, 0, 1, 5435, 1, 13, 8942, 5421, 8741, 1705,
-              (55): 368, 7, 845, 10378, 9688, 9482, 4373, 5577, 2593, 3252,
-              (65): 9324, 9488, 2287, 731, 1, 9681, 9561, 4, 3, 3195, 1192,
-              (76): 3864, 525, 1858, 4532, 2532, 5191, 1, 5856, 8, 11296,
-              (86): 10839, 4732, 4052, 4, 3777, 11903, 2200, 1920, 860, 2871,
-              (96): 4855, 5830, 1367, 3374, 4355, 5341, 6323, 3860, 2380, 1872,
-              (106): 354, 5, 1, 9, 1904, 2264, 0, 3452, 53
-              }
-           }
-           DATASET "tEnd" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 471, 1019, 1026, 2326, 2397, 5015, 6125, 5860, 5203, 5011,
-              (10): 5215, 5380, 5388, 7039, 6134, 6550, 6557, 6676, 7496, 7039,
-              (20): 7338, 7352, 9902, 9903, 8962, 9909, 9909, 9160, 9316, 9162,
-              (30): 9631, 9900, 9903, 10847, 10830, 11277, 12062, 11286, 12083,
-              (39): 14130, 14129, 14130, 14130, 14131, 14130, 14039, 13483,
-              (47): 14494, 14130, 14130, 14480, 16490, 16359, 16477, 18449,
-              (55): 18989, 20659, 20659, 21954, 22375, 22386, 25815, 25627,
-              (63): 28077, 28077, 28300, 28300, 28865, 30239, 30238, 29805,
-              (71): 29823, 32290, 33539, 33947, 33947, 33944, 33945, 33945,
-              (79): 33947, 33946, 33947, 33947, 33945, 34651, 34524, 34384,
-              (87): 34170, 34653, 34455, 34653, 34524, 41729, 40876, 41668,
-              (95): 41662, 41667, 41666, 41667, 41667, 41667, 41663, 41659,
-              (103): 41667, 41663, 41667, 41667, 41667, 42245, 43431, 43707,
-              (111): 43852, 46021, 47300, 48502
-              }
-           }
-           DATASET "tId" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-              (20): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-              (40): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-              (60): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-              (80): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-              (100): 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-              }
-           }
-           DATASET "tStart" {
-              DATATYPE  H5T_STD_U32LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 0, 302, 675, 2168, 2170, 3572, 4506, 4507, 4592, 4658, 4842,
-              (11): 4903, 5135, 5426, 5942, 6257, 6311, 6486, 6486, 6605, 6941,
-              (21): 6998, 8453, 8455, 8519, 8525, 8608, 8731, 8981, 8981, 9235,
-              (31): 9303, 9507, 10188, 10282, 10283, 11009, 11062, 11115,
-              (39): 13374, 13374, 13374, 13374, 13376, 13379, 13409, 13409,
-              (47): 13668, 13669, 13714, 14151, 15960, 16065, 16341, 18230,
-              (55): 18491, 19881, 19988, 21726, 21728, 22235, 25401, 25555,
-              (63): 27453, 27803, 28180, 28234, 28798, 28798, 29570, 29751,
-              (71): 29764, 31757, 33161, 33344, 33345, 33345, 33345, 33345,
-              (79): 33345, 33346, 33346, 33486, 33807, 33933, 33969, 33970,
-              (87): 34036, 34037, 34339, 34431, 34435, 40652, 40652, 41226,
-              (95): 41226, 41226, 41226, 41226, 41226, 41226, 41226, 41226,
-              (103): 41228, 41228, 41231, 41231, 41373, 42054, 42705, 43415,
-              (111): 43421, 45663, 47198, 47918
-              }
-           }
-           DATASET "virtualFileOffset" {
-              DATATYPE  H5T_STD_U64LE
-              DATASPACE  SIMPLE { ( 115 ) / ( 115 ) }
-              DATA {
-              (0): 37289984, 37297464, 37302901, 37305464, 37306835, 37308840,
-              (6): 37319177, 37331170, 37340990, 37345579, 37348282, 37351180,
-              (12): 1400766464, 1400768474, 1400780090, 1400781672, 1400783950,
-              (17): 1400785921, 1400787532, 1400794809, 1400798441, 1400801586,
-              (22): 1400804273, 1400814861, 1400825478, 2693922816, 2693933061,
-              (27): 2693942476, 2693945620, 2693948306, 2693949849, 2693953028,
-              (32): 2693957573, 2693960750, 2693965565, 2693969455, 2693976553,
-              (37): 2693984256, 4005036032, 4005043189, 4005048711, 4005054536,
-              (42): 4005060173, 4005065686, 4005071343, 4005077071, 4005081765,
-              (47): 4005082502, 4005088435, 4005091906, 5185470464, 5185476664,
-              (52): 5185480882, 5185483212, 5185484548, 5185486594, 5185492867,
-              (57): 5185498641, 5185503674, 5185505475, 5185510163, 5185511365,
-              (62): 5185514585, 5185516413, 5185520969, 5185523099, 5185524146,
-              (67): 5185524807, 6259802112, 6259812658, 6259817624, 6259818185,
-              (72): 6259818965, 6259822993, 6259825854, 6259830337, 6259834829,
-              (77): 6259839381, 6259843935, 6259848543, 6259852994, 6259857544,
-              (82): 6259861981, 6259865591, 7562395648, 7562401014, 7562405209,
-              (87): 7562408308, 7562409500, 7562414175, 7562415337, 7562417257,
-              (92): 7562418063, 7562425937, 7562427821, 7562431267, 7562434728,
-              (97): 7562438021, 7562441318, 7562444627, 7562447840, 7562451195,
-              (102): 7562454497, 8838053888, 8838057225, 8838060537,
-              (106): 8838064023, 8838067483, 8838069828, 8838071554,
-              (110): 8838077140, 8838079550, 8838082848, 8838085829, 8838086898
-              }
-           }
-        }
-     }
-  }
-  }
diff --git a/tests/cram/plurality-hcv.t b/tests/cram/plurality-hcv.t
index d674274..afc0f3e 100644
--- a/tests/cram/plurality-hcv.t
+++ b/tests/cram/plurality-hcv.t
@@ -6,7 +6,7 @@ CSV output is correct.
   $ export DATA=$TESTDIR/../data
   $ export INPUT=$DATA/hcv/aligned_reads.cmp.h5
   $ export REFERENCE=$DATA/hcv/HCV_Ref_For_187140.fasta
-  $ variantCaller.py --algorithm=plurality -q 10 -r $REFERENCE -o variants.gff -o consensus.fq $INPUT
+  $ variantCaller --algorithm=plurality -q 10 -r $REFERENCE -o variants.gff -o consensus.fq $INPUT
 
 I like to show the head of the output files inline here so that glaringly obvious changes will
 pop right out, but I verify that the files are exactly correct by looking at the md5 sums.
diff --git a/tests/cram/version.t b/tests/cram/version.t
index 5d58c75..7233c86 100644
--- a/tests/cram/version.t
+++ b/tests/cram/version.t
@@ -1,8 +1,9 @@
 This actually failed once because of a missing import, so we might as
 well test it.
 
-  $ variantCaller.py --version
-    GenomicConsensus version: * (glob)
-    ConsensusCore version: * (glob)
-    h5py version: * (glob)
-    hdf5 version: * (glob)
+  $ variantCaller --version
+  1.1.0
+
+This will break if the parser setup is messed up.
+
+  $ variantCaller --help >/dev/null
diff --git a/tests/unit/test_tool_contract.py b/tests/unit/test_tool_contract.py
new file mode 100644
index 0000000..6764dd1
--- /dev/null
+++ b/tests/unit/test_tool_contract.py
@@ -0,0 +1,82 @@
+
+import unittest
+import os.path
+
+from pbcore.io import openDataSet, ContigSet
+import pbcommand.testkit
+
+# XXX local data directory, absolutely required
+DATA_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
+assert os.path.isdir(DATA_DIR)
+
+# optional (but required for TestSummarizeConsensus)
+DATA_DIR_2 = "/mnt/secondary/Share/Quiver/TestData/tinyLambda/"
+
+ at unittest.skipUnless(os.path.isdir("/pbi/dept/secondary/siv/testdata"),
+                     "Missing /pbi/dept/secondary/siv/testdata")
+class TestQuiver(pbcommand.testkit.PbTestApp):
+    DRIVER_BASE = "variantCaller "
+    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
+    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
+    REQUIRES_PBCORE = True
+    INPUT_FILES = [
+        "/pbi/dept/secondary/siv/testdata/SA3-DS/lambda/2372215/0007_tiny/Alignment_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.alignmentset.xml",
+        "/pbi/dept/secondary/siv/references/lambdaNEB/sequence/lambdaNEB.fasta"
+    ]
+    TASK_OPTIONS = {
+      "genomic_consensus.task_options.min_coverage": 0,
+      "genomic_consensus.task_options.min_confidence": 0,
+      "genomic_consensus.task_options.algorithm": "quiver",
+      "genomic_consensus.task_options.diploid": False,
+    }
+
+    def run_after(self, rtc, output_dir):
+        contigs_file = rtc.task.output_files[1]
+        with openDataSet(contigs_file, strict=True) as ds:
+            self.assertTrue(isinstance(ds, ContigSet))
+
+
+class TestGffToBed(pbcommand.testkit.PbTestApp):
+    DRIVER_BASE = "gffToBed "
+    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
+    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
+    REQUIRES_PBCORE = True
+    INPUT_FILES = [
+        os.path.join(DATA_DIR, "converters", "variants.gff.gz"),
+    ]
+    TASK_OPTIONS = {
+        "genomic_consensus.task_options.gff2bed_purpose": "variants",
+        "genomic_consensus.task_options.track_name": "None",
+        "genomic_consensus.task_options.track_description": "None",
+        "genomic_consensus.task_options.use_score": 0,
+    }
+
+
+class TestGffToVcf(pbcommand.testkit.PbTestApp):
+    DRIVER_BASE = "gffToVcf"
+    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
+    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
+    REQUIRES_PBCORE = True
+    INPUT_FILES = [
+        os.path.join(DATA_DIR, "converters", "variants.gff.gz"),
+    ]
+    TASK_OPTIONS = {
+        "genomic_consensus.task_options.global_reference": "Staphylococcus_aureus_USA300_TCH1516",
+    }
+
+
+ at unittest.skipUnless(os.path.isdir(DATA_DIR_2), "Missing %s" % DATA_DIR_2)
+class TestSummarizeConsensus(pbcommand.testkit.PbTestApp):
+    DRIVER_BASE = "summarizeConsensus"
+    DRIVER_EMIT = DRIVER_BASE + " --emit-tool-contract "
+    DRIVER_RESOLVE = DRIVER_BASE + " --resolved-tool-contract "
+    REQUIRES_PBCORE = True
+    INPUT_FILES = [
+        os.path.join(DATA_DIR_2, "alignment_summary.gff"),
+        os.path.join(DATA_DIR_2, "variants.gff.gz")
+    ]
+    TASK_OPTIONS = {}
+
+
+if __name__ == "__main__":
+    unittest.main()

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbgenomicconsensus.git