[med-svn] [python-pbcore] 01/02: Imported Upstream version 1.0.0

Afif Elghraoui afif-guest at moszumanska.debian.org
Sun Jun 7 10:13:48 UTC 2015


This is an automated email from the git hooks/post-receive script.

afif-guest pushed a commit to branch master
in repository python-pbcore.

commit 25f1d31c7ca61c4685718d91da161c227bdedbfa
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Sat Jun 6 23:34:11 2015 -0700

    Imported Upstream version 1.0.0
---
 .gitignore                                         |    7 +
 CHANGELOG.org                                      |   45 +
 LICENSES.txt                                       |   32 +
 Makefile                                           |   52 +
 README.md                                          |   19 +
 doc/Makefile                                       |  156 +++
 doc/conf.py                                        |  253 ++++
 doc/index.rst                                      |   24 +
 doc/modules.rst                                    |    7 +
 doc/pacbio-theme/static/headerGradient.jpg         |  Bin 0 -> 7099 bytes
 doc/pacbio-theme/static/pacbio.css                 |  238 ++++
 doc/pacbio-theme/static/pacbioLogo.png             |  Bin 0 -> 3128 bytes
 doc/pacbio-theme/static/pygments.css               |   55 +
 doc/pacbio-theme/theme.conf                        |    4 +
 doc/pbcore.chemistry.rst                           |   22 +
 doc/pbcore.data.rst                                |   10 +
 doc/pbcore.deprecated.rst                          |   30 +
 doc/pbcore.io.rst                                  |  182 +++
 doc/pbcore.model.rst                               |    2 +
 doc/pbcore.rst                                     |   22 +
 doc/pbcore.util.rst                                |   18 +
 pbcore/__init__.py                                 |   31 +
 pbcore/chemistry/__init__.py                       |   31 +
 pbcore/chemistry/chemistry.py                      |   91 ++
 pbcore/chemistry/resources/mapping.xml             |  154 +++
 pbcore/data/1.4_bas_files.fofn                     |    2 +
 pbcore/data/2.0_bax_files.fofn                     |    3 +
 pbcore/data/2.1_bax_files.fofn                     |    3 +
 pbcore/data/2.1_ccs_files.fofn                     |    1 +
 pbcore/data/2.3_bax_files.fofn                     |    3 +
 pbcore/data/Fluidigm_human_amplicons.fasta         |  250 ++++
 pbcore/data/Fluidigm_human_amplicons.fasta.fai     |   48 +
 pbcore/data/Fluidigm_human_amplicons_tiny.fasta    |   19 +
 pbcore/data/__init__.py                            |  167 +++
 pbcore/data/aligned_reads_1.bam                    |  Bin 0 -> 34798 bytes
 pbcore/data/aligned_reads_1.bam.bai                |  Bin 0 -> 160 bytes
 pbcore/data/aligned_reads_1.cmp.h5                 |  Bin 0 -> 263540 bytes
 pbcore/data/bam_mapping.bam                        |  Bin 0 -> 172060 bytes
 pbcore/data/bam_mapping.bam.bai                    |  Bin 0 -> 112 bytes
 pbcore/data/bam_mapping.bam.pbi                    |  Bin 0 -> 41344 bytes
 pbcore/data/barcodes-ed65-450.fasta                |  900 ++++++++++++++
 pbcore/data/barcodes-ed65-450.fasta.fai            |  450 +++++++
 pbcore/data/bc_files.fofn                          |    3 +
 pbcore/data/blasr-output.m4                        |    2 +
 pbcore/data/blasr-output.m5                        |    2 +
 pbcore/data/cmph5_mapping.cmp.h5                   |  Bin 0 -> 236542 bytes
 pbcore/data/lambdaNEB.fa                           |  608 ++++++++++
 pbcore/data/lambdaNEB.fa.fai                       |    1 +
 ...c100129202555500000315043109121112_s1_p0.bas.h5 |  Bin 0 -> 1159590 bytes
 ...c100129202555500000315043109121112_s2_p0.bas.h5 |  Bin 0 -> 984538 bytes
 ...00497142550000001823078008081323_s1_p0.1.bax.h5 |  Bin 0 -> 485799 bytes
 ...00497142550000001823078008081323_s1_p0.2.bax.h5 |  Bin 0 -> 715572 bytes
 ...00497142550000001823078008081323_s1_p0.3.bax.h5 |  Bin 0 -> 739046 bytes
 ...c100497142550000001823078008081323_s1_p0.bas.h5 |  Bin 0 -> 260202 bytes
 ...00569412550000001823090301191423_s1_p0.1.ccs.h5 |  Bin 0 -> 487912 bytes
 ...00564662550000001823085912221321_s1_p0.1.bax.h5 |  Bin 0 -> 501435 bytes
 ...00564662550000001823085912221321_s1_p0.1.rgn.h5 |  Bin 0 -> 17000 bytes
 ...00564662550000001823085912221321_s1_p0.2.bax.h5 |  Bin 0 -> 406010 bytes
 ...00564662550000001823085912221321_s1_p0.2.rgn.h5 |  Bin 0 -> 17000 bytes
 ...00564662550000001823085912221321_s1_p0.3.bax.h5 |  Bin 0 -> 588082 bytes
 ...00564662550000001823085912221321_s1_p0.3.rgn.h5 |  Bin 0 -> 17060 bytes
 ...c100564662550000001823085912221321_s1_p0.bas.h5 |  Bin 0 -> 260202 bytes
 ...100626172550000001823119008061414_s1_p0.1.bc.h5 |  Bin 0 -> 62704 bytes
 ...100626172550000001823119008061414_s1_p0.2.bc.h5 |  Bin 0 -> 45120 bytes
 ...100626172550000001823119008061414_s1_p0.3.bc.h5 |  Bin 0 -> 27920 bytes
 ...00564852550000001823085912221377_s1_X0.1.bax.h5 |  Bin 0 -> 1284247 bytes
 ...4852550000001823085912221377_s1_X0.subreads.bam |  Bin 0 -> 202934 bytes
 ...00702482550000001823141103261590_s1_p0.1.bax.h5 |  Bin 0 -> 856704 bytes
 ...00702482550000001823141103261590_s1_p0.2.bax.h5 |  Bin 0 -> 404288 bytes
 ...00702482550000001823141103261590_s1_p0.3.bax.h5 |  Bin 0 -> 610688 bytes
 ...c100702482550000001823141103261590_s1_p0.bas.h5 |  Bin 0 -> 1318480 bytes
 pbcore/data/variants.gff                           |   11 +
 pbcore/io/BarcodeH5Reader.py                       |  374 ++++++
 pbcore/io/BasH5IO.py                               | 1026 ++++++++++++++++
 pbcore/io/FastaIO.py                               |  459 +++++++
 pbcore/io/FastqIO.py                               |  259 ++++
 pbcore/io/FofnIO.py                                |   96 ++
 pbcore/io/GffIO.py                                 |  233 ++++
 pbcore/io/__init__.py                              |   40 +
 pbcore/io/_utils.py                                |  246 ++++
 pbcore/io/align/BamAlignment.py                    |  571 +++++++++
 pbcore/io/align/BamIO.py                           |  394 ++++++
 pbcore/io/align/BlasrIO.py                         |  116 ++
 pbcore/io/align/CmpH5IO.py                         | 1277 ++++++++++++++++++++
 pbcore/io/align/PacBioBamIndex.py                  |  121 ++
 pbcore/io/align/_AlignmentMixin.py                 |  210 ++++
 pbcore/io/align/_BamSupport.py                     |  127 ++
 pbcore/io/align/__init__.py                        |   34 +
 pbcore/io/base.py                                  |  109 ++
 pbcore/io/opener.py                                |  134 ++
 pbcore/io/rangeQueries.py                          |  182 +++
 pbcore/model/__init__.py                           |   29 +
 pbcore/sequence.py                                 |   62 +
 pbcore/util/Process.py                             |   68 ++
 pbcore/util/ToolRunner.py                          |  115 ++
 pbcore/util/__init__.py                            |   29 +
 pbcore/util/decorators.py                          |   17 +
 setup.py                                           |   32 +
 tests/test_pbcore_data.py                          |   12 +
 tests/test_pbcore_io_AlnFileReaders.py             |  375 ++++++
 tests/test_pbcore_io_BarcodeH5Reader.py            |  141 +++
 tests/test_pbcore_io_BasH5Collection.py            |   28 +
 tests/test_pbcore_io_BasH5Reader.py                |  494 ++++++++
 tests/test_pbcore_io_BlasrIO.py                    |   10 +
 tests/test_pbcore_io_FastaIO.py                    |  133 ++
 tests/test_pbcore_io_FastaTable.py                 |   80 ++
 tests/test_pbcore_io_FastqIO.py                    |  183 +++
 tests/test_pbcore_io_FofnIO.py                     |   22 +
 tests/test_pbcore_io_GffIO.py                      |  100 ++
 tests/test_pbcore_io_rangeQueries.py               |   71 ++
 tests/test_pbcore_io_unaligned_bam.py              |   68 ++
 tests/test_pbcore_util_sequences.py                |   48 +
 112 files changed, 12083 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a0638ec
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
+build
+dist
+doc/_build
+*~
+*.pyc
+*.egg-info
+nosetests.xml
\ No newline at end of file
diff --git a/CHANGELOG.org b/CHANGELOG.org
new file mode 100644
index 0000000..2764237
--- /dev/null
+++ b/CHANGELOG.org
@@ -0,0 +1,45 @@
+* Version 1.0.0
+- BAM fixes
+- Better FOFN handling
+- Recognize additional part numbers for P6 chemistry
+- Better --debug in ToolRunner
+
+* Version 0.9.5
+- BAM fixes
+- Adopt aStart, aEnd conventions from BAM spec, replacing rStart, rEnd
+- AlnIndex sharing among CmpH5Reader objects
+
+* Version 0.9.4
+- BAM support moved to 3.0 spec; support for earlier PacBio BAMs
+  dropped
+- Deprecation warning decorators added
+- BAM-incompatible cmp.h5 accessors deprecated
+- Moved to support 3.0 FASTA conventions in Fasta readers
+- Rename FastaTable to IndexedFastaReader
+
+* Version 0.9.3
+- ".open" script added for convenience
+- openers added (factory methods invoking the appropriate Reader
+  class; useful for applications that want to transparently use either
+  BAM or cmp.h5)
+
+* Version 0.9.2
+- BAM support: Addition of BamReader, IndexedBamReader, and BamAlignment
+- Minor CmpH5Reader API changes for greater compatibility with
+  BamReader (deprecation of movieInfo in favor of readGroupInfo)
+- Removed unused components from CmpH5Reader API
+- Add example BAM file
+- Length accessors for FAST[AQ] record types
+
+* Version 0.9.1 (SMRTanalysis 2.3.0p1)
+- FASTA header parsing into "id" and "metadata" now available in the
+  FastaRecord types
+
+* Version 0.9.0 (SMRTanalysis 2.3.0)
+- pbcore.chemistry: a new subpackage for decoding barcode information
+  to the human-readable chemistry name
+- BasH5Reader: more robust handling of broken region tables
+- CmpH5Reader: loading an empty cmp.h5 will raise an EmptyCmpH5Error.
+  This is because the semantics of an empty cmp.h5 were never defined,
+  and for example it is not defined whether or not a cmp.h5 lacking a
+  movie table is compliant.
diff --git a/LICENSES.txt b/LICENSES.txt
new file mode 100644
index 0000000..5360bf2
--- /dev/null
+++ b/LICENSES.txt
@@ -0,0 +1,32 @@
+Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+* Neither the name of Pacific Biosciences nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE
+GRANTED BY THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC
+BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..64e10c7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+.PHONY: clean doc doc-clean tests check test install build bdist gh-pages
+
+build:
+	python setup.py build
+
+bdist:
+	python setup.py build --executable="/usr/bin/env python"
+	python setup.py bdist --formats=egg
+
+install:
+	python setup.py install
+
+clean: doc-clean
+	rm -rf build/;\
+	find . -name "*.egg-info" | xargs rm -rf;\
+	rm -rf dist/;\
+	find . -name "*.pyc" | xargs rm -f;
+	rm -f nosetests.xml
+
+doc:
+	sphinx-apidoc -o doc/ pbcore/ && cd doc/ && make html
+doc-clean:
+	cd doc && rm -rf _templates _static _build searchindex.js objects.inv
+
+doctest:
+	cd doc && make doctest
+
+unit-test:
+	nosetests --with-xunit tests -v
+
+test: doctest unit-test
+
+tests: test
+check: test
+
+GH_PAGES_SOURCES = pbcore doc
+
+gh-pages:
+	git checkout gh-pages
+	rm -rf _static _sources *.js *.html *.inv
+	git checkout master $(GH_PAGES_SOURCES)
+	cd doc && make html
+	mv -fv doc/_build/html/* .
+	rm -rf $(GH_PAGES_SOURCES)
+	git add --all && git commit -m "Automatic update of gh-pages branch" && git checkout master
+
+pip-install:
+	@which pip > /dev/null
+	@pip freeze|grep 'pbcore=='>/dev/null \
+      && pip uninstall -y pbcore \
+      || echo -n ''
+	@pip install --no-index ./
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..4ffa878
--- /dev/null
+++ b/README.md
@@ -0,0 +1,19 @@
+
+The pbcore package provides Python APIs for interacting with PacBio
+data files and writing bioinformatics applications.
+
+Installation:
+-------------
+    % pip install numpy
+    % pip install h5py
+    % python setup.py install
+
+Requirements:
+-------------
+- Python 2.7
+- h5py >= 2.0
+- numpy >= 1.6.0
+
+Documentation:
+--------------
+http://pacificbiosciences.github.io/pbcore/
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..e367d03
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,156 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext api-doc
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	-rm -rf $(BUILDDIR)/*
+
+api-doc:
+	sphinx-apidoc -o . -d 4  ../pbcore/
+
+html: api-doc
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pbcore.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pbcore.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/pbcore"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pbcore"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	make -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100755
index 0000000..6f0945d
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,253 @@
+# -*- coding: utf-8 -*-
+#
+# pbcore documentation build configuration file, created by
+# sphinx-quickstart on Thu Nov 10 14:37:34 2011.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+globals = {}
+execfile("../pbcore/__init__.py", globals)
+__VERSION__ = globals["__VERSION__"]
+
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc',
+              'sphinx.ext.intersphinx',
+              'sphinx.ext.todo',
+              'sphinx.ext.doctest']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'pbcore'
+copyright = u'2011-2015, Pacific Biosciences'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = __VERSION__
+# The full version, including alpha/beta/rc tags.
+release = __VERSION__
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'pacbio-theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+html_theme_path = ["../../../../doc/theme/","./"]
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+#html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'pbcoredoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'pbcore.tex', u'pbcore Documentation',
+   u'devnet@pacificbiosciences.com', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ('index', 'pbcore', u'pbcore Documentation',
+     [u'devnet@pacificbiosciences.com'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output ------------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+  ('index', 'pbcore', u'pbcore Documentation', u'devnet@pacificbiosciences.com',
+   'pbcore', 'One line description of project.', 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'http://docs.python.org/': None}
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..1a4889a
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,24 @@
+pbcore
+######
+
+The `pbcore` package provides Python modules for processing PacBio
+data files and building PacBio bioinformatics applications.
+
+
+Library API documentation
+=========================
+
+    :doc:`pbcore.io`: Classes for reading/writing PacBio data formats and essential common data formats
+
+    :doc:`pbcore.model`: Common base classes
+
+    :doc:`pbcore.util`: Utilities for building bioinformatics applications
+
+    :doc:`pbcore.data`: Small bundled data files that are handy for testing and debugging
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/doc/modules.rst b/doc/modules.rst
new file mode 100644
index 0000000..e7de000
--- /dev/null
+++ b/doc/modules.rst
@@ -0,0 +1,7 @@
+pbcore
+======
+
+.. toctree::
+   :maxdepth: 4
+
+   pbcore
diff --git a/doc/pacbio-theme/static/headerGradient.jpg b/doc/pacbio-theme/static/headerGradient.jpg
new file mode 100644
index 0000000..883f147
Binary files /dev/null and b/doc/pacbio-theme/static/headerGradient.jpg differ
diff --git a/doc/pacbio-theme/static/pacbio.css b/doc/pacbio-theme/static/pacbio.css
new file mode 100644
index 0000000..b4ab87f
--- /dev/null
+++ b/doc/pacbio-theme/static/pacbio.css
@@ -0,0 +1,238 @@
+/**
+ * Sphinx stylesheet -- default theme
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+ 
+@import url("basic.css");
+ 
+/* -- page layout ----------------------------------------------------------- */
+ 
+body {
+    font-family: Arial, sans-serif;
+    font-size: 100%;
+    background-color: #555;
+    color: #555;
+    margin: 0;
+    padding: 0;
+    min-width: 500px;
+    max-width: 956px;
+    margin: 0 auto;
+}
+
+div.documentwrapper {
+    float: left;
+    width: 100%;
+}
+
+div.bodywrapper {
+    margin: 0 0 0 230px;
+}
+
+hr{
+    border: 1px solid #B1B4B6;
+    
+}
+ 
+div.document {
+    background-color: #eee;
+}
+ 
+div.body {
+    background-color: #ffffff;
+    color: #3E4349;
+    padding: 30px 30px 30px 30px;
+    font-size: 0.8em;
+}
+ 
+div.footer {
+    color: #555;
+	background-color: #fff;
+    padding: 13px 0;
+    text-align: center;
+    font-size: 75%;
+
+}
+div.footer a {
+    color: #444;
+    text-decoration: underline;
+}
+ 
+div.related {
+    background: #fff url(headerGradient.jpg);
+    line-height: 80px;
+    color: #fff;
+    font-size: 0.80em;
+    height: 79px;
+    z-index: -1;
+}
+
+div.related ul {
+    background: url(pacbioLogo.png) 10px no-repeat;
+    padding: 0 0 0 200px;
+}
+ 
+div.related a {
+    color: #E2F3CC;
+}
+ 
+div.sphinxsidebar {
+    font-size: 0.75em;
+    line-height: 1.5em;
+}
+
+div.sphinxsidebarwrapper{
+    padding: 20px 0;
+}
+ 
+div.sphinxsidebar h3,
+div.sphinxsidebar h4 {
+    font-family: Arial, sans-serif;
+    color: #222;
+    font-size: 1.2em;
+    font-weight: bold;
+    margin: 0;
+    padding: 5px 10px 0 10px;
+}
+
+div.sphinxsidebar h4{
+    font-size: 1.1em;
+}
+ 
+div.sphinxsidebar h3 a {
+    color: #444;
+}
+ 
+ 
+div.sphinxsidebar p {
+    color: #888;
+    padding: 0px 20px;
+	margin-top: 5px;
+}
+ 
+div.sphinxsidebar p.topless {
+}
+ 
+div.sphinxsidebar ul {
+    margin: 5px 20px 10px 20px;
+    padding: 0;
+    color: #000;
+}
+ 
+div.sphinxsidebar a {
+    color: #444;
+}
+ 
+div.sphinxsidebar input {
+    border: 1px solid #ccc;
+    font-family: sans-serif;
+    font-size: 1em;
+}
+
+div.sphinxsidebar input[type=text]{
+    margin-left: 20px;
+}
+ 
+/* -- body styles ----------------------------------------------------------- */
+ 
+a {
+    color: #005B81;
+    text-decoration: none;
+}
+ 
+a:hover {
+    color: #E32E00;
+    text-decoration: underline;
+}
+ 
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+    font-family: Arial, sans-serif;
+    font-weight: bold;
+    color: #264868;
+    margin: 30px 0px 10px 0px;
+    padding: 5px 0 5px 0px;
+}
+ 
+div.body h1 { border-top: 20px solid white; margin-top: 0; font-size: 180%; font-weight: normal; }
+div.body h2 { font-size: 125%; }
+div.body h3 { font-size: 110%; }
+div.body h4 { font-size: 100%; }
+div.body h5 { font-size: 100%; }
+div.body h6 { font-size: 100%; }
+ 
+a.headerlink {
+    color: #c60f0f;
+    font-size: 0.8em;
+    padding: 0 4px 0 4px;
+    text-decoration: none;
+}
+ 
+a.headerlink:hover {
+    background-color: #c60f0f;
+    color: white;
+}
+ 
+div.body p, div.body dd, div.body li {
+    line-height: 1.5em;
+    font-size: 1em;
+}
+ 
+div.admonition p.admonition-title + p {
+    display: inline;
+}
+
+div.highlight{
+    background-color: white;
+}
+
+div.note {
+    background-color: #eee;
+    border: 1px solid #ccc;
+}
+ 
+div.seealso {
+    background-color: #ffc;
+    border: 1px solid #ff6;
+}
+ 
+div.topic {
+    background-color: #eee;
+}
+ 
+div.warning {
+    background-color: #ffe4e4;
+    border: 1px solid #f66;
+}
+ 
+p.admonition-title {
+    display: inline;
+}
+ 
+p.admonition-title:after {
+    content: ":";
+}
+ 
+pre {
+    padding: 10px;
+    background-color: White;
+    color: #222;
+    line-height: 1.2em;
+    border: 1px solid #C6C9CB;
+    font-size: 1.2em;
+    margin: 1.5em 0 1.5em 0;
+    -webkit-box-shadow: 1px 1px 1px #d8d8d8;
+    -moz-box-shadow: 1px 1px 1px #d8d8d8;
+}
+ 
+tt {
+    background-color: #ecf0f3;
+    color: #222;
+    padding: 1px 2px;
+    font-size: 1.2em;
+    font-family: monospace;
+}
+
diff --git a/doc/pacbio-theme/static/pacbioLogo.png b/doc/pacbio-theme/static/pacbioLogo.png
new file mode 100644
index 0000000..b2e4887
Binary files /dev/null and b/doc/pacbio-theme/static/pacbioLogo.png differ
diff --git a/doc/pacbio-theme/static/pygments.css b/doc/pacbio-theme/static/pygments.css
new file mode 100644
index 0000000..4588cde
--- /dev/null
+++ b/doc/pacbio-theme/static/pygments.css
@@ -0,0 +1,55 @@
+.c { color: #999988; font-style: italic } /* Comment */
+.k { font-weight: bold } /* Keyword */
+.o { font-weight: bold } /* Operator */
+.cm { color: #999988; font-style: italic } /* Comment.Multiline */
+.cp { color: #999999; font-weight: bold } /* Comment.preproc */
+.c1 { color: #999988; font-style: italic } /* Comment.Single */
+.gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
+.ge { font-style: italic } /* Generic.Emph */
+.gr { color: #aa0000 } /* Generic.Error */
+.gh { color: #999999 } /* Generic.Heading */
+.gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
+.go { color: #111 } /* Generic.Output */
+.gp { color: #555555 } /* Generic.Prompt */
+.gs { font-weight: bold } /* Generic.Strong */
+.gu { color: #aaaaaa } /* Generic.Subheading */
+.gt { color: #aa0000 } /* Generic.Traceback */
+.kc { font-weight: bold } /* Keyword.Constant */
+.kd { font-weight: bold } /* Keyword.Declaration */
+.kp { font-weight: bold } /* Keyword.Pseudo */
+.kr { font-weight: bold } /* Keyword.Reserved */
+.kt { color: #445588; font-weight: bold } /* Keyword.Type */
+.m { color: #009999 } /* Literal.Number */
+.s { color: #bb8844 } /* Literal.String */
+.na { color: #008080 } /* Name.Attribute */
+.nb { color: #999999 } /* Name.Builtin */
+.nc { color: #445588; font-weight: bold } /* Name.Class */
+.no { color: #ff99ff } /* Name.Constant */
+.ni { color: #800080 } /* Name.Entity */
+.ne { color: #990000; font-weight: bold } /* Name.Exception */
+.nf { color: #990000; font-weight: bold } /* Name.Function */
+.nn { color: #555555 } /* Name.Namespace */
+.nt { color: #000080 } /* Name.Tag */
+.nv { color: purple } /* Name.Variable */
+.ow { font-weight: bold } /* Operator.Word */
+.mf { color: #009999 } /* Literal.Number.Float */
+.mh { color: #009999 } /* Literal.Number.Hex */
+.mi { color: #009999 } /* Literal.Number.Integer */
+.mo { color: #009999 } /* Literal.Number.Oct */
+.sb { color: #bb8844 } /* Literal.String.Backtick */
+.sc { color: #bb8844 } /* Literal.String.Char */
+.sd { color: #bb8844 } /* Literal.String.Doc */
+.s2 { color: #bb8844 } /* Literal.String.Double */
+.se { color: #bb8844 } /* Literal.String.Escape */
+.sh { color: #bb8844 } /* Literal.String.Heredoc */
+.si { color: #bb8844 } /* Literal.String.Interpol */
+.sx { color: #bb8844 } /* Literal.String.Other */
+.sr { color: #808000 } /* Literal.String.Regex */
+.s1 { color: #bb8844 } /* Literal.String.Single */
+.ss { color: #bb8844 } /* Literal.String.Symbol */
+.bp { color: #999999 } /* Name.Builtin.Pseudo */
+.vc { color: #ff99ff } /* Name.Variable.Class */
+.vg { color: #ff99ff } /* Name.Variable.Global */
+.vi { color: #ff99ff } /* Name.Variable.Instance */
+.il { color: #009999 } /* Literal.Number.Integer.Long */
+
diff --git a/doc/pacbio-theme/theme.conf b/doc/pacbio-theme/theme.conf
new file mode 100644
index 0000000..dd24a1a
--- /dev/null
+++ b/doc/pacbio-theme/theme.conf
@@ -0,0 +1,4 @@
+[theme]
+inherit = default 
+stylesheet = pacbio.css
+pygments_style = tango
diff --git a/doc/pbcore.chemistry.rst b/doc/pbcore.chemistry.rst
new file mode 100644
index 0000000..ad7d687
--- /dev/null
+++ b/doc/pbcore.chemistry.rst
@@ -0,0 +1,22 @@
+pbcore.chemistry package
+========================
+
+Submodules
+----------
+
+pbcore.chemistry.chemistry module
+---------------------------------
+
+.. automodule:: pbcore.chemistry.chemistry
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: pbcore.chemistry
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/pbcore.data.rst b/doc/pbcore.data.rst
new file mode 100644
index 0000000..e74af8b
--- /dev/null
+++ b/doc/pbcore.data.rst
@@ -0,0 +1,10 @@
+pbcore.data
+===========
+
+:mod:`pbcore.data`
+------------------
+
+.. automodule:: pbcore.data
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/pbcore.deprecated.rst b/doc/pbcore.deprecated.rst
new file mode 100644
index 0000000..2141176
--- /dev/null
+++ b/doc/pbcore.deprecated.rst
@@ -0,0 +1,30 @@
+pbcore.deprecated package
+=========================
+
+Submodules
+----------
+
+pbcore.deprecated.BasH5IO module
+--------------------------------
+
+.. automodule:: pbcore.deprecated.BasH5IO
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+pbcore.deprecated.ReferenceEntry module
+---------------------------------------
+
+.. automodule:: pbcore.deprecated.ReferenceEntry
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+
+Module contents
+---------------
+
+.. automodule:: pbcore.deprecated
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/pbcore.io.rst b/doc/pbcore.io.rst
new file mode 100644
index 0000000..34a9744
--- /dev/null
+++ b/doc/pbcore.io.rst
@@ -0,0 +1,182 @@
+pbcore.io
+=========
+
+The ``pbcore.io`` package provides a number of lightweight interfaces
+to PacBio data files and other standard bioinformatics file formats.
+Preferred usage is to import classes directly from the ``pbcore.io``
+package, e.g.::
+
+    >>> from pbcore.io import CmpH5Reader
+
+The classes within ``pbcore.io`` adhere to a few conventions, in order
+to provide a uniform API:
+
+  - Each data file type is thought of as a container of a `Record`
+    type; all `Reader` classes support streaming access, and
+    `CmpH5Reader` and `BasH5Reader` additionally provide random-access
+    to alignments/reads.
+
+  - The constructor argument needed to instantiate `Reader` and
+    `Writer` objects can be either a filename (which can be suffixed
+    by ".gz" for all but the h5 file types) or an open file handle.
+    The reader/writer classes will do what you would expect.
+
+
+  - The reader/writer classes all support the context manager idiom.
+    Meaning, if you write::
+
+      >>> with CmpH5Reader("aligned_reads.cmp.h5") as r:
+      ...   print r[0].read()
+
+    the `CmpH5Reader` object will be automatically closed after the
+    block within the "with" statement is executed.
+
+
+BAM/cmp.h5 compatibility: quick start
+-------------------------------------
+
+If you have an application that uses the `CmpH5Reader` and you want to
+start using BAM files, your best bet is to use the following generic
+factory functions:
+
+.. autofunction:: pbcore.io.openIndexedAlignmentFile
+
+.. autofunction:: pbcore.io.openAlignmentFile
+
+.. note::
+
+   Since BAM files contain a subset of the information that was
+   present in cmp.h5 files, you will need to provide these functions
+   an indexed FASTA file for your reference.  For *full*
+   compatibility, you need the `openIndexedAlignmentFile` function,
+   which requires the existence of a `bam.pbi` file (PacBio BAM index
+   companion file).
+
+
+
+
+`bas.h5` / `bax.h5` Formats (PacBio basecalls file)
+---------------------------------------------------
+
+The `bas.h5` / `bax.h5` file formats are container formats for PacBio
+reads, built on top of the HDF5 standard.  Originally there was just
+one `bas.h5`, but eventually "multistreaming" came along and we had to
+split the file into three `bax.h5` *parts* and one `bas.h5` file
+containing pointers to the *parts*.  Use ``BasH5Reader`` to read any
+kind of `bas.h5` file, and ``BaxH5Reader`` to read a `bax.h5`.
+
+.. note::
+
+    In contrast to GFF, for example, the `bas.h5` read coordinate
+    system is 0-based and start-inclusive/end-exclusive, i.e. the same
+    convention as Python and the C++ STL.
+
+.. autoclass:: pbcore.io.BasH5Reader
+    :members:
+    :undoc-members:
+
+.. autoclass:: pbcore.io.BasH5IO.Zmw
+    :members:
+    :undoc-members:
+
+.. autoclass:: pbcore.io.BasH5IO.ZmwRead
+    :members:
+    :undoc-members:
+
+
+BAM format
+----------
+
+The BAM format is a standard format for describing aligned and unaligned
+reads.  PacBio is transitioning from the cmp.h5 format to the BAM
+format.  For basic functionality, one should use :class:`BamReader`;
+for full compatibility with the :class:`CmpH5Reader` API (including
+alignment index functionality) one should use
+:class:`IndexedBamReader`, which requires the auxiliary *PacBio BAM
+index file* (``bam.pbi`` file).
+
+.. autoclass:: pbcore.io.BamAlignment
+    :members:
+    :undoc-members:
+
+.. autoclass:: pbcore.io.BamReader
+    :members:
+    :undoc-members:
+
+.. autoclass:: pbcore.io.IndexedBamReader
+    :members:
+    :undoc-members:
+
+
+
+`cmp.h5` format (legacy PacBio alignment file)
+----------------------------------------------
+
+The `cmp.h5` file format is an alignment format built on top of the HDF5
+standard.  It is a simple container format for PacBio alignment records.
+
+.. note::
+
+    In contrast to GFF, for example, all `cmp.h5` coordinate systems
+    (reference, read) are 0-based and start-inclusive/end-exclusive,
+    i.e. the same convention as Python and the C++ STL.
+
+
+.. autoclass:: pbcore.io.CmpH5Reader
+    :members:
+    :undoc-members:
+
+.. autoclass:: pbcore.io.CmpH5Alignment
+    :members:
+    :undoc-members:
+
+
+FASTA Format
+------------
+
+FASTA is a standard format for sequence data.  We recommend using the
+`FastaTable` class, which provides random access to indexed FASTA
+files (using the conventional SAMtools "fai" index).
+
+.. autoclass:: pbcore.io.FastaTable
+    :members:
+
+.. autoclass:: pbcore.io.FastaRecord
+    :members:
+
+.. autoclass:: pbcore.io.FastaReader
+    :members:
+
+.. autoclass:: pbcore.io.FastaWriter
+    :members:
+
+
+FASTQ Format
+------------
+
+FASTQ is a standard format for sequence data with associated quality scores.
+
+.. autoclass:: pbcore.io.FastqRecord
+    :members:
+
+.. autoclass:: pbcore.io.FastqReader
+    :members:
+
+.. autoclass:: pbcore.io.FastqWriter
+    :members:
+
+
+
+GFF Format (Version 3)
+----------------------
+
+The GFF format is an open and flexible standard for representing genomic features.
+
+.. autoclass:: pbcore.io.Gff3Record
+    :members:
+
+.. autoclass:: pbcore.io.GffReader
+    :members:
+
+.. autoclass:: pbcore.io.GffWriter
+    :members:
diff --git a/doc/pbcore.model.rst b/doc/pbcore.model.rst
new file mode 100644
index 0000000..957bedb
--- /dev/null
+++ b/doc/pbcore.model.rst
@@ -0,0 +1,2 @@
+pbcore.model
+============
diff --git a/doc/pbcore.rst b/doc/pbcore.rst
new file mode 100644
index 0000000..b390063
--- /dev/null
+++ b/doc/pbcore.rst
@@ -0,0 +1,22 @@
+pbcore package
+==============
+
+Subpackages
+-----------
+
+.. toctree::
+
+    pbcore.chemistry
+    pbcore.data
+    pbcore.deprecated
+    pbcore.io
+    pbcore.model
+    pbcore.util
+
+Module contents
+---------------
+
+.. automodule:: pbcore
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/doc/pbcore.util.rst b/doc/pbcore.util.rst
new file mode 100644
index 0000000..ddf521d
--- /dev/null
+++ b/doc/pbcore.util.rst
@@ -0,0 +1,18 @@
+pbcore.util
+===========
+
+:mod:`Process` Module
+---------------------
+
+.. automodule:: pbcore.util.Process
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
+:mod:`ToolRunner` Module
+------------------------
+
+.. automodule:: pbcore.util.ToolRunner
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/pbcore/__init__.py b/pbcore/__init__.py
new file mode 100644
index 0000000..394f477
--- /dev/null
+++ b/pbcore/__init__.py
@@ -0,0 +1,31 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+__VERSION__ = "1.0.0"
diff --git a/pbcore/chemistry/__init__.py b/pbcore/chemistry/__init__.py
new file mode 100644
index 0000000..47d189f
--- /dev/null
+++ b/pbcore/chemistry/__init__.py
@@ -0,0 +1,31 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+from .chemistry import *
diff --git a/pbcore/chemistry/chemistry.py b/pbcore/chemistry/chemistry.py
new file mode 100644
index 0000000..e9c31fd
--- /dev/null
+++ b/pbcore/chemistry/chemistry.py
@@ -0,0 +1,91 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+
+__all__ = ["tripleFromMetadataXML",
+           "decodeTriple",
+           "ChemistryLookupError" ]
+
+import xml.etree.ElementTree as ET, os.path
+from pkg_resources import Requirement, resource_filename
+from collections import OrderedDict
+
+class ChemistryLookupError(Exception): pass
+
+def _loadBarcodeMappingsFromFile(mapFile):
+    try:
+        tree = ET.parse(mapFile)
+        root = tree.getroot()
+        mappingElements = root.findall("Mapping")
+        mappings = OrderedDict()
+        mapKeys = ["BindingKit", "SequencingKit", "SoftwareVersion", "SequencingChemistry"]
+        for mapElement in mappingElements:
+            bindingKit          = mapElement.find("BindingKit").text
+            sequencingKit       = mapElement.find("SequencingKit").text
+            softwareVersion     = mapElement.find("SoftwareVersion").text
+            sequencingChemistry = mapElement.find("SequencingChemistry").text
+            mappings[(bindingKit, sequencingKit, softwareVersion)] = sequencingChemistry
+        return mappings
+    except:
+        raise ChemistryLookupError, "Error loading chemistry mapping xml"
+
+def _loadBarcodeMappings():
+    mappingFname = resource_filename(Requirement.parse('pbcore'),'pbcore/chemistry/resources/mapping.xml')
+    return _loadBarcodeMappingsFromFile(mappingFname)
+
+_BARCODE_MAPPINGS = _loadBarcodeMappings()
+
+def tripleFromMetadataXML(metadataXmlPath):
+    """
+    Scrape the triple from the metadata.xml, or raise ChemistryLookupError
+    if the file or the relevant contents are not found
+    """
+    nsd = {None: "http://pacificbiosciences.com/PAP/Metadata.xsd",
+           "pb": "http://pacificbiosciences.com/PAP/Metadata.xsd"}
+    try:
+        tree = ET.parse(metadataXmlPath)
+        root = tree.getroot()
+        bindingKit = root.find("pb:BindingKit/pb:PartNumber", namespaces=nsd).text
+        sequencingKit = root.find("pb:SequencingKit/pb:PartNumber", namespaces=nsd).text
+        # The instrument version is truncated to the first 2 dot delimited components
+        instrumentControlVersion = root.find("pb:InstCtrlVer", namespaces=nsd).text
+        verComponents = instrumentControlVersion.split(".")[0:2]
+        instrumentControlVersion = ".".join(verComponents)
+        return (bindingKit, sequencingKit, instrumentControlVersion)
+    except Exception as e:
+        raise ChemistryLookupError, \
+            ("Could not find, or extract chemistry information from, %s" % (metadataXmlPath,))
+
+def decodeTriple(bindingKit, sequencingKit, softwareVersion):
+    """
+    Return the name of the chemistry configuration given the
+    configuration triple that was recorded on the instrument.
+    """
+    return _BARCODE_MAPPINGS.get((bindingKit, sequencingKit, softwareVersion), "unknown")
diff --git a/pbcore/chemistry/resources/mapping.xml b/pbcore/chemistry/resources/mapping.xml
new file mode 100644
index 0000000..8eda49c
--- /dev/null
+++ b/pbcore/chemistry/resources/mapping.xml
@@ -0,0 +1,154 @@
+<?xml version="1.0" encoding="utf-8"?>
+<MappingTable>
+  <DefaultSequencingChemistry>XL-C2</DefaultSequencingChemistry>
+  <Mapping>
+    <SequencingChemistry>C2</SequencingChemistry>
+    <BindingKit>001672551</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>1.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>C2</SequencingChemistry>
+    <BindingKit>001672551</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.0</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>C2</SequencingChemistry>
+    <BindingKit>001672551</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>C2</SequencingChemistry>
+    <BindingKit>001672551</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-C2</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>1.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-C2</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.0</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-C2</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-C2</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-XL</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>1.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-XL</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>2.0</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-XL</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>XL-XL</SequencingChemistry>
+    <BindingKit>100150800</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-C2</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>1.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-C2</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.0</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-C2</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-C2</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>001558034</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-XL</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>2.0</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-XL</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P4-XL</SequencingChemistry>
+    <BindingKit>100236500</BindingKit>
+    <SequencingKit>100180800</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P5-C3</SequencingChemistry>
+    <BindingKit>100256000</BindingKit>
+    <SequencingKit>100254800</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P5-C3</SequencingChemistry>
+    <BindingKit>100256000</BindingKit>
+    <SequencingKit>100254800</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P6-C4</SequencingChemistry>
+    <BindingKit>100356300</BindingKit>
+    <SequencingKit>100356200</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P6-C4</SequencingChemistry>
+    <BindingKit>100356300</BindingKit>
+    <SequencingKit>100356200</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P6-C4</SequencingChemistry>
+    <BindingKit>100372700</BindingKit>
+    <SequencingKit>100356200</SequencingKit>
+    <SoftwareVersion>2.1</SoftwareVersion>
+  </Mapping>
+  <Mapping>
+    <SequencingChemistry>P6-C4</SequencingChemistry>
+    <BindingKit>100372700</BindingKit>
+    <SequencingKit>100356200</SequencingKit>
+    <SoftwareVersion>2.3</SoftwareVersion>
+  </Mapping>
+</MappingTable>
diff --git a/pbcore/data/1.4_bas_files.fofn b/pbcore/data/1.4_bas_files.fofn
new file mode 100644
index 0000000..2cb97cd
--- /dev/null
+++ b/pbcore/data/1.4_bas_files.fofn
@@ -0,0 +1,2 @@
+m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5
+m110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5
diff --git a/pbcore/data/2.0_bax_files.fofn b/pbcore/data/2.0_bax_files.fofn
new file mode 100644
index 0000000..d77163a
--- /dev/null
+++ b/pbcore/data/2.0_bax_files.fofn
@@ -0,0 +1,3 @@
+m130522_092457_42208_c100497142550000001823078008081323_s1_p0.1.bax.h5
+m130522_092457_42208_c100497142550000001823078008081323_s1_p0.2.bax.h5
+m130522_092457_42208_c100497142550000001823078008081323_s1_p0.3.bax.h5
diff --git a/pbcore/data/2.1_bax_files.fofn b/pbcore/data/2.1_bax_files.fofn
new file mode 100644
index 0000000..a0f0097
--- /dev/null
+++ b/pbcore/data/2.1_bax_files.fofn
@@ -0,0 +1,3 @@
+m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.bax.h5
+m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.bax.h5
+m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.bax.h5
diff --git a/pbcore/data/2.1_ccs_files.fofn b/pbcore/data/2.1_ccs_files.fofn
new file mode 100644
index 0000000..55d2143
--- /dev/null
+++ b/pbcore/data/2.1_ccs_files.fofn
@@ -0,0 +1 @@
+m130727_114215_42211_c100569412550000001823090301191423_s1_p0.1.ccs.h5
diff --git a/pbcore/data/2.3_bax_files.fofn b/pbcore/data/2.3_bax_files.fofn
new file mode 100644
index 0000000..4a4fec0
--- /dev/null
+++ b/pbcore/data/2.3_bax_files.fofn
@@ -0,0 +1,3 @@
+m140912_020930_00114_c100702482550000001823141103261590_s1_p0.1.bax.h5
+m140912_020930_00114_c100702482550000001823141103261590_s1_p0.2.bax.h5
+m140912_020930_00114_c100702482550000001823141103261590_s1_p0.3.bax.h5
diff --git a/pbcore/data/Fluidigm_human_amplicons.fasta b/pbcore/data/Fluidigm_human_amplicons.fasta
new file mode 100644
index 0000000..b6d7ac5
--- /dev/null
+++ b/pbcore/data/Fluidigm_human_amplicons.fasta
@@ -0,0 +1,250 @@
+>ref000001|EGFR_Exon_2
+TTTCTTCCAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTTTGAAGAT
+CATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCCTTGGGAATTTGGAA
+ATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTAAAGGTTGGTGACTTTGATTTT
+CCT
+>ref000002|EGFR_Exon_3
+TTCTTAGACCATCCAGGAGGTGGCTGGTTATGTCCTCATTGCCCTCAACACAGTGGAGCG
+AATTCCTTTGGAAAACCTGCAGATCATCAGAGGAAATATGTACTACGAAAATTCCTATGC
+CTTAGCAGTCTTATCTAACTATGATGCAAATAAAACCGGACTGAAGGAGCTGCCCATGAG
+AAATTTACAGGGTGAGAGGCTGG
+>ref000003|EGFR_Exon_4
+AGCTGGAAAGAGTGCTCACCGCAGTTCCATTCTCCCGCAGAAATCCTGCATGGCGCCGTG
+CGGTTCAGCAACAACCCTGCCCTGTGCAACGTGGAGAGCATCCAGTGGCGGGACATAGTC
+AGCAGTGACTTTCTCAGCAACATGTCGATGGACTTCCAGAACCACCTGGGCAGCTGTAAG
+TGTCGCATACACACTATCTCTGCCTCCAGCTCCTA
+>ref000004|EGFR_Exon_5
+GCGTCATCAGTTTCTCATCATTTCACTGAGATATGCATCTATTACTTTTACATTTCAGGC
+CAAAAGTGTGATCCAAGCTGTCCCAATGGGAGCTGCTGGGGTGCAGGAGAGGAGAACTGC
+CAGAAACGTAAGTCAGTGAACAGCCTCAGACCCATGT
+>ref000005|EGFR_Exon_6
+CCCTGGGAAATGATCCTACCCTCACTCTTCAGCTCACAGGGAACCTTTGCTCTTTTTCAG
+TGACCAAAATCATCTGTGCCCAGCAGTGCTCCGGGCGCTGCCGTGGCAAGTCCCCCAGTG
+ACTGCTGCCACAACCAGTGTGCTGCAGGCTGCACAGGCCCCCGGGAGAGCGACTGCCTGG
+TAAGA
+>ref000006|EGFR_Exon_7
+CCAGCGTGTCCTCTCTCCTCCATAGGTCTGCCGCAAATTCCGAGACGAAGCCACGTGCAA
+GGACACCTGCCCCCCACTCATGCTCTACAACCCCACCACGTACCAGATGGATGTGAACCC
+CGAGGGCAAATACAGCTTTGGTGCCACCTGCGTGAAGAAGTGTCCCCGTGAGTCCTCCTC
+TGTGGGCCCTCTAACTGGTCAGGCATCCTTGTC
+>ref000007|EGFR_Exon_8
+CAAAGGAGGATGGAGCCTTTCCATCACCCCTCAAGAGGACCTGGACCGCCTGTGTGAGGC
+CCGAGCACCTGGTGCCACCGTCATCACCTTCCTTTCATGCTCTCTTCCCCAGGTAATTAT
+GTGGTGACAGATCACGGCTCGTGCGTCCGAGCCTGTGGGGCCGACAGCTATGAGATGGAG
+GAAGACGGCGTCCGCAAGTGTAAGAAGTGCGAAGGGCCTTGCCGCAAAGGTAGGAAGCCC
+GCCGGTGTGCGGACGAGGCTTGTTCTCGGCTGCTGAGGCTGGGCTCTCATGCCACCTCCA
+AAGGAACACATC
+>ref000008|EGFR_Exon_9
+TCCAACAAATGTGAACGGAATACACGTCTCTCTTATCTCTGCAGTGTGTAACGGAATAGG
+TATTGGTGAATTTAAAGACTCACTCTCCATAAATGCTACGAATATTAAACACTTCAAAAA
+CTGCACCTCCATCAGTGGCGATCTCCACATCCTGCCGGTGGCATTTAGGGGGTGAGTCAC
+AGGTTCAGTTGCTTG
+>ref000009|EGFR_Exon_10
+GATCAATAATCACCCTGTTGTTTGTTTCAGTGACTCCTTCACACATACTCCTCCTCTGGA
+TCCACAGGAACTGGATATTCTGAAAACCGTAAAGGAAATCACAGGTTTGAGCTGAATTAT
+CACATGAATATAAATGGGAAATCAGTGTTTTAGAGAGAGAACTTTTCGACATATTTCCTG
+TTCCCTTGGAA
+>ref000010|EGFR_Exon_11
+TCCTACGTGGTGTGTGTCTGAAGTCTTTCATCTGCCTTACAGGGTTTTTGCTGATTCAGG
+CTTGGCCTGAAAACAGGACGGACCTCCATGCCTTTGAGAACCTAGAAATCATACGCGGCA
+GGACCAAGCAACAGTAAGTTGACCACAGCCAAAGC
+>ref000011|EGFR_Exon_12
+CCACATGATTTTTCTTCTCTCCAATGTAGTGGTCAGTTTTCTCTTGCAGTCGTCAGCCTG
+AACATAACATCCTTGGGATTACGCTCCCTCAAGGAGATAAGTGATGGAGATGTGATAATT
+TCAGGAAACAAAAATTTGTGCTATGCAAATACAATAAACTGGAAAAAACTGTTTGGGACC
+TCCGGTCAGAAAACCAAAATTATAAGCAACAGAGGTGAAAACAGCTGCAGTAAGTCACCG
+>ref000012|EGFR_Exon_13
+GCTCTGTCACTGACTGCTGTGACCCACTCTGTCTCCGCAGAGGCCACAGGCCAGGTCTGC
+CATGCCTTGTGCTCCCCCGAGGGCTGCTGGGGCCCGGAGCCCAGGGACTGCGTCTCTTGC
+CGGAATGTCAGCCGAGGCAGGGAATGCGTGGACAAGTGCAACCTTCTGGAGGGGTAGGAG
+GTTATTTCTTTAATCCCCTTGCGTTGATCAAAAATAAGGCTCCAGGTTGTTGTTATAGC
+>ref000013|EGFR_Exon_14
+GCTGACGGGTTTCCTCTTCCTCCTCTCAGTGAGCCAAGGGAGTTTGTGGAGAACTCTGAG
+TGCATACAGTGCCACCCAGAGTGCCTGCCTCAGGCCATGAACATCACCTGCACAGGACGG
+GTAAGAGCCCCTTGCTGCTATCCACGTC
+>ref000014|EGFR_Exon_15
+GCATGAACATTTTTCTCCACCTTGGTGCAGGGACCAGACAACTGTATCCAGTGTGCCCAC
+TACATTGACGGCCCCCACTGCGTCAAGACCTGCCCGGCAGGAGTCATGGGAGAAAACAAC
+ACCCTGGTCTGGAAGTACGCAGACGCCGGCCATGTGTGCCACCTGTGCCATCCAAACTGC
+ACCTACGGGTGAGTGGAAAGTGAAGGAGAACAGAA
+>ref000015|EGFR_Exon_16
+TTTCTCTTTCACTTCCTACAGATGCACTGGGCCAGGTCTTGAAGGCTGTCCAACGAATGG
+GTAAGTGTTCACAGCTCTGTGTCACATGGACCTCGTCAAGAATGACCACACTGCTGTGG
+>ref000016|EGFR_Exon_17
+TGGAATCTGTCAGCAACCTCACCCTTCCTTGTTCCTCCACCTCATTCCAGGCCTAAGATC
+CCGTCCATCGCCACTGGGATGGTGGGGGCCCTCCTCTTGCTGCTGGTGGTGGCCCTGGGG
+ATCGGCCTCTTCATGCGAAGGCGCCACATCGTTCGGAAGCGCACGCTGCGGAGGCTGCTG
+CAGGAGAGGGAGGTGAGTGCCAGTCCTGGG
+>ref000017|EGFR_Exon_18
+GCTGAGGTGACCCTTGTCTCTGTGTTCTTGTCCCCCCCAGCTTGTGGAGCCTCTTACACC
+CAGTGGAGAAGCTCCCAACCAAGCTCTCTTGAGGATCTTGAAGGAAACTGAATTCAAAAA
+GATCAAAGTGCTGGGCTCCGGTGCGTTCGGCACGGTGTATAAGGTAAGGTCCCTGGCACA
+GGCCTCTGGGCTGGGCCGCAGGGCCTCTCATGGTCTGGTGGG
+>ref000018|EGFR_Exon_19
+TCACAATTGCCAGTTAACGTCTTCCTTCTCTCTCTGTCATAGGGACTCTGGATCCCAGAA
+GGTGAGAAAGTTAAAATTCCCGTCGCTATCAAGGAATTAAGAGAAGCAACATCTCCGAAA
+GCCAACAAGGAAATCCTCGATGTGAGTTTCTGCTTTGCTGTGTGG
+>ref000019|EGFR_Exon_20
+CCACACTGACGTGCCTCTCCCTCCCTCCAGGAAGCCTACGTGATGGCCAGCGTGGACAAC
+CCCCACGTGTGCCGCCTGCTGGGCATCTGCCTCACCTCCACCGTGCAGCTCATCACGCAG
+CTCATGCCCTTCGGCTGCCTCCTGGACTATGTCCGGGAACACAAAGACAATATTGGCTCC
+CAGTACCTGCTCAACTGGTGTGTGCAGATCGCAAAGGTAATCAGGGAAGGGAGATACGG
+>ref000020|EGFR_Exon_21
+CCTCACAGCAGGGTCTTCTCTGTTTCAGGGCATGAACTACTTGGAGGACCGTCGCTTGGT
+GCACCGCGACCTGGCAGCCAGGAACGTACTGGTGAAAACACCGCAGCATGTCAAGATCAC
+AGATTTTGGGCTGGCCAAACTGCTGGGTGCGGAAGAGAAAGAATACCATGCAGAAGGAGG
+CAAAGTAAGGAGGTGGCTTTAGGTCAG
+>ref000021|EGFR_Exon_22	MetadataTest
+CACTGCCTCATCTCTCACCATCCCAAGGTGCCTATCAAGTGGATGGCATTGGAATCAATT
+TTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGTGAGTCATAATCCTGA
+TGCTAATGAGTTTGTACTGAGGCCAAGCTGG
+>ref000022|EGFR_Exon_23
+CATGATCCCACTGCCTTCTTTTCTTGCTTCATCCTCTCAGGGGTGACTGTTTGGGAGTTG
+ATGACCTTTGGATCCAAGCCATATGACGGAATCCCTGCCAGCGAGATCTCCTCCATCCTG
+GAGAAAGGAGAACGCCTCCCTCAGCCACCCATATGTACCATCGATGTCTACATGATCATG
+GTCAAGTGTGAGTGACTGGTGGGTCTGTCCACACT
+>ref000023|EGFR_Exon_24
+TTCCAGTGTTCTAATTGCACTGTTTTTTCTCATTCCTTCCCCAGGCTGGATGATAGACGC
+AGATAGTCGCCCAAAGTTCCGTGAGTTGATCATCGAATTCTCCAAAATGGCCCGAGACCC
+CCAGCGCTACCTTGTCATTCAGGTACAAATTGCAGTCTGTGCTTCCATTGGGAAGAGTCC
+CTC
+>ref000024|EGFR_Exon_25
+CTAATAGCCTCAAAATCTCTGCACCAGGGGGATGAAAGAATGCATTTGCCAAGTCCTACA
+GACTCCAACTTCTACCGTGCCCTGATGGATGAAGAAGACATGGACGACGTGGTGGATGCC
+GACGAGTACCTCATCCCACAGCAGGGCTTCTTCAGCAGCCCCTCCACGTCACGGACTCCC
+CTCCTGAGCTCTCTGGTATGAAATCTCTGTCTCTCTCTCTCTCTCAAGCTGTGTCTACTC
+ATTTGAACAAA
+>ref000025|EGFR_Exon_26
+CATTCCATGGGCAACTTCTCTGTTTCTTTTTCAGAGTGCAACCAGCAACAATTCCACCGT
+GGCTTGCATTGATAGAAATGGGGTATGTATGAACACCTTATAAGCCAGAA
+>ref000026|EGFR_Exon_27
+CCTTCCCTCATTTCCTCCTGCAGCTGCAAAGCTGTCCCATCAAGGAAGACAGCTTCTTGC
+AGCGATACAGCTCAGACCCCACAGGCGCCTTGACTGAGGACAGCATAGACGACACCTTCC
+TCCCAGTGCCTGGTGAGTGGCTTGTCTGGA
+>ref000027|EGFR_Exon_28.1
+CCTCTGATTTCTTTCCACTTTCAGAATACATAAACCAGTCCGTTCCCAAAAGGCCCGCTG
+GCTCTGTGCAGAATCCTGTCTATCACAATCAGCCTCTGAACCCCGCGCCCAGCAGAGACC
+CACACTACCAGGACCCCCACAGCACTGCAGTGGGCAACCCCGAGTATCTCAACACTGTCC
+AGCCCACCTGTGTCAACAGCACATTCGACAGCCCTGCCCACTGGGCCCAGAAAGGCAGCC
+ACCAAATTAG
+>ref000028|EGFR_Exon_28.2
+TGTCAACAGCACATTCGACAGCCCTGCCCACTGGGCCCAGAAAGGCAGCCACCAAATTAG
+CCTGGACAACCCTGACTACCAGCAGGACTTCTTTCCCAAGGAAGCCAAGCCAAATGGCAT
+CTTTAAGGGCTCCACAGCTGAAAATGCAGAATACCTAAGGGTCGCGCCACAAAGCAGTGA
+ATTTATTGGAGCATGACCACGGAGGATAGTATGAGCCCTAAAAATCCAGACTCTTTCGAT
+ACCCAGGACC
+>ref000029|MET_Exon_1.1
+CTCTCGCCTTGAACCTGTTTTGGCAGATAAACCTCTCATAATGAAGGCCCCCGCTGTGCT
+TGCACCTGGCATCCTCGTGCTCCTGTTTACCTTGGTGCAGAGGAGCAATGGGGAGTGTAA
+AGAGGCACTAGCAAAGTCCGAGATGAATGTGAATATGAAGTATCAGCTTCCCAACTTCAC
+CGCGGAAACACCCATCCAGAATGTCATTCTACATGAGCATCACATTTTCCTTGGTGCCAC
+TAACTACATTTATGTTTTAAATGAGGAAGACCTTCAGAAGGTTGCTGAGTACAAGACTGG
+GCCTGTGCTG
+>ref000030|MET_Exon_1.2
+TTCCTTGGTGCCACTAACTACATTTATGTTTTAAATGAGGAAGACCTTCAGAAGGTTGCT
+GAGTACAAGACTGGGCCTGTGCTGGAACACCCAGATTGTTTCCCATGTCAGGACTGCAGC
+AGCAAAGCCAATTTATCAGGAGGTGTTTGGAAAGATAACATCAACATGGCTCTAGTTGTC
+GACACCTACTATGATGATCAACTCATTAGCTGTGGCAGCGTCAACAGAGGGACCTGCCAG
+CGACATGTCTTTCCCCACAATCATACTGCTGACATACAGTCGGAGGTTCACTGCATATTC
+TCCC
+>ref000031|MET_Exon_2
+TGGATTCACATTAACTCTATGACCATATTTTATTCCAGACACTTCTGAGAAATTCATCAG
+GCTGTGAAGCGCGCCGTGATGAATATCGAACAGAGTTTACCACAGCTTTGCAGCGCGTTG
+ACTTATTCATGGGTCAATTCAGCGAAGTCCTCTTAACATCTATATCCACCTTCATTAAAG
+GAGACCTCACCATAGCTAATCTTGGGACATCAGAGGGTCGCTTCATGCAGGTAAGTGCTT
+TCTGAGAGTAGCTGTGTCTGTTCTATCTGGTATTGTGCAA
+>ref000032|MET_Exon_3
+TGAGCTTGTTGGAATAAGGATGTTATAACTTTTTTGCTGTTTAGGTTGTGGTTTCTCGAT
+CAGGACCATCAACCCCTCATGTGAATTTTCTCCTGGACTCCCATCCAGTGTCTCCAGAAG
+TGATTGTGGAGCATACATTAAACCAAAATGGCTACACACTGGTTATCACTGGGAAGAAGG
+TAAGCTGTTCCCACAGGGAATTTCCATAGACG
+>ref000033|MET_Exon_4
+GAAGCTCTTTCCACCCCTTCTCTTCACAGATCACGAAGATCCCATTGAATGGCTTGGGCT
+GCAGACATTTCCAGTCCTGCAGTCAATGCCTCTCTGCCCCACCCTTTGTTCAGTGTGGCT
+GGTGCCACGACAAATGTGTGCGATCGGAGGAATGCCTGAGCGGGACATGGACTCAACAGA
+TCTGTCTGCCTGCAATCTACAAGGTAGGAATCTCTAACAGCTGGCA
+>ref000034|MET_Exon_5
+TGTCCTTGTAGGTTTTCCCAAATAGTGCACCCCTTGAAGGAGGGACAAGGCTGACCATAT
+GTGGCTGGGACTTTGGATTTCGGAGGAATAATAAATTTGATTTAAAGAAAACTAGAGTTC
+TCCTTGGAAATGAGAGCTGCACCTTGACTTTAAGTGAGAGCACGATGAATACGTAAGGAT
+CTTAAAATGCTTTGCTGGGG
+>ref000035|MET_Exon_6
+GAAAATTCCTTGGATTTGTCATGTATTAAACTTTGGGTTTTTTTTCCAGATTGAAATGCA
+CAGTTGGTCCTGCCATGAATAAGCATTTCAATATGTCCATAATTATTTCAAATGGCCACG
+GGACAACACAATACAGTACATTCTCCTATGTGGTAAGGAAGATTCTATCCTATCATG
+>ref000036|MET_Exon_7
+GTTTTGTTTTTATCTCCCCTCCAGGATCCTGTAATAACAAGTATTTCGCCGAAATACGGT
+CCTATGGCTGGTGGCACTTTACTTACTTTAACTGGAAATTACCTAAACAGTGGGAATTCT
+AGACACATTTCAATTGGTGGAAAAACATGTACTTTAAAAAGGTGTTGTAAATTTATTTTT
+TGTTGCATCTGTCAATTTGAA
+>ref000037|MET_Exon_8
+GGAACCATTGAGTTATATCCTTTTGATTTGTGGATATAATTCTAAAATATGTGTATCTCT
+AATAGCTAAAATTCACTTCCTTAATTTTTTTTGTTCAGTGTGTCAAACAGTATTCTTGAA
+TGTTATACCCCAGCCCAAACCATTTCAACTGAGTTTGCTGTTAAATTGAAAATTGACTTA
+GCCAACCGAGAGACAAGCATCTTCAGTTACCGTGAAGATCCCATTGTCTATGAAATTCAT
+CCAACCAAATCTTTTATTAGGTAAGTAGAAGCTTCTGATGGGTATAAGAAAACAA
+>ref000038|MET_Exon_9
+TTGGTGGAAAGAACCTCTCAACATTGTCAGTTTTCTATTTTGCTTTGCCAGTGGTGGGAG
+CACAATAACAGGTGTTGGGAAAAACCTGAATTCAGTTAGTGTCCCGAGAATGGTCATAAA
+TGTGCATGAAGCAGGAAGGAACTTTACAGTGGTAAGTCCTTTGAGCAATGGTTCTACTCA
+GAGCTCTGCATCTTTGCCTCTAACCATGTGGCTTTCATGGTACCTG
+>ref000039|MET_Exon_10
+TGTTGCCAAGCTGTATTCTGTTTACAGTGGATAATTGTGTCTTTCTCTAGGCATGTCAAC
+ATCGCTCTAATTCAGAGATAATCTGTTGTACCACTCCTTCCCTGCAACAGCTGAATCTGC
+AACTCCCCCTGAAAACCAAAGCCTTTTTCATGTTAGATGGGATCCTTTCCAAATACTTTG
+ATCTCATTTATGTACATAATCCTGTGTTTAAGCCTTTTGAAAAGCCAGTGATGATCTCAA
+TGGGCAATGAAAATGTACTGGAAATTAAGGTAAGAAATGCTTTAAACACTGTCTTAAATC
+ATCAGCTCAAA
+>ref000040|MET_Exon_12
+GGACCCAAAGTGCTACAACCTGTGTAGTACAAATATCTATCATGGCTAAATGCTGACTTT
+TCTTTATTTGTCATTTTTAGTGGAAGCAAGCAATTTCTTCAACCGTCCTTGGAAAAGTAA
+TAGTTCAACCAGATCAGAATTTCACAGGATTGATTGCTGGTGTTGTCTCAATATCAACAG
+CACTGTTATTACTACTTGGGTTTTTCCTGTGGCTGAAAAAGAGAAAGCAAATTAAAGGTG
+CATTTTTGTTACTGTTCATTTTTAGAAGTTACCTTAAGAACACAGTCATTACAGTTTAAG
+ATTGTCGTCGATTCTTG
+>ref000041|MET_Exon_13
+GCCCATGATAGCCGTCTTTAACAAGCTCTTTCTTTCTCTCTGTTTTAAGATCTGGGCAGT
+GAATTAGTTCGCTACGATGCAAGAGTACACACTCCTCATTTGGATAGGCTTGTAAGTGCC
+CGAAGTGTAAGCCCAACTACAGAAATGGTTTCAAATGAATCTGTAGACTACCGAGCTACT
+TTTCCAGAAGGTATATTTCAGTTTATTGTTCTGAGAAATACCTATACATATACCTCAGTG
+GGTTGTGACATTGTTG
+>ref000042|MET_Exon_14
+CCTTCATCTTACAGATCAGTTTCCTAATTCATCTCAGAACGGTTCATGCCGACAAGTGCA
+GTATCCTCTGACAGACATGTCCCCCATCCTAACTAGTGGGGACTCTGATATATCCAGTCC
+ATTACTGCAAAATACTGTCCACATTGACCTCAGTGCTCTAAATCCAGAGCTGGTCCAGGC
+AGTGCAGCATGTAGTGATTGGGCCCAGTAGCCTGATTGTGCATTTCAATGAAGTCATAGG
+AAGAGGTAAGTATTTCCACTCAGCTTTTTGTTAAATACGATTTTCCAGTAAGC
+>ref000043|MET_Exon_15
+ACGCAGTGCTAACCAAGTTCTTTCTTTTGCACAGGGCATTTTGGTTGTGTATATCATGGG
+ACTTTGTTGGACAATGATGGCAAGAAAATTCACTGTGCTGTGAAATCCTTGAACAGTAAG
+TGGCATTTTATTTAACCATGGAGTATACTTTTGTGGTTTGCAACCTAATAAATAGCTTAT
+AATAAAACGTTGATTTACACTTTCCCCTTGTGGA
+>ref000044|MET_Exon_16
+TGTCTCCACCACTGGATTTCTCAGGAATCACTGACATAGGAGAAGTTTCCCAATTTCTGA
+CCGAGGGAATCATCATGAAAGATTTTAGTCATCCCAATGTCCTCTCGCTCCTGGGAATCT
+GCCTGCGAAGTGAAGGGTCTCCGCTGGTGGTCCTACCATACATGAAACATGGAGATCTTC
+GAAATTTCATTCGAAATGAGACTCATGTAAGTTGACTGCCAAGCTTACTAACTGGCAAAC
+TAGCTGTAAGCC
+>ref000045|MET_Exon_17
+TGCTTTTCTAACTCTCTTTGACTGCAGAATCCAACTGTAAAAGATCTTATTGGCTTTGGT
+CTTCAAGTAGCCAAAGGCATGAAATATCTTGCAAGCAAAAAGTTTGTCCACAGAGACTTG
+GCTGCAAGAAACTGTATGTAAGTATCAGAATCTCTGTGCCACAATCCAAATTAAGTGACA
+AGGAGGA
+>ref000046|MET_Exon_18
+TTCTATTTCAGCCACGGGTAATAATTTTTGTCCTTTCTGTAGGCTGGATGAAAAATTCAC
+AGTCAAGGTTGCTGATTTTGGTCTTGCCAGAGACATGTATGATAAAGAATACTATAGTGT
+ACACAACAAAACAGGTGCAAAGCTGCCAGTGAAGTGGATGGCTTTGGAAAGTCTGCAAAC
+TCAAAAGTTTACCACCAAGTCAGATGTGGTAATGTATTGGTTATCTCTGAGTTTCTCCTC
+T
+>ref000047|MET_Exon_19
+CTCACCTCATCTGTCCTGTTTCTTGTTTTACTAGTGGTCCTTTGGCGTGCTCCTCTGGGA
+GCTGATGACAAGAGGAGCCCCACCTTATCCTGACGTAAACACCTTTGATATAACTGTTTA
+CTTGTTGCAAGGGAGAAGACTCCTACAACCCGAATACTGCCCAGACCCCTTGTAAGTAGT
+CTTTCTGTACCTCTTACGTTCTTTACTTTTACAGAAATGCC
+>ref000048|MET_Exon_20
+CCTGCCTTCAAAGGGTCTCTTACAGCATGTCTTTCTTTTTGGAACAGATATGAAGTAATG
+CTAAAATGCTGGCACCCTAAAGCCGAAATGCGCCCATCCTTTTCTGAACTGGTGTCCCGG
+ATATCAGCGATCTTCTCTACTTTCATTGGGGAGCACTATGTCCATGTGAACGCTACTTAT
+GTGAACGTAAAATGTGTCGCTCCGTATCCTTCTCTGTTGTCATCAGAAGATAACGCTGAT
+GATGAGGTGGACACACGACCAGCCTCCTTCTGGGAGACATCATAGTGCTAGTACTATGTC
+AAAGCAACAGTCCACAC
diff --git a/pbcore/data/Fluidigm_human_amplicons.fasta.fai b/pbcore/data/Fluidigm_human_amplicons.fasta.fai
new file mode 100644
index 0000000..b1fc715
--- /dev/null
+++ b/pbcore/data/Fluidigm_human_amplicons.fasta.fai
@@ -0,0 +1,48 @@
+ref000001|EGFR_Exon_2	183	23	60	61
+ref000002|EGFR_Exon_3	203	233	60	61
+ref000003|EGFR_Exon_4	215	463	60	61
+ref000004|EGFR_Exon_5	157	705	60	61
+ref000005|EGFR_Exon_6	185	888	60	61
+ref000006|EGFR_Exon_7	213	1100	60	61
+ref000007|EGFR_Exon_8	312	1340	60	61
+ref000008|EGFR_Exon_9	195	1681	60	61
+ref000009|EGFR_Exon_10	191	1904	60	61
+ref000010|EGFR_Exon_11	155	2123	60	61
+ref000011|EGFR_Exon_12	240	2305	60	61
+ref000012|EGFR_Exon_13	239	2573	60	61
+ref000013|EGFR_Exon_14	148	2840	60	61
+ref000014|EGFR_Exon_15	215	3015	60	61
+ref000015|EGFR_Exon_16	119	3258	60	61
+ref000016|EGFR_Exon_17	210	3403	60	61
+ref000017|EGFR_Exon_18	222	3641	60	61
+ref000018|EGFR_Exon_19	165	3891	60	61
+ref000019|EGFR_Exon_20	239	4083	60	61
+ref000020|EGFR_Exon_21	207	4350	60	61
+ref000021|EGFR_Exon_22	151	4598	60	61
+ref000022|EGFR_Exon_23	215	4776	60	61
+ref000023|EGFR_Exon_24	183	5019	60	61
+ref000024|EGFR_Exon_25	251	5230	60	61
+ref000025|EGFR_Exon_26	110	5510	60	61
+ref000026|EGFR_Exon_27	150	5646	60	61
+ref000027|EGFR_Exon_28.1	250	5825	60	61
+ref000028|EGFR_Exon_28.2	250	6106	60	61
+ref000029|MET_Exon_1.1	310	6385	60	61
+ref000030|MET_Exon_1.2	304	6725	60	61
+ref000031|MET_Exon_2	280	7057	60	61
+ref000032|MET_Exon_3	212	7364	60	61
+ref000033|MET_Exon_4	226	7602	60	61
+ref000034|MET_Exon_5	200	7854	60	61
+ref000035|MET_Exon_6	177	8080	60	61
+ref000036|MET_Exon_7	201	8282	60	61
+ref000037|MET_Exon_8	295	8509	60	61
+ref000038|MET_Exon_9	226	8831	60	61
+ref000039|MET_Exon_10	311	9084	60	61
+ref000040|MET_Exon_12	317	9424	60	61
+ref000041|MET_Exon_13	256	9770	60	61
+ref000042|MET_Exon_14	293	10054	60	61
+ref000043|MET_Exon_15	214	10375	60	61
+ref000044|MET_Exon_16	252	10616	60	61
+ref000045|MET_Exon_17	187	10896	60	61
+ref000046|MET_Exon_18	241	11110	60	61
+ref000047|MET_Exon_19	221	11379	60	61
+ref000048|MET_Exon_20	317	11627	60	61
diff --git a/pbcore/data/Fluidigm_human_amplicons_tiny.fasta b/pbcore/data/Fluidigm_human_amplicons_tiny.fasta
new file mode 100644
index 0000000..7fa541e
--- /dev/null
+++ b/pbcore/data/Fluidigm_human_amplicons_tiny.fasta
@@ -0,0 +1,19 @@
+>ref000001|EGFR_Exon_2
+TTTCTTCCAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTTTGAAGAT
+CATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCCTTGGGAATTTGGAA
+ATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTAAAGGTTGGTGACTTTGATTTT
+CCT
+>ref000002|EGFR_Exon_3
+TTCTTAGACCATCCAGGAGGTGGCTGGTTATGTCCTCATTGCCCTCAACACAGTGGAGCG
+AATTCCTTTGGAAAACCTGCAGATCATCAGAGGAAATATGTACTACGAAAATTCCTATGC
+CTTAGCAGTCTTATCTAACTATGATGCAAATAAAACCGGACTGAAGGAGCTGCCCATGAG
+AAATTTACAGGGTGAGAGGCTGG
+>ref000003|EGFR_Exon_4
+AGCTGGAAAGAGTGCTCACCGCAGTTCCATTCTCCCGCAGAAATCCTGCATGGCGCCGTG
+CGGTTCAGCAACAACCCTGCCCTGTGCAACGTGGAGAGCATCCAGTGGCGGGACATAGTC
+AGCAGTGACTTTCTCAGCAACATGTCGATGGACTTCCAGAACCACCTGGGCAGCTGTAAG
+TGTCGCATACACACTATCTCTGCCTCCAGCTCCTA
+>ref000004|EGFR_Exon_5
+GCGTCATCAGTTTCTCATCATTTCACTGAGATATGCATCTATTACTTTTACATTTCAGGC
+CAAAAGTGTGATCCAAGCTGTCCCAATGGGAGCTGCTGGGGTGCAGGAGAGGAGAACTGC
+CAGAAACGTAAGTCAGTGAACAGCCTCAGACCCATGT
\ No newline at end of file
diff --git a/pbcore/data/__init__.py b/pbcore/data/__init__.py
new file mode 100644
index 0000000..b00ce44
--- /dev/null
+++ b/pbcore/data/__init__.py
@@ -0,0 +1,223 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+from pkg_resources import Requirement, resource_filename
+
+# Maps each example cmp.h5 file to the bas.h5 files that were used to
+# generate it.
+DATA_FILES = {'aligned_reads_1.cmp.h5':
+                  ['m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5',
+                   'm110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5']}
+
+# Movie-name prefixes of the bundled example files.
+MOVIE_NAME_14 = "m110818_075520_42141_c100129202555500000315043109121112_s1_p0"
+MOVIE_NAME_20 = "m130522_092457_42208_c100497142550000001823078008081323_s1_p0"
+MOVIE_NAME_21 = "m130731_192718_42129_c100564662550000001823085912221321_s1_p0"
+MOVIE_NAME_23 = "m140912_020930_00114_c100702482550000001823141103261590_s1_p0"
+MOVIE_NAME_CCS = "m130727_114215_42211_c100569412550000001823090301191423_s1_p0"
+MOVIE_NAME_BC = "m140307_221913_42203_c100626172550000001823119008061414_s1_p0"
+
+def _getAbsPath(fname):
+    """
+    Returns the filesystem path of the file `fname` under pbcore/data,
+    as resolved by pkg_resources for the 'pbcore' requirement.
+    """
+    return resource_filename(Requirement.parse('pbcore'),'pbcore/data/%s' % fname)
+
+def _getChunkPaths(movieName, extension):
+    """
+    Returns the list of paths "<movieName>.<k>.<extension>" for
+    k = 1, 2, 3.
+    """
+    return [_getAbsPath('.'.join((movieName, str(k), extension)))
+            for k in range(1, 4)]
+
+def getBasH5_v20():
+    """
+    Returns the filename of the bas.h5 file for MOVIE_NAME_20.
+    """
+    return _getAbsPath(MOVIE_NAME_20 + '.bas.h5')
+
+def getBaxH5_v20():
+    """
+    Returns a list of the three bax.h5 filenames for MOVIE_NAME_20.
+    """
+    return _getChunkPaths(MOVIE_NAME_20, 'bax.h5')
+
+def getBasH5_v21():
+    """
+    Returns the filename of the bas.h5 file for MOVIE_NAME_21.
+    """
+    return _getAbsPath(MOVIE_NAME_21 + '.bas.h5')
+
+def getBaxH5_v21():
+    """
+    Returns a list of the three bax.h5 filenames for MOVIE_NAME_21.
+    """
+    return _getChunkPaths(MOVIE_NAME_21, 'bax.h5')
+
+def getBasH5_v23():
+    """
+    Returns the filename of the bas.h5 file for MOVIE_NAME_23.
+    """
+    return _getAbsPath(MOVIE_NAME_23 + '.bas.h5')
+
+def getBaxH5_v23():
+    """
+    Returns a list of the three bax.h5 filenames for MOVIE_NAME_23.
+    """
+    return _getChunkPaths(MOVIE_NAME_23, 'bax.h5')
+
+def getCCSH5():
+    """
+    Returns the filename of the ccs.h5 file for MOVIE_NAME_CCS.
+    """
+    return _getAbsPath(MOVIE_NAME_CCS + '.1.ccs.h5')
+
+def getBcH5s():
+    """
+    Returns a list of the three bc.h5 filenames for MOVIE_NAME_BC.
+    """
+    return _getChunkPaths(MOVIE_NAME_BC, 'bc.h5')
+
+def getCmpH5s():
+    '''
+    Returns a list of dictionaries containing 2 keys: cmph5 and
+    bash5s. The latter are the bash5s that were used to generate the
+    cmp.h5 file.
+    '''
+    # Build a real list (not map()) so that 'bash5s' can be indexed and
+    # iterated more than once on Python 3 as well as Python 2.
+    return [{'cmph5' : _getAbsPath(cmph5),
+             'bash5s': [_getAbsPath(bash5) for bash5 in bash5s]}
+            for cmph5, bash5s in DATA_FILES.items()]
+
+def getCmpH5AndBas():
+    '''
+    The returned value is a dictionary containing 2 keys: cmph5
+    and bash5s. The latter are the bash5s that were used to generate
+    the cmp.h5 file.
+    '''
+    return getCmpH5s()[0]
+
+def getCmpH5():
+    """
+    Returns the filename of the example cmp.h5 file.
+    """
+    return getCmpH5AndBas()["cmph5"]
+
+def getBasH5s():
+    """
+    Returns a list of the bas.h5 filenames that were used to generate
+    the example cmp.h5 file.
+    """
+    return getCmpH5AndBas()["bash5s"]
+
+def getGff3():
+    '''
+    Returns the filename of an example GFFv3 file
+    '''
+    return _getAbsPath("variants.gff")
+
+def getFasta():
+    '''
+    Returns the filename of an example FASTA file.
+    '''
+    return _getAbsPath('Fluidigm_human_amplicons.fasta')
+
+
+def getTinyFasta():
+    """
+    Returns the filename of a small example FASTA file (the first four
+    records of the FASTA returned by getFasta).
+    """
+    return _getAbsPath('Fluidigm_human_amplicons_tiny.fasta')
+
+def getLambdaFasta():
+    """
+    Returns the filename of the FASTA of the lambda phage reference.
+    """
+    return _getAbsPath('lambdaNEB.fa')
+
+def getDosFormattedFasta():
+    """
+    Returns the filename of an example FASTA file with DOS line endings
+    """
+    return _getAbsPath('barcodes-ed65-450.fasta')
+
+def getBlasrM4():
+    """
+    Returns the filename of the example file 'blasr-output.m4'.
+    """
+    return _getAbsPath('blasr-output.m4')
+
+def getBlasrM5():
+    """
+    Returns the filename of the example file 'blasr-output.m5'.
+    """
+    return _getAbsPath('blasr-output.m5')
+
+def getFofns():
+    """
+    Returns a list of FOFN files
+    """
+    # A list comprehension (rather than map()) keeps the documented list
+    # return type on Python 3 as well as Python 2.
+    return [_getAbsPath(fofn)
+            for fofn in ("1.4_bas_files.fofn",
+                         "2.0_bax_files.fofn",
+                         "2.1_bax_files.fofn",
+                         "2.1_ccs_files.fofn")]
+
+def getBcFofn():
+    """
+    Returns the filename of the FOFN file 'bc_files.fofn'.
+    """
+    return _getAbsPath("bc_files.fofn")
+
+
+def getBamAndCmpH5():
+    """
+    Get a "matched" BAM and cmp.h5 file
+    """
+    return (_getAbsPath("bam_mapping.bam"),
+            _getAbsPath("cmph5_mapping.cmp.h5"))
+
+def getBaxForBam():
+    """
+    Get the bax file that was mapped to produce the bam
+    """
+    return _getAbsPath("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5")
+
+def getUnalignedBam():
+    """
+    Get the unaligned BAM file, corresponding to the same bax above
+    """
+    return _getAbsPath("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.subreads.bam")
diff --git a/pbcore/data/aligned_reads_1.bam b/pbcore/data/aligned_reads_1.bam
new file mode 100644
index 0000000..3993fb0
Binary files /dev/null and b/pbcore/data/aligned_reads_1.bam differ
diff --git a/pbcore/data/aligned_reads_1.bam.bai b/pbcore/data/aligned_reads_1.bam.bai
new file mode 100644
index 0000000..81bc1ce
Binary files /dev/null and b/pbcore/data/aligned_reads_1.bam.bai differ
diff --git a/pbcore/data/aligned_reads_1.cmp.h5 b/pbcore/data/aligned_reads_1.cmp.h5
new file mode 100644
index 0000000..f05c634
Binary files /dev/null and b/pbcore/data/aligned_reads_1.cmp.h5 differ
diff --git a/pbcore/data/bam_mapping.bam b/pbcore/data/bam_mapping.bam
new file mode 100644
index 0000000..6fcaa68
Binary files /dev/null and b/pbcore/data/bam_mapping.bam differ
diff --git a/pbcore/data/bam_mapping.bam.bai b/pbcore/data/bam_mapping.bam.bai
new file mode 100644
index 0000000..d64c44d
Binary files /dev/null and b/pbcore/data/bam_mapping.bam.bai differ
diff --git a/pbcore/data/bam_mapping.bam.pbi b/pbcore/data/bam_mapping.bam.pbi
new file mode 100644
index 0000000..b80e837
Binary files /dev/null and b/pbcore/data/bam_mapping.bam.pbi differ
diff --git a/pbcore/data/barcodes-ed65-450.fasta b/pbcore/data/barcodes-ed65-450.fasta
new file mode 100644
index 0000000..a9d2889
--- /dev/null
+++ b/pbcore/data/barcodes-ed65-450.fasta
@@ -0,0 +1,900 @@
+>lbc1
+TCAGACGATGCGTCAT
+>lbc2
+CTATACATGACTCTGC
+>lbc3
+TACTAGAGTAGCACTC
+>lbc4
+TGTGTATCAGTACATG
+>lbc5
+ACACGCATGACACACT
+>lbc6
+GATCTCTACTATATGC
+>lbc7
+ACAGTCTATACTGCTG
+>lbc8
+ATGATGTGCTACATCT
+>lbc9
+CTGCGTGCTCTACGAC
+>lbc10
+GCGCGATACGATGACT
+>lbc11
+CGCGCTCAGCTGATCG
+>lbc12
+GCGCACGCACTACAGA
+>lbc13
+ACACTGACGTCGCGAC
+>lbc14
+CGTCTATATACGTATA
+>lbc15
+ATAGAGACTCAGAGCT
+>lbc16
+TAGATGCGAGAGTAGA
+>lbc17
+CATAGCGACTATCGTG
+>lbc18
+CATCACTACGCTAGAT
+>lbc19
+CGCATCTGTGCATGCA
+>lbc20
+TATGTGATCGTCTCTC
+>lbc21
+GTACACGCTGTGACTA
+>lbc22
+CGTGTCGCGCATATCT
+>lbc23
+ATATCAGTCATGCATA
+>lbc24
+GAGATCGACAGTCTCG
+>lbc25
+CACGCACACACGCGCG
+>lbc26
+CGAGCACGCGCGTGTG
+>lbc27
+GTAGTCTCGCACAGAT
+>lbc28
+GAGACTCTGTGCGCGT
+>lbc29
+GCTCGACTGTGAGAGA
+>lbc30
+AGAGATGTGTGATGAC
+>lbc31
+TACGACTACATATCAG
+>lbc32
+TATCTCTGTAGAGTCT
+>lbc33
+AGAGAGAGACATGCGC
+>lbc34
+ACTCTCGCTCTGTAGA
+>lbc35
+TCTATGTCTCAGTAGT
+>lbc36
+GCGTATATCTCATGCG
+>lbc37
+GTGCGTATGTCGCTAC
+>lbc38
+TGCTCGCAGTATCACA
+>lbc39
+CTGTGTGTGATAGAGT
+>lbc40
+CAGTGAGAGCGCGATA
+>lbc41
+GTACATATGCGTCTGT
+>lbc42
+GAGACTAGAGATAGTG
+>lbc43
+TACGCGTGTACGCAGA
+>lbc44
+TGTCACTCATCTGAGT
+>lbc45
+GCACATACACGCTCAC
+>lbc46
+GCTCGTCGCGCGCACA
+>lbc47
+ACAGTGCGCTGTCTAT
+>lbc48
+TCACACTCTAGAGCGA
+>lbc49
+TCACATATGTATACAT
+>lbc50
+CGCTGCGAGAGACAGT
+>lbc51
+ACACACAGACTGTGAG
+>lbc52
+GCAGACTCTCACACGC
+>lbc53
+TGCTCTCGTGTACTGT
+>lbc54
+GTGTGAGATATATATC
+>lbc55
+CTCAGTGTGACACATG
+>lbc56
+TGCGAGCGACTCTATC
+>lbc57
+GTCAGCTAGTGTCAGC
+>lbc58
+AGATATCATCAGCGAG
+>lbc59
+GTGCAGTGATCGATGA
+>lbc60
+TGACTCGCTCATAGTC
+>lbc61
+ATGCTGATGACGCGCT
+>lbc62
+GACAGCATCTGCGCTC
+>lbc63
+AGCGTCTGACGTGAGT
+>lbc64
+TCGATATACGACGTGC
+>lbc65
+TCGTCATACGCTCTAG
+>lbc66
+CGACTACGTACAGTAG
+>lbc67
+GCGTAGACAGACTACA
+>lbc68
+ACAGTATGATGTACTC
+>lbc69
+GTCTGATAGATACAGA
+>lbc70
+CTGCGCAGTACGTGCA
+>lbc71
+TAGATCTCTGACTCAC
+>lbc72
+CTGATGCGCGCTGTAC
+>lbc73
+CACTCGTGCACGATGC
+>lbc74
+TGACAGTATCACAGTG
+>lbc75
+GAGATACGCTGCAGTC
+>lbc76
+ACGTGAGCTCACTCGC
+>lbc77
+ATAGAGAGTGTCTCAG
+>lbc78
+CATAGAGAGATAGTAT
+>lbc79
+ATCTCGAGATGTAGCG
+>lbc80
+ACGATCACTCGTGTCA
+>lbc81
+GATCGACTCGAGCATC
+>lbc82
+ATGCTCACTACTACAT
+>lbc83
+CGTGCACATCTATAGC
+>lbc84
+GACTGCACATGCACGA
+>lbc85
+TATGACTAGTGTACTA
+>lbc86
+GACGTGTCGTAGATAT
+>lbc87
+ATAGCGACGCGATATA
+>lbc88
+ATCGCTGTGTCTATAG
+>lbc89
+TCTCACTGATAGCGTG
+>lbc90
+TGTCGTCTATCATGTA
+>lbc91
+CACACGAGATCTCATC
+>lbc92
+AGATACACATGATACT
+>lbc93
+CGTGAGTAGTCAGACG
+>lbc94
+TCTCGACTGCACATAT
+>lbc95
+TGAGTGACGTGTAGCG
+>lbc96
+GTGTGCACTCACACTC
+>lbc97
+TACGATCGTAGCTGCT
+>lbc98
+TATACACACTCGCTCG
+>lbc99
+AGCGCTGCGACACGCG
+>lbc100
+GTCGTAGCTGCTGTAT
+>lbc101
+CTGTACTAGAGCGTCT
+>lbc102
+TCGAGTGTATAGCTCA
+>lbc103
+ACTGTGACAGTATGAT
+>lbc104
+TGTCTGAGACGCATAC
+>lbc105
+CACTCACGTGTGATAT
+>lbc106
+ATCGCATCGCAGAGAC
+>lbc107
+TACTCATATATGCTAC
+>lbc108
+GTCTACGCTCGTCGCG
+>lbc109
+TGCGAGACTATCGCGA
+>lbc110
+CAGATCTCTCTGATGT
+>lbc111
+GTAGAGTGATCGCGTC
+>lbc112
+ACGACAGTCAGAGTAT
+>lbc113
+ATATATAGCTGATGCG
+>lbc114
+TGCTATCTGAGATACT
+>lbc115
+CAGCAGATCATGTCGA
+>lbc116
+TGCTGCGAGCGCTCTG
+>lbc117
+ACTATCGCAGCTCAGT
+>lbc118
+CGTCTCTCGTCTGTGC
+>lbc119
+GAGTCTCGATATACTA
+>lbc120
+TGTCATGTGTACACAC
+>lbc121
+TCTGTCGATATACACT
+>lbc122
+ACGTGCTCTATAGAGA
+>lbc123
+TATCAGCACGACATGC
+>lbc124
+GCTCTCACGATATCAG
+>lbc125
+TATATGCTCTGTGTGA
+>lbc126
+GATAGCTGCTAGCTGA
+>lbc127
+TCTCATGTGTGAGCTA
+>lbc128
+TCAGATGTGTCGCGAG
+>lbc129
+CGTAGCTCAGACACTC
+>lbc130
+TCAGAGACACTACGAG
+>lbc131
+ATCGAGCAGCAGTCGT
+>lbc132
+CGTAGCTCGAGATGAG
+>lbc133
+GCTAGTCGATGACAGC
+>lbc134
+CATGATGCGAGACGCT
+>lbc135
+GTGTAGCGTAGACAGT
+>lbc136
+AGCACGTGTGTCGACA
+>lbc137
+CTAGACACGCAGTCAC
+>lbc138
+TAGCGTGAGAGTGTCG
+>lbc139
+GTCTCTCTCTCACGCA
+>lbc140
+TGCATAGTAGTGCTCT
+>lbc141
+CATATCAGTGCTACAG
+>lbc142
+CGACGTCATAGTGCGT
+>lbc143
+ACACACTCTATCAGAT
+>lbc144
+GCTGTGTGTGCTCGTC
+>lbc145
+AGCGTAGCATCTGAGC
+>lbc146
+GAGTCTGCACGCGCTA
+>lbc147
+AGACGCGAGCGCGTAG
+>lbc148
+CTACGATGCTATGTAT
+>lbc149
+CGACTAGATCTATCAT
+>lbc150
+ATCTCTGTGCGCGCAG
+>lbc151
+GCTAGCATGCTCTCAG
+>lbc152
+GTCACGATATAGTGAC
+>lbc153
+TCTACTGCATGATGTC
+>lbc154
+AGTCGTGACTATGCTC
+>lbc155
+GTATAGACAGATGTGC
+>lbc156
+TAGTGTGCGACTCTGA
+>lbc157
+GCACTCAGAGACGCGA
+>lbc158
+TCTATCAGCGCTGATG
+>lbc159
+ATGTCGCATATATCGC
+>lbc160
+CACGACTATATGCTCT
+>lbc161
+AGTCACACGCACGCTG
+>lbc162
+CATACATCGCGCAGTA
+>lbc163
+TGCGAGCGTGCACAGA
+>lbc164
+CTCTGACTCGCGTCGA
+>lbc165
+CTATCTAGCACTCACA
+>lbc166
+ACACGTGATAGCTACG
+>lbc167
+GCGATCACTGTACACT
+>lbc168
+CGCTAGAGATCTGCTA
+>lbc169
+GATACTGACACACTAT
+>lbc170
+GAGCTGATGTACATGT
+>lbc171
+AGTCGCGTAGCTCATC
+>lbc172
+TGTAGAGATACTCACT
+>lbc173
+TCGCTGACTCGACACA
+>lbc174
+TACATCTCGCTGCGCA
+>lbc175
+GTATATATATACGTCT
+>lbc176
+TCGCGAGCAGCGACAT
+>lbc177
+AGCTCAGTATCATCTG
+>lbc178
+ACACAGTAGAGCGAGC
+>lbc179
+ACGACGCGCACTGACA
+>lbc180
+CTCATAGCGTGTACTC
+>lbc181
+GACGACAGACTGCATA
+>lbc182
+GTCTGTATAGCTATCT
+>lbc183
+TGTCTCGTGCTGAGAC
+>lbc184
+CATATGCTCGTGCACT
+>lbc185
+ACTACATACTAGATCA
+>lbc186
+TGTGCACGACAGCAGT
+>lbc187
+ATGATACACGCGCGAC
+>lbc188
+TGTCTGATCTGTATCA
+>lbc189
+CTCTCGCATACGCGAG
+>lbc190
+GAGCGTGTATACAGCG
+>lbc191
+GAGCTCATGTAGACAC
+>lbc192
+TACATATGTCACGCGC
+>lbc193
+ATCGCTCTCATGTCTA
+>lbc194
+ACGATGTATCTACGCA
+>lbc195
+TCGATACGCACTCGAT
+>lbc196
+CACGACACGACGATGT
+>lbc197
+CTGCAGCTCACTACTA
+>lbc198
+CTATATGAGACGAGTG
+>lbc199
+CTCTCGTAGACAGATA
+>lbc200
+CGCATGACACGTGTGT
+>lbc201
+CACATACTACTACTGA
+>lbc202
+AGTCAGATGCGCACTC
+>lbc203
+AGCGACGCGAGAGTGC
+>lbc204
+ATACACTCATGTGCAC
+>lbc205
+GCTACGCTATAGACAT
+>lbc206
+TATCTATCGCATATCG
+>lbc207
+TCACGTGCAGATATAG
+>lbc208
+GCACAGCGTAGCGCAT
+>lbc209
+CATGCTACGTCTCTGT
+>lbc210
+CTCACGTACGTCACAC
+>lbc211
+TCTGAGACACAGACTC
+>lbc212
+CTAGTCTCTATCGCAT
+>lbc213
+ACGCTCGCTGAGCATA
+>lbc214
+ACTCATGTATATGAGT
+>lbc215
+AGCGTAGCGCGCGTCA
+>lbc216
+TCTCGTCGCAGTCTCT
+>lbc217
+GACGAGCGTCTGAGAG
+>lbc218
+GTATGATCACTAGTAG
+>lbc219
+CTCACACATACACGTC
+>lbc220
+GTATCGAGCGTATAGC
+>lbc221
+GCTGCGCTGATATGCG
+>lbc222
+GTCAGAGCTCTCGTGC
+>lbc223
+ATATGACATACACGCA
+>lbc224
+CTCGCTCGACGAGCGC
+>lbc225
+CGTCATCTATATACAG
+>lbc226
+TGTACGCTCTCTATAT
+>lbc227
+AGATCGCGCATGTGTA
+>lbc228
+GACACAGTGTGTAGTC
+>lbc229
+GTGCGCTACAGTCTCT
+>lbc230
+CATCGTCTAGCACTCG
+>lbc231
+CAGCGCATCTCACGTC
+>lbc232
+GTCTCATCATGCTGCG
+>lbc233
+ATCGTATAGTCATACA
+>lbc234
+AGTGCGCACATGTCAG
+>lbc235
+ATCTACGACTAGCAGA
+>lbc236
+TCGCGACATATAGATG
+>lbc237
+AGATATACTGTCTGAT
+>lbc238
+AGTCACTGTCTACTCG
+>lbc239
+TATACGAGATACGTGA
+>lbc240
+ACATGCGTGACAGTCA
+>lbc241
+GTGAGAGTCTGATACT
+>lbc242
+GCACGATGTCAGCGCG
+>lbc243
+CACGTGCTCGAGAGTC
+>lbc244
+GACACTCAGTCTCTCA
+>lbc245
+ACAGTAGACTCTCAGA
+>lbc246
+ACACTAGATCGCGTGT
+>lbc247
+ACGTCAGCACTGCTCT
+>lbc248
+CACAGTCGCAGTACGC
+>lbc249
+GTGACTCTATGCTATA
+>lbc250
+CTCTACATCAGTGCTA
+>lbc251
+GATGAGTATAGACACA
+>lbc252
+ATCTGAGTCTGACACG
+>lbc253
+GCGAGACTCAGCTCTG
+>lbc254
+CGTACGACTGCAGCGT
+>lbc255
+CGTGTCACTCTGCGTG
+>lbc256
+AGCTCTGTCACTAGAC
+>lbc257
+GCGAGAGTGAGACGCA
+>lbc258
+TCTACTACACTGTACT
+>lbc259
+CATCGTCACAGACATA
+>lbc260
+GTGCACTCGCGCTCTC
+>lbc261
+TGACATCTACACATAC
+>lbc262
+GTCGTCTAGATCGACG
+>lbc263
+GACATAGCTAGATCGC
+>lbc264
+TATATATGTCTATAGA
+>lbc265
+CTGTGTATCTGTGTAC
+>lbc266
+CGACGCACGATACTAT
+>lbc267
+TGATATATACGCGCGT
+>lbc268
+CGCGTATGTATGTCGC
+>lbc269
+CTCGAGCAGTAGATAC
+>lbc270
+CTGTGCTATGTACGCG
+>lbc271
+ACTCAGCGCGTACATA
+>lbc272
+TGAGATATGCATGATG
+>lbc273
+ACTCTATGTCGATGTA
+>lbc274
+GCGCGTGCTGCGTCTA
+>lbc275
+GATCATGTGAGCATAG
+>lbc276
+CATGTAGAGCAGAGAG
+>lbc277
+GTGTGTCTCGATGCGC
+>lbc278
+CTCGCACGTCGCATAG
+>lbc279
+CGAGCTACTCTGACAG
+>lbc280
+CGTGAGTATATGTCAT
+>lbc281
+ACAGTACTAGTGCGAG
+>lbc282
+CTCACTACGCGCGCGT
+>lbc283
+GACTCTCTATCGTACT
+>lbc284
+TATATACAGAGTCGAG
+>lbc285
+TGAGTGAGACATATCA
+>lbc286
+GTGACACACAGAGCAC
+>lbc287
+CTGCGTATAGATATGA
+>lbc288
+GAGAGTGTGAGAGTGT
+>lbc289
+CGTCTCTATCTCTCTA
+>lbc290
+TACATGTGTCTATGTC
+>lbc291
+TCTCGCGCGTGCACGC
+>lbc292
+TATGTGTCTGCGCATA
+>lbc293
+AGTCTGAGAGAGCTAT
+>lbc294
+ACAGTCGAGCGCTGCG
+>lbc295
+GAGAGTAGCGTGTACA
+>lbc296
+GATATATCGAGTATAT
+>lbc297
+GCACACATATCTGATG
+>lbc298
+CATCGCGAGTGCGCTC
+>lbc299
+ACATATCGTACTCTCT
+>lbc300
+AGCACAGTCACATGTC
+>lbc301
+GCGCACAGACATCTGT
+>lbc302
+ACGCGCTATCTCAGAG
+>lbc303
+CTGTAGACATCACACG
+>lbc304
+TATCTGAGCGCGAGCA
+>lbc305
+CTCTGCTCTGACTCTC
+>lbc306
+ACGTAGTGCACACAGA
+>lbc307
+TGTATGAGTGTCTGAC
+>lbc308
+CTCTGCAGCGATCACT
+>lbc309
+ACTGCGAGATACACAC
+>lbc310
+TATAGTGCGCAGCGAC
+>lbc311
+GATGTGTGCGCAGTGC
+>lbc312
+AGACACACACGCACAT
+>lbc313
+CACATGTGACTCGACG
+>lbc314
+GATCTGTCGTGAGCGT
+>lbc315
+ATATAGCGCATAGCTC
+>lbc316
+ACTCATCACGTCTCGA
+>lbc317
+CTCTCTAGAGTGACAT
+>lbc318
+TCACACTGTGCGAGAC
+>lbc319
+CGCGCGAGTATCTCGT
+>lbc320
+TATCTCTCGAGTCGCG
+>lbc321
+TAGATGAGTACACGTA
+>lbc322
+CATGTGCGCTCATCAC
+>lbc323
+GTATAGCACTCGAGCG
+>lbc324
+ACTCTGCTGTCATCGC
+>lbc325
+CGCATATCTCACTAGT
+>lbc326
+CACTATACACTGCGCT
+>lbc327
+CGCACAGATACGCTCT
+>lbc328
+CAGATCTCGCGTGACA
+>lbc329
+GCGCTCTCTCACATAC
+>lbc330
+ACACATCTCGTGAGAG
+>lbc331
+AGTAGTGTGATACTAG
+>lbc332
+CGAGCATATATATCTC
+>lbc333
+CTATACGTATATCTAT
+>lbc334
+GTGTATCAGCGAGTAT
+>lbc335
+GCTGAGACGACGCGCG
+>lbc336
+GCGCAGTGTCACATCA
+>lbc337
+TCATACACACAGATAG
+>lbc338
+CACTCGACTCTCGCGT
+>lbc339
+CACATATCAGAGTGCG
+>lbc340
+CGTATACAGTCACGCT
+>lbc341
+TGTAGACTAGCGCTGC
+>lbc342
+AGCACACATATAGCGC
+>lbc343
+GATATCTCGATCTCTG
+>lbc344
+TCTCACGAGAGCGCAC
+>lbc345
+TGTGCTCTCTACACAG
+>lbc346
+TGTCATATGAGAGTGT
+>lbc347
+CTGTGTGCTCGCTATG
+>lbc348
+TATAGAGCTCTACATA
+>lbc349
+CTATACATAGTGATGT
+>lbc350
+TCTCTCTATCGCGCTC
+>lbc351
+ATAGCGACATCTCTCT
+>lbc352
+GCGCGCGCACTCTCTG
+>lbc353
+TCTCTCGATATGATAG
+>lbc354
+GATCACAGAGATGCTC
+>lbc355
+GCTCGCACAGCGCGTC
+>lbc356
+CACAGAGACACGCACA
+>lbc357
+GCGTGTGTCGAGTGTA
+>lbc358
+GTCATCTGTACGCTAT
+>lbc359
+CACACGCACTGAGATA
+>lbc360
+ACACATATCGCACTAC
+>lbc361
+GAGAGCGCTGACTCTG
+>lbc362
+ACACGTGTGCTCTCTC
+>lbc363
+CGAGTGTGTCTATACT
+>lbc364
+GTGATGCATACGTACA
+>lbc365
+CTCGTGACGCTGACTG
+>lbc366
+TCTGTATCTCTATGTG
+>lbc367
+TGTGTCTCTGAGAGTA
+>lbc368
+TAGATCTATCATCGTC
+>lbc369
+ACATATACAGCGTATC
+>lbc370
+CGCTCATATGAGCTCA
+>lbc371
+GTCGCGCATAGAGCGC
+>lbc372
+TACACACTATGTGCGT
+>lbc373
+ATACGCGCGCGCATGC
+>lbc374
+GTGCGCGAGAGTATAC
+>lbc375
+GCGCTAGTGTGTACGA
+>lbc376
+GAGACACGTCGCACAC
+>lbc377
+ACAGAGTGTGCAGATA
+>lbc378
+TAGAGCGTCTCTCGTA
+>lbc379
+TCTATGAGCACTCTCG
+>lbc380
+ATGTGTATATAGATAT
+>lbc381
+CTCACACTCTCTCACA
+>lbc382
+TCAGCGCACTGTGCTG
+>lbc383
+GTGCATACATACATAT
+>lbc384
+CAGAGAGATATCTCTG
+>lbc385
+TCTCAGATAGTCTATA
+>lbc386
+AGTAGACAGAGCGTGA
+>lbc387
+AGTCGAGATATACAGT
+>lbc388
+AGAGAGCTCTCTCATC
+>lbc389
+AGCTACGCGTGCACTG
+>lbc390
+CAGTCTGTGAGTCACT
+>lbc391
+AGAGCAGACGAGACTC
+>lbc392
+GTCTATCTCGCGAGAG
+>lbc393
+GATGTCTGAGTGTGTG
+>lbc394
+ACTCGCGCACGCGCGA
+>lbc395
+ATATGAGTGACTCGTG
+>lbc396
+AGTCTGCGAGACAGAG
+>lbc397
+GTGTGTGTCACACTAT
+>lbc398
+CGTACGTGCGAGTACA
+>lbc399
+TATCGCTAGATGCGCA
+>lbc400
+GTGAGTATGTACTCTG
+>lbc401
+GTCATACGAGTGAGCA
+>lbc402
+ATGAGTCTCACTGTAT
+>lbc403
+TCGATGCGCATACAGC
+>lbc404
+CTATGTGAGTGTGATC
+>lbc405
+GCATACTGTGCGCTCG
+>lbc406
+CGACATAGCGCGACGA
+>lbc407
+ATGCGATACATAGTCT
+>lbc408
+CGTCTAGATAGAGATG
+>lbc409
+TACTCACTGCGCTCAC
+>lbc410
+ACACACACACTCTATA
+>lbc411
+CTCTATATATCTCGTC
+>lbc412
+ACATATGTCTGAGACA
+>lbc413
+ACACACGCGAGACAGA
+>lbc414
+GACACTCGCATGTGCG
+>lbc415
+CAGTATAGAGTCATAG
+>lbc416
+ATCTCTGCTACACTCA
+>lbc417
+CACACGCGCGCTATAT
+>lbc418
+GATACGAGAGCTGATG
+>lbc419
+CGCGACACGCTCGCGC
+>lbc420
+CATATATATCAGCTGT
+>lbc421
+TACACAGCATCTCGCA
+>lbc422
+AGATGTCATGTCTCTA
+>lbc423
+TCACGTGCTCACTGTG
+>lbc424
+GTGACAGACGTCACGC
+>lbc425
+CGTGTCTAGCGCGCGC
+>lbc426
+CGCTCTGTCACGTCTG
+>lbc427
+TGTGTCAGAGACTGTC
+>lbc428
+CTACGAGACAGATCGC
+>lbc429
+GATATACGCGAGAGAG
+>lbc430
+TAGAGAGCGTCGCGTG
+>lbc431
+ATAGTACACTCTGTGT
+>lbc432
+ACGACATCGCTCACAG
+>lbc433
+AGCATACGCACTATAG
+>lbc434
+AGTAGCTCGTCGAGTG
+>lbc435
+GTGCTATAGCACACGC
+>lbc436
+GCGAGCTATACATATA
+>lbc437
+CGTGTCTCTCGATACA
+>lbc438
+GACGCGCTCACAGTGA
+>lbc439
+GAGCACAGAGCGCGCT
+>lbc440
+CATAGATACGCACGCG
+>lbc441
+AGACACGAGTCTAGAT
+>lbc442
+GACTCGCGATACTAGA
+>lbc443
+TAGAGCGTGCATATAT
+>lbc444
+ACGTGTATGACGATAC
+>lbc445
+ATACGCATATCGCAGT
+>lbc446
+GATATATATGTGTGTA
+>lbc447
+GCGATACACAGTCGCA
+>lbc448
+TCACTGTGTGTGTCTG
+>lbc449
+CGCACACATAGATACA
+>lbc450
+CACTACTAGCGTGTGC
diff --git a/pbcore/data/barcodes-ed65-450.fasta.fai b/pbcore/data/barcodes-ed65-450.fasta.fai
new file mode 100644
index 0000000..57aa3fb
--- /dev/null
+++ b/pbcore/data/barcodes-ed65-450.fasta.fai
@@ -0,0 +1,450 @@
+lbc1	16	7	16	18
+lbc2	16	32	16	18
+lbc3	16	57	16	18
+lbc4	16	82	16	18
+lbc5	16	107	16	18
+lbc6	16	132	16	18
+lbc7	16	157	16	18
+lbc8	16	182	16	18
+lbc9	16	207	16	18
+lbc10	16	233	16	18
+lbc11	16	259	16	18
+lbc12	16	285	16	18
+lbc13	16	311	16	18
+lbc14	16	337	16	18
+lbc15	16	363	16	18
+lbc16	16	389	16	18
+lbc17	16	415	16	18
+lbc18	16	441	16	18
+lbc19	16	467	16	18
+lbc20	16	493	16	18
+lbc21	16	519	16	18
+lbc22	16	545	16	18
+lbc23	16	571	16	18
+lbc24	16	597	16	18
+lbc25	16	623	16	18
+lbc26	16	649	16	18
+lbc27	16	675	16	18
+lbc28	16	701	16	18
+lbc29	16	727	16	18
+lbc30	16	753	16	18
+lbc31	16	779	16	18
+lbc32	16	805	16	18
+lbc33	16	831	16	18
+lbc34	16	857	16	18
+lbc35	16	883	16	18
+lbc36	16	909	16	18
+lbc37	16	935	16	18
+lbc38	16	961	16	18
+lbc39	16	987	16	18
+lbc40	16	1013	16	18
+lbc41	16	1039	16	18
+lbc42	16	1065	16	18
+lbc43	16	1091	16	18
+lbc44	16	1117	16	18
+lbc45	16	1143	16	18
+lbc46	16	1169	16	18
+lbc47	16	1195	16	18
+lbc48	16	1221	16	18
+lbc49	16	1247	16	18
+lbc50	16	1273	16	18
+lbc51	16	1299	16	18
+lbc52	16	1325	16	18
+lbc53	16	1351	16	18
+lbc54	16	1377	16	18
+lbc55	16	1403	16	18
+lbc56	16	1429	16	18
+lbc57	16	1455	16	18
+lbc58	16	1481	16	18
+lbc59	16	1507	16	18
+lbc60	16	1533	16	18
+lbc61	16	1559	16	18
+lbc62	16	1585	16	18
+lbc63	16	1611	16	18
+lbc64	16	1637	16	18
+lbc65	16	1663	16	18
+lbc66	16	1689	16	18
+lbc67	16	1715	16	18
+lbc68	16	1741	16	18
+lbc69	16	1767	16	18
+lbc70	16	1793	16	18
+lbc71	16	1819	16	18
+lbc72	16	1845	16	18
+lbc73	16	1871	16	18
+lbc74	16	1897	16	18
+lbc75	16	1923	16	18
+lbc76	16	1949	16	18
+lbc77	16	1975	16	18
+lbc78	16	2001	16	18
+lbc79	16	2027	16	18
+lbc80	16	2053	16	18
+lbc81	16	2079	16	18
+lbc82	16	2105	16	18
+lbc83	16	2131	16	18
+lbc84	16	2157	16	18
+lbc85	16	2183	16	18
+lbc86	16	2209	16	18
+lbc87	16	2235	16	18
+lbc88	16	2261	16	18
+lbc89	16	2287	16	18
+lbc90	16	2313	16	18
+lbc91	16	2339	16	18
+lbc92	16	2365	16	18
+lbc93	16	2391	16	18
+lbc94	16	2417	16	18
+lbc95	16	2443	16	18
+lbc96	16	2469	16	18
+lbc97	16	2495	16	18
+lbc98	16	2521	16	18
+lbc99	16	2547	16	18
+lbc100	16	2574	16	18
+lbc101	16	2601	16	18
+lbc102	16	2628	16	18
+lbc103	16	2655	16	18
+lbc104	16	2682	16	18
+lbc105	16	2709	16	18
+lbc106	16	2736	16	18
+lbc107	16	2763	16	18
+lbc108	16	2790	16	18
+lbc109	16	2817	16	18
+lbc110	16	2844	16	18
+lbc111	16	2871	16	18
+lbc112	16	2898	16	18
+lbc113	16	2925	16	18
+lbc114	16	2952	16	18
+lbc115	16	2979	16	18
+lbc116	16	3006	16	18
+lbc117	16	3033	16	18
+lbc118	16	3060	16	18
+lbc119	16	3087	16	18
+lbc120	16	3114	16	18
+lbc121	16	3141	16	18
+lbc122	16	3168	16	18
+lbc123	16	3195	16	18
+lbc124	16	3222	16	18
+lbc125	16	3249	16	18
+lbc126	16	3276	16	18
+lbc127	16	3303	16	18
+lbc128	16	3330	16	18
+lbc129	16	3357	16	18
+lbc130	16	3384	16	18
+lbc131	16	3411	16	18
+lbc132	16	3438	16	18
+lbc133	16	3465	16	18
+lbc134	16	3492	16	18
+lbc135	16	3519	16	18
+lbc136	16	3546	16	18
+lbc137	16	3573	16	18
+lbc138	16	3600	16	18
+lbc139	16	3627	16	18
+lbc140	16	3654	16	18
+lbc141	16	3681	16	18
+lbc142	16	3708	16	18
+lbc143	16	3735	16	18
+lbc144	16	3762	16	18
+lbc145	16	3789	16	18
+lbc146	16	3816	16	18
+lbc147	16	3843	16	18
+lbc148	16	3870	16	18
+lbc149	16	3897	16	18
+lbc150	16	3924	16	18
+lbc151	16	3951	16	18
+lbc152	16	3978	16	18
+lbc153	16	4005	16	18
+lbc154	16	4032	16	18
+lbc155	16	4059	16	18
+lbc156	16	4086	16	18
+lbc157	16	4113	16	18
+lbc158	16	4140	16	18
+lbc159	16	4167	16	18
+lbc160	16	4194	16	18
+lbc161	16	4221	16	18
+lbc162	16	4248	16	18
+lbc163	16	4275	16	18
+lbc164	16	4302	16	18
+lbc165	16	4329	16	18
+lbc166	16	4356	16	18
+lbc167	16	4383	16	18
+lbc168	16	4410	16	18
+lbc169	16	4437	16	18
+lbc170	16	4464	16	18
+lbc171	16	4491	16	18
+lbc172	16	4518	16	18
+lbc173	16	4545	16	18
+lbc174	16	4572	16	18
+lbc175	16	4599	16	18
+lbc176	16	4626	16	18
+lbc177	16	4653	16	18
+lbc178	16	4680	16	18
+lbc179	16	4707	16	18
+lbc180	16	4734	16	18
+lbc181	16	4761	16	18
+lbc182	16	4788	16	18
+lbc183	16	4815	16	18
+lbc184	16	4842	16	18
+lbc185	16	4869	16	18
+lbc186	16	4896	16	18
+lbc187	16	4923	16	18
+lbc188	16	4950	16	18
+lbc189	16	4977	16	18
+lbc190	16	5004	16	18
+lbc191	16	5031	16	18
+lbc192	16	5058	16	18
+lbc193	16	5085	16	18
+lbc194	16	5112	16	18
+lbc195	16	5139	16	18
+lbc196	16	5166	16	18
+lbc197	16	5193	16	18
+lbc198	16	5220	16	18
+lbc199	16	5247	16	18
+lbc200	16	5274	16	18
+lbc201	16	5301	16	18
+lbc202	16	5328	16	18
+lbc203	16	5355	16	18
+lbc204	16	5382	16	18
+lbc205	16	5409	16	18
+lbc206	16	5436	16	18
+lbc207	16	5463	16	18
+lbc208	16	5490	16	18
+lbc209	16	5517	16	18
+lbc210	16	5544	16	18
+lbc211	16	5571	16	18
+lbc212	16	5598	16	18
+lbc213	16	5625	16	18
+lbc214	16	5652	16	18
+lbc215	16	5679	16	18
+lbc216	16	5706	16	18
+lbc217	16	5733	16	18
+lbc218	16	5760	16	18
+lbc219	16	5787	16	18
+lbc220	16	5814	16	18
+lbc221	16	5841	16	18
+lbc222	16	5868	16	18
+lbc223	16	5895	16	18
+lbc224	16	5922	16	18
+lbc225	16	5949	16	18
+lbc226	16	5976	16	18
+lbc227	16	6003	16	18
+lbc228	16	6030	16	18
+lbc229	16	6057	16	18
+lbc230	16	6084	16	18
+lbc231	16	6111	16	18
+lbc232	16	6138	16	18
+lbc233	16	6165	16	18
+lbc234	16	6192	16	18
+lbc235	16	6219	16	18
+lbc236	16	6246	16	18
+lbc237	16	6273	16	18
+lbc238	16	6300	16	18
+lbc239	16	6327	16	18
+lbc240	16	6354	16	18
+lbc241	16	6381	16	18
+lbc242	16	6408	16	18
+lbc243	16	6435	16	18
+lbc244	16	6462	16	18
+lbc245	16	6489	16	18
+lbc246	16	6516	16	18
+lbc247	16	6543	16	18
+lbc248	16	6570	16	18
+lbc249	16	6597	16	18
+lbc250	16	6624	16	18
+lbc251	16	6651	16	18
+lbc252	16	6678	16	18
+lbc253	16	6705	16	18
+lbc254	16	6732	16	18
+lbc255	16	6759	16	18
+lbc256	16	6786	16	18
+lbc257	16	6813	16	18
+lbc258	16	6840	16	18
+lbc259	16	6867	16	18
+lbc260	16	6894	16	18
+lbc261	16	6921	16	18
+lbc262	16	6948	16	18
+lbc263	16	6975	16	18
+lbc264	16	7002	16	18
+lbc265	16	7029	16	18
+lbc266	16	7056	16	18
+lbc267	16	7083	16	18
+lbc268	16	7110	16	18
+lbc269	16	7137	16	18
+lbc270	16	7164	16	18
+lbc271	16	7191	16	18
+lbc272	16	7218	16	18
+lbc273	16	7245	16	18
+lbc274	16	7272	16	18
+lbc275	16	7299	16	18
+lbc276	16	7326	16	18
+lbc277	16	7353	16	18
+lbc278	16	7380	16	18
+lbc279	16	7407	16	18
+lbc280	16	7434	16	18
+lbc281	16	7461	16	18
+lbc282	16	7488	16	18
+lbc283	16	7515	16	18
+lbc284	16	7542	16	18
+lbc285	16	7569	16	18
+lbc286	16	7596	16	18
+lbc287	16	7623	16	18
+lbc288	16	7650	16	18
+lbc289	16	7677	16	18
+lbc290	16	7704	16	18
+lbc291	16	7731	16	18
+lbc292	16	7758	16	18
+lbc293	16	7785	16	18
+lbc294	16	7812	16	18
+lbc295	16	7839	16	18
+lbc296	16	7866	16	18
+lbc297	16	7893	16	18
+lbc298	16	7920	16	18
+lbc299	16	7947	16	18
+lbc300	16	7974	16	18
+lbc301	16	8001	16	18
+lbc302	16	8028	16	18
+lbc303	16	8055	16	18
+lbc304	16	8082	16	18
+lbc305	16	8109	16	18
+lbc306	16	8136	16	18
+lbc307	16	8163	16	18
+lbc308	16	8190	16	18
+lbc309	16	8217	16	18
+lbc310	16	8244	16	18
+lbc311	16	8271	16	18
+lbc312	16	8298	16	18
+lbc313	16	8325	16	18
+lbc314	16	8352	16	18
+lbc315	16	8379	16	18
+lbc316	16	8406	16	18
+lbc317	16	8433	16	18
+lbc318	16	8460	16	18
+lbc319	16	8487	16	18
+lbc320	16	8514	16	18
+lbc321	16	8541	16	18
+lbc322	16	8568	16	18
+lbc323	16	8595	16	18
+lbc324	16	8622	16	18
+lbc325	16	8649	16	18
+lbc326	16	8676	16	18
+lbc327	16	8703	16	18
+lbc328	16	8730	16	18
+lbc329	16	8757	16	18
+lbc330	16	8784	16	18
+lbc331	16	8811	16	18
+lbc332	16	8838	16	18
+lbc333	16	8865	16	18
+lbc334	16	8892	16	18
+lbc335	16	8919	16	18
+lbc336	16	8946	16	18
+lbc337	16	8973	16	18
+lbc338	16	9000	16	18
+lbc339	16	9027	16	18
+lbc340	16	9054	16	18
+lbc341	16	9081	16	18
+lbc342	16	9108	16	18
+lbc343	16	9135	16	18
+lbc344	16	9162	16	18
+lbc345	16	9189	16	18
+lbc346	16	9216	16	18
+lbc347	16	9243	16	18
+lbc348	16	9270	16	18
+lbc349	16	9297	16	18
+lbc350	16	9324	16	18
+lbc351	16	9351	16	18
+lbc352	16	9378	16	18
+lbc353	16	9405	16	18
+lbc354	16	9432	16	18
+lbc355	16	9459	16	18
+lbc356	16	9486	16	18
+lbc357	16	9513	16	18
+lbc358	16	9540	16	18
+lbc359	16	9567	16	18
+lbc360	16	9594	16	18
+lbc361	16	9621	16	18
+lbc362	16	9648	16	18
+lbc363	16	9675	16	18
+lbc364	16	9702	16	18
+lbc365	16	9729	16	18
+lbc366	16	9756	16	18
+lbc367	16	9783	16	18
+lbc368	16	9810	16	18
+lbc369	16	9837	16	18
+lbc370	16	9864	16	18
+lbc371	16	9891	16	18
+lbc372	16	9918	16	18
+lbc373	16	9945	16	18
+lbc374	16	9972	16	18
+lbc375	16	9999	16	18
+lbc376	16	10026	16	18
+lbc377	16	10053	16	18
+lbc378	16	10080	16	18
+lbc379	16	10107	16	18
+lbc380	16	10134	16	18
+lbc381	16	10161	16	18
+lbc382	16	10188	16	18
+lbc383	16	10215	16	18
+lbc384	16	10242	16	18
+lbc385	16	10269	16	18
+lbc386	16	10296	16	18
+lbc387	16	10323	16	18
+lbc388	16	10350	16	18
+lbc389	16	10377	16	18
+lbc390	16	10404	16	18
+lbc391	16	10431	16	18
+lbc392	16	10458	16	18
+lbc393	16	10485	16	18
+lbc394	16	10512	16	18
+lbc395	16	10539	16	18
+lbc396	16	10566	16	18
+lbc397	16	10593	16	18
+lbc398	16	10620	16	18
+lbc399	16	10647	16	18
+lbc400	16	10674	16	18
+lbc401	16	10701	16	18
+lbc402	16	10728	16	18
+lbc403	16	10755	16	18
+lbc404	16	10782	16	18
+lbc405	16	10809	16	18
+lbc406	16	10836	16	18
+lbc407	16	10863	16	18
+lbc408	16	10890	16	18
+lbc409	16	10917	16	18
+lbc410	16	10944	16	18
+lbc411	16	10971	16	18
+lbc412	16	10998	16	18
+lbc413	16	11025	16	18
+lbc414	16	11052	16	18
+lbc415	16	11079	16	18
+lbc416	16	11106	16	18
+lbc417	16	11133	16	18
+lbc418	16	11160	16	18
+lbc419	16	11187	16	18
+lbc420	16	11214	16	18
+lbc421	16	11241	16	18
+lbc422	16	11268	16	18
+lbc423	16	11295	16	18
+lbc424	16	11322	16	18
+lbc425	16	11349	16	18
+lbc426	16	11376	16	18
+lbc427	16	11403	16	18
+lbc428	16	11430	16	18
+lbc429	16	11457	16	18
+lbc430	16	11484	16	18
+lbc431	16	11511	16	18
+lbc432	16	11538	16	18
+lbc433	16	11565	16	18
+lbc434	16	11592	16	18
+lbc435	16	11619	16	18
+lbc436	16	11646	16	18
+lbc437	16	11673	16	18
+lbc438	16	11700	16	18
+lbc439	16	11727	16	18
+lbc440	16	11754	16	18
+lbc441	16	11781	16	18
+lbc442	16	11808	16	18
+lbc443	16	11835	16	18
+lbc444	16	11862	16	18
+lbc445	16	11889	16	18
+lbc446	16	11916	16	18
+lbc447	16	11943	16	18
+lbc448	16	11970	16	18
+lbc449	16	11997	16	18
+lbc450	16	12024	16	18
diff --git a/pbcore/data/bc_files.fofn b/pbcore/data/bc_files.fofn
new file mode 100644
index 0000000..758a46d
--- /dev/null
+++ b/pbcore/data/bc_files.fofn
@@ -0,0 +1,3 @@
+m140307_221913_42203_c100626172550000001823119008061414_s1_p0.1.bc.h5
+m140307_221913_42203_c100626172550000001823119008061414_s1_p0.2.bc.h5
+m140307_221913_42203_c100626172550000001823119008061414_s1_p0.3.bc.h5
diff --git a/pbcore/data/blasr-output.m4 b/pbcore/data/blasr-output.m4
new file mode 100644
index 0000000..230bc9e
--- /dev/null
+++ b/pbcore/data/blasr-output.m4
@@ -0,0 +1,2 @@
+read1/0_60 lambda_NEB3011 -285 96.7213 0 0 60 60 0 100 160 48502 254
+read2/0_63 lambda_NEB3011 -274 93.6508 0 0 63 63 0 200 260 48502 254
diff --git a/pbcore/data/blasr-output.m5 b/pbcore/data/blasr-output.m5
new file mode 100644
index 0000000..700ae2a
--- /dev/null
+++ b/pbcore/data/blasr-output.m5
@@ -0,0 +1,2 @@
+read1/0_60 60 0 60 +  lambda_NEB3011 48502 100 160 + -285 59 0 1 1 254 CTCTGAAAAGAAAGG-AACGACAGGTGCTGAAAGCGTAGCTTTTTGGCCTCTGTCGTTTCC |||||||||||||||*||||||||||||||||||||*|||||||||||||||||||||||| CTCTGAAAAGAAAGGAAACGACAGGTGCTGAAAGCG-AGCTTTTTGGCCTCTGTCGTTTCC
+read2/0_63 63 0 63 +  lambda_NEB3011 48502 200 260 + -274 59 1 3 0 254 CAAAAAACAGCTGGCTGACATTTTCGGTGCGAGTATCCGTACCATTTTCCAGAACTGGCAGGA ||||||*|||||||||||||||||||||||||||||||||||||**||*|||||||||||||| CAAAAAGCAGCTGGCTGACATTTTCGGTGCGAGTATCCGTACCA--TT-CAGAACTGGCAGGA
diff --git a/pbcore/data/cmph5_mapping.cmp.h5 b/pbcore/data/cmph5_mapping.cmp.h5
new file mode 100644
index 0000000..ae4b5e7
Binary files /dev/null and b/pbcore/data/cmph5_mapping.cmp.h5 differ
diff --git a/pbcore/data/lambdaNEB.fa b/pbcore/data/lambdaNEB.fa
new file mode 100644
index 0000000..33011e5
--- /dev/null
+++ b/pbcore/data/lambdaNEB.fa
@@ -0,0 +1,608 @@
+>lambda_NEB3011
+GGGCGGCGACCTCGCGGGTTTTCGCTATTTATGAAAATTTTCCGGTTTAAGGCGTTTCCGTTCTTCTTCGTCATAACTTA
+ATGTTTTTATTTAAAATACCCTCTGAAAAGAAAGGAAACGACAGGTGCTGAAAGCGAGCTTTTTGGCCTCTGTCGTTTCC
+TTTCTCTGTTTTTGTCCGTGGAATGAACAATGGAAGTCAACAAAAAGCAGCTGGCTGACATTTTCGGTGCGAGTATCCGT
+ACCATTCAGAACTGGCAGGAACAGGGAATGCCCGTTCTGCGAGGCGGTGGCAAGGGTAATGAGGTGCTTTATGACTCTGC
+CGCCGTCATAAAATGGTATGCCGAAAGGGATGCTGAAATTGAGAACGAAAAGCTGCGCCGGGAGGTTGAAGAACTGCGGC
+AGGCCAGCGAGGCAGATCTCCAGCCAGGAACTATTGAGTACGAACGCCATCGACTTACGCGTGCGCAGGCCGACGCACAG
+GAACTGAAGAATGCCAGAGACTCCGCTGAAGTGGTGGAAACCGCATTCTGTACTTTCGTGCTGTCGCGGATCGCAGGTGA
+AATTGCCAGTATTCTCGACGGGCTCCCCCTGTCGGTGCAGCGGCGTTTTCCGGAACTGGAAAACCGACATGTTGATTTCC
+TGAAACGGGATATCATCAAAGCCATGAACAAAGCAGCCGCGCTGGATGAACTGATACCGGGGTTGCTGAGTGAATATATC
+GAACAGTCAGGTTAACAGGCTGCGGCATTTTGTCCGCGCCGGGCTTCGCTCACTGTTCAGGCCGGAGCCACAGACCGCCG
+TTGAATGGGCGGATGCTAATTACTATCTCCCGAAAGAATCCGCATACCAGGAAGGGCGCTGGGAAACACTGCCCTTTCAG
+CGGGCCATCATGAATGCGATGGGCAGCGACTACATCCGTGAGGTGAATGTGGTGAAGTCTGCCCGTGTCGGTTATTCCAA
+AATGCTGCTGGGTGTTTATGCCTACTTTATAGAGCATAAGCAGCGCAACACCCTTATCTGGTTGCCGACGGATGGTGATG
+CCGAGAACTTTATGAAAACCCACGTTGAGCCGACTATTCGTGATATTCCGTCGCTGCTGGCGCTGGCCCCGTGGTATGGC
+AAAAAGCACCGGGATAACACGCTCACCATGAAGCGTTTCACTAATGGGCGTGGCTTCTGGTGCCTGGGCGGTAAAGCGGC
+AAAAAACTACCGTGAAAAGTCGGTGGATGTGGCGGGTTATGATGAACTTGCTGCTTTTGATGATGATATTGAACAGGAAG
+GCTCTCCGACGTTCCTGGGTGACAAGCGTATTGAAGGCTCGGTCTGGCCAAAGTCCATCCGTGGCTCCACGCCAAAAGTG
+AGAGGCACCTGTCAGATTGAGCGTGCAGCCAGTGAATCCCCGCATTTTATGCGTTTTCATGTTGCCTGCCCGCATTGCGG
+GGAGGAGCAGTATCTTAAATTTGGCGACAAAGAGACGCCGTTTGGCCTCAAATGGACGCCGGATGACCCCTCCAGCGTGT
+TTTATCTCTGCGAGCATAATGCCTGCGTCATCCGCCAGCAGGAGCTGGACTTTACTGATGCCCGTTATATCTGCGAAAAG
+ACCGGGATCTGGACCCGTGATGGCATTCTCTGGTTTTCGTCATCCGGTGAAGAGATTGAGCCACCTGACAGTGTGACCTT
+TCACATCTGGACAGCGTACAGCCCGTTCACCACCTGGGTGCAGATTGTCAAAGACTGGATGAAAACGAAAGGGGATACGG
+GAAAACGTAAAACCTTCGTAAACACCACGCTCGGTGAGACGTGGGAGGCGAAAATTGGCGAACGTCCGGATGCTGAAGTG
+ATGGCAGAGCGGAAAGAGCATTATTCAGCGCCCGTTCCTGACCGTGTGGCTTACCTGACCGCCGGTATCGACTCCCAGCT
+GGACCGCTACGAAATGCGCGTATGGGGATGGGGGCCGGGTGAGGAAAGCTGGCTGATTGACCGGCAGATTATTATGGGCC
+GCCACGACGATGAACAGACGCTGCTGCGTGTGGATGAGGCCATCAATAAAACCTATACCCGCCGGAATGGTGCAGAAATG
+TCGATATCCCGTATCTGCTGGGATACTGGCGGGATTGACCCGACCATTGTGTATGAACGCTCGAAAAAACATGGGCTGTT
+CCGGGTGATCCCCATTAAAGGGGCATCCGTCTACGGAAAGCCGGTGGCCAGCATGCCACGTAAGCGAAACAAAAACGGGG
+TTTACCTTACCGAAATCGGTACGGATACCGCGAAAGAGCAGATTTATAACCGCTTCACACTGACGCCGGAAGGGGATGAA
+CCGCTTCCCGGTGCCGTTCACTTCCCGAATAACCCGGATATTTTTGATCTGACCGAAGCGCAGCAGCTGACTGCTGAAGA
+GCAGGTCGAAAAATGGGTGGATGGCAGGAAAAAAATACTGTGGGACAGCAAAAAGCGACGCAATGAGGCACTCGACTGCT
+TCGTTTATGCGCTGGCGGCGCTGCGCATCAGTATTTCCCGCTGGCAGCTGGATCTCAGTGCGCTGCTGGCGAGCCTGCAG
+GAAGAGGATGGTGCAGCAACCAACAAGAAAACACTGGCAGATTACGCCCGTGCCTTATCCGGAGAGGATGAATGACGCGA
+CAGGAAGAACTTGCCGCTGCCCGTGCGGCACTGCATGACCTGATGACAGGTAAACGGGTGGCAACAGTACAGAAAGACGG
+ACGAAGGGTGGAGTTTACGGCCACTTCCGTGTCTGACCTGAAAAAATATATTGCAGAGCTGGAAGTGCAGACCGGCATGA
+CACAGCGACGCAGGGGACCTGCAGGATTTTATGTATGAAAACGCCCACCATTCCCACCCTTCTGGGGCCGGACGGCATGA
+CATCGCTGCGCGAATATGCCGGTTATCACGGCGGTGGCAGCGGATTTGGAGGGCAGTTGCGGTCGTGGAACCCACCGAGT
+GAAAGTGTGGATGCAGCCCTGTTGCCCAACTTTACCCGTGGCAATGCCCGCGCAGACGATCTGGTACGCAATAACGGCTA
+TGCCGCCAACGCCATCCAGCTGCATCAGGATCATATCGTCGGGTCTTTTTTCCGGCTCAGTCATCGCCCAAGCTGGCGCT
+ATCTGGGCATCGGGGAGGAAGAAGCCCGTGCCTTTTCCCGCGAGGTTGAAGCGGCATGGAAAGAGTTTGCCGAGGATGAC
+TGCTGCTGCATTGACGTTGAGCGAAAACGCACGTTTACCATGATGATTCGGGAAGGTGTGGCCATGCACGCCTTTAACGG
+TGAACTGTTCGTTCAGGCCACCTGGGATACCAGTTCGTCGCGGCTTTTCCGGACACAGTTCCGGATGGTCAGCCCGAAGC
+GCATCAGCAACCCGAACAATACCGGCGACAGCCGGAACTGCCGTGCCGGTGTGCAGATTAATGACAGCGGTGCGGCGCTG
+GGATATTACGTCAGCGAGGACGGGTATCCTGGCTGGATGCCGCAGAAATGGACATGGATACCCCGTGAGTTACCCGGCGG
+GCGCGCCTCGTTCATTCACGTTTTTGAACCCGTGGAGGACGGGCAGACTCGCGGTGCAAATGTGTTTTACAGCGTGATGG
+AGCAGATGAAGATGCTCGACACGCTGCAGAACACGCAGCTGCAGAGCGCCATTGTGAAGGCGATGTATGCCGCCACCATT
+GAGAGTGAGCTGGATACGCAGTCAGCGATGGATTTTATTCTGGGCGCGAACAGTCAGGAGCAGCGGGAAAGGCTGACCGG
+CTGGATTGGTGAAATTGCCGCGTATTACGCCGCAGCGCCGGTCCGGCTGGGAGGCGCAAAAGTACCGCACCTGATGCCGG
+GTGACTCACTGAACCTGCAGACGGCTCAGGATACGGATAACGGCTACTCCGTGTTTGAGCAGTCACTGCTGCGGTATATC
+GCTGCCGGGCTGGGTGTCTCGTATGAGCAGCTTTCCCGGAATTACGCCCAGATGAGCTACTCCACGGCACGGGCCAGTGC
+GAACGAGTCGTGGGCGTACTTTATGGGGCGGCGAAAATTCGTCGCATCCCGTCAGGCGAGCCAGATGTTTCTGTGCTGGC
+TGGAAGAGGCCATCGTTCGCCGCGTGGTGACGTTACCTTCAAAAGCGCGCTTCAGTTTTCAGGAAGCCCGCAGTGCCTGG
+GGGAACTGCGACTGGATAGGCTCCGGTCGTATGGCCATCGATGGTCTGAAAGAAGTTCAGGAAGCGGTGATGCTGATAGA
+AGCCGGACTGAGTACCTACGAGAAAGAGTGCGCAAAACGCGGTGACGACTATCAGGAAATTTTTGCCCAGCAGGTCCGTG
+AAACGATGGAGCGCCGTGCAGCCGGTCTTAAACCGCCCGCCTGGGCGGCTGCAGCATTTGAATCCGGGCTGCGACAATCA
+ACAGAGGAGGAGAAGAGTGACAGCAGAGCTGCGTAATCTCCCGCATATTGCCAGCATGGCCTTTAATGAGCCGCTGATGC
+TTGAACCCGCCTATGCGCGGGTTTTCTTTTGTGCGCTTGCAGGCCAGCTTGGGATCAGCAGCCTGACGGATGCGGTGTCC
+GGCGACAGCCTGACTGCCCAGGAGGCACTCGCGACGCTGGCATTATCCGGTGATGATGACGGACCACGACAGGCCCGCAG
+TTATCAGGTCATGAACGGCATCGCCGTGCTGCCGGTGTCCGGCACGCTGGTCAGCCGGACGCGGGCGCTGCAGCCGTACT
+CGGGGATGACCGGTTACAACGGCATTATCGCCCGTCTGCAACAGGCTGCCAGCGATCCGATGGTGGACGGCATTCTGCTC
+GATATGGACACGCCCGGCGGGATGGTGGCGGGGGCATTTGACTGCGCTGACATCATCGCCCGTGTGCGTGACATAAAACC
+GGTATGGGCGCTTGCCAACGACATGAACTGCAGTGCAGGTCAGTTGCTTGCCAGTGCCGCCTCCCGGCGTCTGGTCACGC
+AGACCGCCCGGACAGGCTCCATCGGCGTCATGATGGCTCACAGTAATTACGGTGCTGCGCTGGAGAAACAGGGTGTGGAA
+ATCACGCTGATTTACAGCGGCAGCCATAAGGTGGATGGCAACCCCTACAGCCATCTTCCGGATGACGTCCGGGAGACACT
+GCAGTCCCGGATGGACGCAACCCGCCAGATGTTTGCGCAGAAGGTGTCGGCATATACCGGCCTGTCCGTGCAGGTTGTGC
+TGGATACCGAGGCTGCAGTGTACAGCGGTCAGGAGGCCATTGATGCCGGACTGGCTGATGAACTTGTTAACAGCACCGAT
+GCGATCACCGTCATGCGTGATGCACTGGATGCACGTAAATCCCGTCTCTCAGGAGGGCGAATGACCAAAGAGACTCAATC
+AACAACTGTTTCAGCCACTGCTTCGCAGGCTGACGTTACTGACGTGGTGCCAGCGACGGAGGGCGAGAACGCCAGCGCGG
+CGCAGCCGGACGTGAACGCGCAGATCACCGCAGCGGTTGCGGCAGAAAACAGCCGCATTATGGGGATCCTCAACTGTGAG
+GAGGCTCACGGACGCGAAGAACAGGCACGCGTGCTGGCAGAAACCCCCGGTATGACCGTGAAAACGGCCCGCCGCATTCT
+GGCCGCAGCACCACAGAGTGCACAGGCGCGCAGTGACACTGCGCTGGATCGTCTGATGCAGGGGGCACCGGCACCGCTGG
+CTGCAGGTAACCCGGCATCTGATGCCGTTAACGATTTGCTGAACACACCAGTGTAAGGGATGTTTATGACGAGCAAAGAA
+ACCTTTACCCATTACCAGCCGCAGGGCAACAGTGACCCGGCTCATACCGCAACCGCGCCCGGCGGATTGAGTGCGAAAGC
+GCCTGCAATGACCCCGCTGATGCTGGACACCTCCAGCCGTAAGCTGGTTGCGTGGGATGGCACCACCGACGGTGCTGCCG
+TTGGCATTCTTGCGGTTGCTGCTGACCAGACCAGCACCACGCTGACGTTCTACAAGTCCGGCACGTTCCGTTATGAGGAT
+GTGCTCTGGCCGGAGGCTGCCAGCGACGAGACGAAAAAACGGACCGCGTTTGCCGGAACGGCAATCAGCATCGTTTAACT
+TTACCCTTCATCACTAAAGGCCGCCTGTGCGGCTTTTTTTACGGGATTTTTTTATGTCGATGTACACAACCGCCCAACTG
+CTGGCGGCAAATGAGCAGAAATTTAAGTTTGATCCGCTGTTTCTGCGTCTCTTTTTCCGTGAGAGCTATCCCTTCACCAC
+GGAGAAAGTCTATCTCTCACAAATTCCGGGACTGGTAAACATGGCGCTGTACGTTTCGCCGATTGTTTCCGGTGAGGTTA
+TCCGTTCCCGTGGCGGCTCCACCTCTGAATTTACGCCGGGATATGTCAAGCCGAAGCATGAAGTGAATCCGCAGATGACC
+CTGCGTCGCCTGCCGGATGAAGATCCGCAGAATCTGGCGGACCCGGCTTACCGCCGCCGTCGCATCATCATGCAGAACAT
+GCGTGACGAAGAGCTGGCCATTGCTCAGGTCGAAGAGATGCAGGCAGTTTCTGCCGTGCTTAAGGGCAAATACACCATGA
+CCGGTGAAGCCTTCGATCCGGTTGAGGTGGATATGGGCCGCAGTGAGGAGAATAACATCACGCAGTCCGGCGGCACGGAG
+TGGAGCAAGCGTGACAAGTCCACGTATGACCCGACCGACGATATCGAAGCCTACGCGCTGAACGCCAGCGGTGTGGTGAA
+TATCATCGTGTTCGATCCGAAAGGCTGGGCGCTGTTCCGTTCCTTCAAAGCCGTCAAGGAGAAGCTGGATACCCGTCGTG
+GCTCTAATTCCGAGCTGGAGACAGCGGTGAAAGACCTGGGCAAAGCGGTGTCCTATAAGGGGATGTATGGCGATGTGGCC
+ATCGTCGTGTATTCCGGACAGTACGTGGAAAACGGCGTCAAAAAGAACTTCCTGCCGGACAACACGATGGTGCTGGGGAA
+CACTCAGGCACGCGGTCTGCGCACCTATGGCTGCATTCAGGATGCGGACGCACAGCGCGAAGGCATTAACGCCTCTGCCC
+GTTACCCGAAAAACTGGGTGACCACCGGCGATCCGGCGCGTGAGTTCACCATGATTCAGTCAGCACCGCTGATGCTGCTG
+GCTGACCCTGATGAGTTCGTGTCCGTACAACTGGCGTAATCATGGCCCTTCGGGGCCATTGTTTCTCTGTGGAGGAGTCC
+ATGACGAAAGATGAACTGATTGCCCGTCTCCGCTCGCTGGGTGAACAACTGAACCGTGATGTCAGCCTGACGGGGACGAA
+AGAAGAACTGGCGCTCCGTGTGGCAGAGCTGAAAGAGGAGCTTGATGACACGGATGAAACTGCCGGTCAGGACACCCCTC
+TCAGCCGGGAAAATGTGCTGACCGGACATGAAAATGAGGTGGGATCAGCGCAGCCGGATACCGTGATTCTGGATACGTCT
+GAACTGGTCACGGTCGTGGCACTGGTGAAGCTGCATACTGATGCACTTCACGCCACGCGGGATGAACCTGTGGCATTTGT
+GCTGCCGGGAACGGCGTTTCGTGTCTCTGCCGGTGTGGCAGCCGAAATGACAGAGCGCGGCCTGGCCAGAATGCAATAAC
+GGGAGGCGCTGTGGCTGATTTCGATAACCTGTTCGATGCTGCCATTGCCCGCGCCGATGAAACGATACGCGGGTACATGG
+GAACGTCAGCCACCATTACATCCGGTGAGCAGTCAGGTGCGGTGATACGTGGTGTTTTTGATGACCCTGAAAATATCAGC
+TATGCCGGACAGGGCGTGCGCGTTGAAGGCTCCAGCCCGTCCCTGTTTGTCCGGACTGATGAGGTGCGGCAGCTGCGGCG
+TGGAGACACGCTGACCATCGGTGAGGAAAATTTCTGGGTAGATCGGGTTTCGCCGGATGATGGCGGAAGTTGTCATCTCT
+GGCTTGGACGGGGCGTACCGCCTGCCGTTAACCGTCGCCGCTGAAAGGGGGATGTATGGCCATAAAAGGTCTTGAGCAGG
+CCGTTGAAAACCTCAGCCGTATCAGCAAAACGGCGGTGCCTGGTGCCGCCGCAATGGCCATTAACCGCGTTGCTTCATCC
+GCGATATCGCAGTCGGCGTCACAGGTTGCCCGTGAGACAAAGGTACGCCGGAAACTGGTAAAGGAAAGGGCCAGGCTGAA
+AAGGGCCACGGTCAAAAATCCGCAGGCCAGAATCAAAGTTAACCGGGGGGATTTGCCCGTAATCAAGCTGGGTAATGCGC
+GGGTTGTCCTTTCGCGCCGCAGGCGTCGTAAAAAGGGGCAGCGTTCATCCCTGAAAGGTGGCGGCAGCGTGCTTGTGGTG
+GGTAACCGTCGTATTCCCGGCGCGTTTATTCAGCAACTGAAAAATGGCCGGTGGCATGTCATGCAGCGTGTGGCTGGGAA
+AAACCGTTACCCCATTGATGTGGTGAAAATCCCGATGGCGGTGCCGCTGACCACGGCGTTTAAACAAAATATTGAGCGGA
+TACGGCGTGAACGTCTTCCGAAAGAGCTGGGCTATGCGCTGCAGCATCAACTGAGGATGGTAATAAAGCGATGAAACATA
+CTGAACTCCGTGCAGCCGTACTGGATGCACTGGAGAAGCATGACACCGGGGCGACGTTTTTTGATGGTCGCCCCGCTGTT
+TTTGATGAGGCGGATTTTCCGGCAGTTGCCGTTTATCTCACCGGCGCTGAATACACGGGCGAAGAGCTGGACAGCGATAC
+CTGGCAGGCGGAGCTGCATATCGAAGTTTTCCTGCCTGCTCAGGTGCCGGATTCAGAGCTGGATGCGTGGATGGAGTCCC
+GGATTTATCCGGTGATGAGCGATATCCCGGCACTGTCAGATTTGATCACCAGTATGGTGGCCAGCGGCTATGACTACCGG
+CGCGACGATGATGCGGGCTTGTGGAGTTCAGCCGATCTGACTTATGTCATTACCTATGAAATGTGAGGACGCTATGCCTG
+TACCAAATCCTACAATGCCGGTGAAAGGTGCCGGGACCACCCTGTGGGTTTATAAGGGGAGCGGTGACCCTTACGCGAAT
+CCGCTTTCAGACGTTGACTGGTCGCGTCTGGCAAAAGTTAAAGACCTGACGCCCGGCGAACTGACCGCTGAGTCCTATGA
+CGACAGCTATCTCGATGATGAAGATGCAGACTGGACTGCGACCGGGCAGGGGCAGAAATCTGCCGGAGATACCAGCTTCA
+CGCTGGCGTGGATGCCCGGAGAGCAGGGGCAGCAGGCGCTGCTGGCGTGGTTTAATGAAGGCGATACCCGTGCCTATAAA
+ATCCGCTTCCCGAACGGCACGGTCGATGTGTTCCGTGGCTGGGTCAGCAGTATCGGTAAGGCGGTGACGGCGAAGGAAGT
+GATCACCCGCACGGTGAAAGTCACCAATGTGGGACGTCCGTCGATGGCAGAAGATCGCAGCACGGTAACAGCGGCAACCG
+GCATGACCGTGACGCCTGCCAGCACCTCGGTGGTGAAAGGGCAGAGCACCACGCTGACCGTGGCCTTCCAGCCGGAGGGC
+GTAACCGACAAGAGCTTTCGTGCGGTGTCTGCGGATAAAACAAAAGCCACCGTGTCGGTCAGTGGTATGACCATCACCGT
+GAACGGCGTTGCTGCAGGCAAGGTCAACATTCCGGTTGTATCCGGTAATGGTGAGTTTGCTGCGGTTGCAGAAATTACCG
+TCACCGCCAGTTAATCCGGAGAGTCAGCGATGTTCCTGAAAACCGAATCATTTGAACATAACGGTGTGACCGTCACGCTT
+TCTGAACTGTCAGCCCTGCAGCGCATTGAGCATCTCGCCCTGATGAAACGGCAGGCAGAACAGGCGGAGTCAGACAGCAA
+CCGGAAGTTTACTGTGGAAGACGCCATCAGAACCGGCGCGTTTCTGGTGGCGATGTCCCTGTGGCATAACCATCCGCAGA
+AGACGCAGATGCCGTCCATGAATGAAGCCGTTAAACAGATTGAGCAGGAAGTGCTTACCACCTGGCCCACGGAGGCAATT
+TCTCATGCTGAAAACGTGGTGTACCGGCTGTCTGGTATGTATGAGTTTGTGGTGAATAATGCCCCTGAACAGACAGAGGA
+CGCCGGGCCCGCAGAGCCTGTTTCTGCGGGAAAGTGTTCGACGGTGAGCTGAGTTTTGCCCTGAAACTGGCGCGTGAGAT
+GGGGCGACCCGACTGGCGTGCCATGCTTGCCGGGATGTCATCCACGGAGTATGCCGACTGGCACCGCTTTTACAGTACCC
+ATTATTTTCATGATGTTCTGCTGGATATGCACTTTTCCGGGCTGACGTACACCGTGCTCAGCCTGTTTTTCAGCGATCCG
+GATATGCATCCGCTGGATTTCAGTCTGCTGAACCGGCGCGAGGCTGACGAAGAGCCTGAAGATGATGTGCTGATGCAGAA
+AGCGGCAGGGCTTGCCGGAGGTGTCCGCTTTGGCCCGGACGGGAATGAAGTTATCCCCGCTTCCCCGGATGTGGCGGACA
+TGACGGAGGATGACGTAATGCTGATGACAGTATCAGAAGGGATCGCAGGAGGAGTCCGGTATGGCTGAACCGGTAGGCGA
+TCTGGTCGTTGATTTGAGTCTGGATGCGGCCAGATTTGACGAGCAGATGGCCAGAGTCAGGCGTCATTTTTCTGGTACGG
+AAAGTGATGCGAAAAAAACAGCGGCAGTCGTTGAACAGTCGCTGAGCCGACAGGCGCTGGCTGCACAGAAAGCGGGGATT
+TCCGTCGGGCAGTATAAAGCCGCCATGCGTATGCTGCCTGCACAGTTCACCGACGTGGCCACGCAGCTTGCAGGCGGGCA
+AAGTCCGTGGCTGATCCTGCTGCAACAGGGGGGGCAGGTGAAGGACTCCTTCGGCGGGATGATCCCCATGTTCAGGGGGC
+TTGCCGGTGCGATCACCCTGCCGATGGTGGGGGCCACCTCGCTGGCGGTGGCGACCGGTGCGCTGGCGTATGCCTGGTAT
+CAGGGCAACTCAACCCTGTCCGATTTCAACAAAACGCTGGTCCTTTCCGGCAATCAGGCGGGACTGACGGCAGATCGTAT
+GCTGGTCCTGTCCAGAGCCGGGCAGGCGGCAGGGCTGACGTTTAACCAGACCAGCGAGTCACTCAGCGCACTGGTTAAGG
+CGGGGGTAAGCGGTGAGGCTCAGATTGCGTCCATCAGCCAGAGTGTGGCGCGTTTCTCCTCTGCATCCGGCGTGGAGGTG
+GACAAGGTCGCTGAAGCCTTCGGGAAGCTGACCACAGACCCGACGTCGGGGCTGACGGCGATGGCTCGCCAGTTCCATAA
+CGTGTCGGCGGAGCAGATTGCGTATGTTGCTCAGTTGCAGCGTTCCGGCGATGAAGCCGGGGCATTGCAGGCGGCGAACG
+AGGCCGCAACGAAAGGGTTTGATGACCAGACCCGCCGCCTGAAAGAGAACATGGGCACGCTGGAGACCTGGGCAGACAGG
+ACTGCGCGGGCATTCAAATCCATGTGGGATGCGGTGCTGGATATTGGTCGTCCTGATACCGCGCAGGAGATGCTGATTAA
+GGCAGAGGCTGCGTATAAGAAAGCAGACGACATCTGGAATCTGCGCAAGGATGATTATTTTGTTAACGATGAAGCGCGGG
+CGCGTTACTGGGATGATCGTGAAAAGGCCCGTCTTGCGCTTGAAGCCGCCCGAAAGAAGGCTGAGCAGCAGACTCAACAG
+GACAAAAATGCGCAGCAGCAGAGCGATACCGAAGCGTCACGGCTGAAATATACCGAAGAGGCGCAGAAGGCTTACGAACG
+GCTGCAGACGCCGCTGGAGAAATATACCGCCCGTCAGGAAGAACTGAACAAGGCACTGAAAGACGGGAAAATCCTGCAGG
+CGGATTACAACACGCTGATGGCGGCGGCGAAAAAGGATTATGAAGCGACGCTGAAAAAGCCGAAACAGTCCAGCGTGAAG
+GTGTCTGCGGGCGATCGTCAGGAAGACAGTGCTCATGCTGCCCTGCTGACGCTTCAGGCAGAACTCCGGACGCTGGAGAA
+GCATGCCGGAGCAAATGAGAAAATCAGCCAGCAGCGCCGGGATTTGTGGAAGGCGGAGAGTCAGTTCGCGGTACTGGAGG
+AGGCGGCGCAACGTCGCCAGCTGTCTGCACAGGAGAAATCCCTGCTGGCGCATAAAGATGAGACGCTGGAGTACAAACGC
+CAGCTGGCTGCACTTGGCGACAAGGTTACGTATCAGGAGCGCCTGAACGCGCTGGCGCAGCAGGCGGATAAATTCGCACA
+GCAGCAACGGGCAAAACGGGCCGCCATTGATGCGAAAAGCCGGGGGCTGACTGACCGGCAGGCAGAACGGGAAGCCACGG
+AACAGCGCCTGAAGGAACAGTATGGCGATAATCCGCTGGCGCTGAATAACGTCATGTCAGAGCAGAAAAAGACCTGGGCG
+GCTGAAGACCAGCTTCGCGGGAACTGGATGGCAGGCCTGAAGTCCGGCTGGAGTGAGTGGGAAGAGAGCGCCACGGACAG
+TATGTCGCAGGTAAAAAGTGCAGCCACGCAGACCTTTGATGGTATTGCACAGAATATGGCGGCGATGCTGACCGGCAGTG
+AGCAGAACTGGCGCAGCTTCACCCGTTCCGTGCTGTCCATGATGACAGAAATTCTGCTTAAGCAGGCAATGGTGGGGATT
+GTCGGGAGTATCGGCAGCGCCATTGGCGGGGCTGTTGGTGGCGGCGCATCCGCGTCAGGCGGTACAGCCATTCAGGCCGC
+TGCGGCGAAATTCCATTTTGCAACCGGAGGATTTACGGGAACCGGCGGCAAATATGAGCCAGCGGGGATTGTTCACCGTG
+GTGAGTTTGTCTTCACGAAGGAGGCAACCAGCCGGATTGGCGTGGGGAATCTTTACCGGCTGATGCGCGGCTATGCCACC
+GGCGGTTATGTCGGTACACCGGGCAGCATGGCAGACAGCCGGTCGCAGGCGTCCGGGACGTTTGAGCAGAATAACCATGT
+GGTGATTAACAACGACGGCACGAACGGGCAGATAGGTCCGGCTGCTCTGAAGGCGGTGTATGACATGGCCCGCAAGGGTG
+CCCGTGATGAAATTCAGACACAGATGCGTGATGGTGGCCTGTTCTCCGGAGGTGGACGATGAAGACCTTCCGCTGGAAAG
+TGAAACCCGGTATGGATGTGGCTTCGGTCCCTTCTGTAAGAAAGGTGCGCTTTGGTGATGGCTATTCTCAGCGAGCGCCT
+GCCGGGCTGAATGCCAACCTGAAAACGTACAGCGTGACGCTTTCTGTCCCCCGTGAGGAGGCCACGGTACTGGAGTCGTT
+TCTGGAAGAGCACGGGGGCTGGAAATCCTTTCTGTGGACGCCGCCTTATGAGTGGCGGCAGATAAAGGTGACCTGCGCAA
+AATGGTCGTCGCGGGTCAGTATGCTGCGTGTTGAGTTCAGCGCAGAGTTTGAACAGGTGGTGAACTGATGCAGGATATCC
+GGCAGGAAACACTGAATGAATGCACCCGTGCGGAGCAGTCGGCCAGCGTGGTGCTCTGGGAAATCGACCTGACAGAGGTC
+GGTGGAGAACGTTATTTTTTCTGTAATGAGCAGAACGAAAAAGGTGAGCCGGTCACCTGGCAGGGGCGACAGTATCAGCC
+GTATCCCATTCAGGGGAGCGGTTTTGAACTGAATGGCAAAGGCACCAGTACGCGCCCCACGCTGACGGTTTCTAACCTGT
+ACGGTATGGTCACCGGGATGGCGGAAGATATGCAGAGTCTGGTCGGCGGAACGGTGGTCCGGCGTAAGGTTTACGCCCGT
+TTTCTGGATGCGGTGAACTTCGTCAACGGAAACAGTTACGCCGATCCGGAGCAGGAGGTGATCAGCCGCTGGCGCATTGA
+GCAGTGCAGCGAACTGAGCGCGGTGAGTGCCTCCTTTGTACTGTCCACGCCGACGGAAACGGATGGCGCTGTTTTTCCGG
+GACGTATCATGCTGGCCAACACCTGCACCTGGACCTATCGCGGTGACGAGTGCGGTTATAGCGGTCCGGCTGTCGCGGAT
+GAATATGACCAGCCAACGTCCGATATCACGAAGGATAAATGCAGCAAATGCCTGAGCGGTTGTAAGTTCCGCAATAACGT
+CGGCAACTTTGGCGGCTTCCTTTCCATTAACAAACTTTCGCAGTAAATCCCATGACACAGACAGAATCAGCGATTCTGGC
+GCACGCCCGGCGATGTGCGCCAGCGGAGTCGTGCGGCTTCGTGGTAAGCACGCCGGAGGGGGAAAGATATTTCCCCTGCG
+TGAATATCTCCGGTGAGCCGGAGGCGTATTTCCGTATGTCGCCGGAAGACTGGCTGCAGGCAGAAATGCAGGGTGAGATT
+GTGGCGCTGGTCCACAGCCACCCCGGTGGTCTGCCCTGGCTGAGTGAGGCCGACCGGCGGCTGCAGGTGCAGAGTGATTT
+GCCGTGGTGGCTGGTCTGCCGGGGGACGATTCATAAGTTCCGCTGTGTGCCGCATCTCACCGGGCGGCGCTTTGAGCACG
+GTGTGACGGACTGTTACACACTGTTCCGGGATGCTTATCATCTGGCGGGGATTGAGATGCCGGACTTTCATCGTGAGGAT
+GACTGGTGGCGTAACGGCCAGAATCTCTATCTGGATAATCTGGAGGCGACGGGGCTGTATCAGGTGCCGTTGTCAGCGGC
+ACAGCCGGGCGATGTGCTGCTGTGCTGTTTTGGTTCATCAGTGCCGAATCACGCCGCAATTTACTGCGGCGACGGCGAGC
+TGCTGCACCATATTCCTGAACAACTGAGCAAACGAGAGAGGTACACCGACAAATGGCAGCGACGCACACACTCCCTCTGG
+CGTCACCGGGCATGGCGCGCATCTGCCTTTACGGGGATTTACAACGATTTGGTCGCCGCATCGACCTTCGTGTGAAAACG
+GGGGCTGAAGCCATCCGGGCACTGGCCACACAGCTCCCGGCGTTTCGTCAGAAACTGAGCGACGGCTGGTATCAGGTACG
+GATTGCCGGGCGGGACGTCAGCACGTCCGGGTTAACGGCGCAGTTACATGAGACTCTGCCTGATGGCGCTGTAATTCATA
+TTGTTCCCAGAGTCGCCGGGGCCAAGTCAGGTGGCGTATTCCAGATTGTCCTGGGGGCTGCCGCCATTGCCGGATCATTC
+TTTACCGCCGGAGCCACCCTTGCAGCATGGGGGGCAGCCATTGGGGCCGGTGGTATGACCGGCATCCTGTTTTCTCTCGG
+TGCCAGTATGGTGCTCGGTGGTGTGGCGCAGATGCTGGCACCGAAAGCCAGAACTCCCCGTATACAGACAACGGATAACG
+GTAAGCAGAACACCTATTTCTCCTCACTGGATAACATGGTTGCCCAGGGCAATGTTCTGCCTGTTCTGTACGGGGAAATG
+CGCGTGGGGTCACGCGTGGTTTCTCAGGAGATCAGCACGGCAGACGAAGGGGACGGTGGTCAGGTTGTGGTGATTGGTCG
+CTGATGCAAAATGTTTTATGTGAAACCGCCTGCGGGCGGTTTTGTCATTTATGGAGCGTGAGGAATGGGTAAAGGAAGCA
+GTAAGGGGCATACCCCGCGCGAAGCGAAGGACAACCTGAAGTCCACGCAGTTGCTGAGTGTGATCGATGCCATCAGCGAA
+GGGCCGATTGAAGGTCCGGTGGATGGCTTAAAAAGCGTGCTGCTGAACAGTACGCCGGTGCTGGACACTGAGGGGAATAC
+CAACATATCCGGTGTCACGGTGGTGTTCCGGGCTGGTGAGCAGGAGCAGACTCCGCCGGAGGGATTTGAATCCTCCGGCT
+CCGAGACGGTGCTGGGTACGGAAGTGAAATATGACACGCCGATCACCCGCACCATTACGTCTGCAAACATCGACCGTCTG
+CGCTTTACCTTCGGTGTACAGGCACTGGTGGAAACCACCTCAAAGGGTGACAGGAATCCGTCGGAAGTCCGCCTGCTGGT
+TCAGATACAACGTAACGGTGGCTGGGTGACGGAAAAAGACATCACCATTAAGGGCAAAACCACCTCGCAGTATCTGGCCT
+CGGTGGTGATGGGTAACCTGCCGCCGCGCCCGTTTAATATCCGGATGCGCAGGATGACGCCGGACAGCACCACAGACCAG
+CTGCAGAACAAAACGCTCTGGTCGTCATACACTGAAATCATCGATGTGAAACAGTGCTACCCGAACACGGCACTGGTCGG
+CGTGCAGGTGGACTCGGAGCAGTTCGGCAGCCAGCAGGTGAGCCGTAATTATCATCTGCGCGGGCGTATTCTGCAGGTGC
+CGTCGAACTATAACCCGCAGACGCGGCAATACAGCGGTATCTGGGACGGAACGTTTAAACCGGCATACAGCAACAACATG
+GCCTGGTGTCTGTGGGATATGCTGACCCATCCGCGCTACGGCATGGGGAAACGTCTTGGTGCGGCGGATGTGGATAAATG
+GGCGCTGTATGTCATCGGCCAGTACTGCGACCAGTCAGTGCCGGACGGCTTTGGCGGCACGGAGCCGCGCATCACCTGTA
+ATGCGTACCTGACCACACAGCGTAAGGCGTGGGATGTGCTCAGCGATTTCTGCTCGGCGATGCGCTGTATGCCGGTATGG
+AACGGGCAGACGCTGACGTTCGTGCAGGACCGACCGTCGGATAAGACGTGGACCTATAACCGCAGTAATGTGGTGATGCC
+GGATGATGGCGCGCCGTTCCGCTACAGCTTCAGCGCCCTGAAGGACCGCCATAATGCCGTTGAGGTGAACTGGATTGACC
+CGAACAACGGCTGGGAGACGGCGACAGAGCTTGTTGAAGATACGCAGGCCATTGCCCGTTACGGTCGTAATGTTACGAAG
+ATGGATGCCTTTGGCTGTACCAGCCGGGGGCAGGCACACCGCGCCGGGCTGTGGCTGATTAAAACAGAACTGCTGGAAAC
+GCAGACCGTGGATTTCAGCGTCGGCGCAGAAGGGCTTCGCCATGTACCGGGCGATGTTATTGAAATCTGCGATGATGACT
+ATGCCGGTATCAGCACCGGTGGTCGTGTGCTGGCGGTGAACAGCCAGACCCGGACGCTGACGCTCGACCGTGAAATCACG
+CTGCCATCCTCCGGTACCGCGCTGATAAGCCTGGTTGACGGAAGTGGCAATCCGGTCAGCGTGGAGGTTCAGTCCGTCAC
+CGACGGCGTGAAGGTAAAAGTGAGCCGTGTTCCTGACGGTGTTGCTGAATACAGCGTATGGGAGCTGAAGCTGCCGACGC
+TGCGCCAGCGACTGTTCCGCTGCGTGAGTATCCGTGAGAACGACGACGGCACGTATGCCATCACCGCCGTGCAGCATGTG
+CCGGAAAAAGAGGCCATCGTGGATAACGGGGCGCACTTTGACGGCGAACAGAGTGGCACGGTGAATGGTGTCACGCCGCC
+AGCGGTGCAGCACCTGACCGCAGAAGTCACTGCAGACAGCGGGGAATATCAGGTGCTGGCGCGATGGGACACACCGAAGG
+TGGTGAAGGGCGTGAGTTTCCTGCTCCGTCTGACCGTAACAGCGGACGACGGCAGTGAGCGGCTGGTCAGCACGGCCCGG
+ACGACGGAAACCACATACCGCTTCACGCAACTGGCGCTGGGGAACTACAGGCTGACAGTCCGGGCGGTAAATGCGTGGGG
+GCAGCAGGGCGATCCGGCGTCGGTATCGTTCCGGATTGCCGCACCGGCAGCACCGTCGAGGATTGAGCTGACGCCGGGCT
+ATTTTCAGATAACCGCCACGCCGCATCTTGCCGTTTATGACCCGACGGTACAGTTTGAGTTCTGGTTCTCGGAAAAGCAG
+ATTGCGGATATCAGACAGGTTGAAACCAGCACGCGTTATCTTGGTACGGCGCTGTACTGGATAGCCGCCAGTATCAATAT
+CAAACCGGGCCATGATTATTACTTTTATATCCGCAGTGTGAACACCGTTGGCAAATCGGCATTCGTGGAGGCCGTCGGTC
+GGGCGAGCGATGATGCGGAAGGTTACCTGGATTTTTTCAAAGGCAAGATAACCGAATCCCATCTCGGCAAGGAGCTGCTG
+GAAAAAGTCGAGCTGACGGAGGATAACGCCAGCAGACTGGAGGAGTTTTCGAAAGAGTGGAAGGATGCCAGTGATAAGTG
+GAATGCCATGTGGGCTGTCAAAATTGAGCAGACCAAAGACGGCAAACATTATGTCGCGGGTATTGGCCTCAGCATGGAGG
+ACACGGAGGAAGGCAAACTGAGCCAGTTTCTGGTTGCCGCCAATCGTATCGCATTTATTGACCCGGCAAACGGGAATGAA
+ACGCCGATGTTTGTGGCGCAGGGCAACCAGATATTCATGAACGACGTGTTCCTGAAGCGCCTGACGGCCCCCACCATTAC
+CAGCGGCGGCAATCCTCCGGCCTTTTCCCTGACACCGGACGGAAAGCTGACCGCTAAAAATGCGGATATCAGTGGCAGTG
+TGAATGCGAACTCCGGGACGCTCAGTAATGTGACGATAGCTGAAAACTGTACGATAAACGGTACGCTGAGGGCGGAAAAA
+ATCGTCGGGGACATTGTAAAGGCGGCGAGCGCGGCTTTTCCGCGCCAGCGTGAAAGCAGTGTGGACTGGCCGTCAGGTAC
+CCGTACTGTCACCGTGACCGATGACCATCCTTTTGATCGCCAGATAGTGGTGCTTCCGCTGACGTTTCGCGGAAGTAAGC
+GTACTGTCAGCGGCAGGACAACGTATTCGATGTGTTATCTGAAAGTACTGATGAACGGTGCGGTGATTTATGATGGCGCG
+GCGAACGAGGCGGTACAGGTGTTCTCCCGTATTGTTGACATGCCAGCGGGTCGGGGAAACGTGATCCTGACGTTCACGCT
+TACGTCCACACGGCATTCGGCAGATATTCCGCCGTATACGTTTGCCAGCGATGTGCAGGTTATGGTGATTAAGAAACAGG
+CGCTGGGCATCAGCGTGGTCTGAGTGTGTTACAGAGGTTCGTCCGGGAACGGGCGTTTTATTATAAAACAGTGAGAGGTG
+AACGATGCGTAATGTGTGTATTGCCGTTGCTGTCTTTGCCGCACTTGCGGTGACAGTCACTCCGGCCCGTGCGGAAGGTG
+GACATGGTACGTTTACGGTGGGCTATTTTCAAGTGAAACCGGGTACATTGCCGTCGTTGTCGGGCGGGGATACCGGTGTG
+AGTCATCTGAAAGGGATTAACGTGAAGTACCGTTATGAGCTGACGGACAGTGTGGGGGTGATGGCTTCCCTGGGGTTCGC
+CGCGTCGAAAAAGAGCAGCACAGTGATGACCGGGGAGGATACGTTTCACTATGAGAGCCTGCGTGGACGTTATGTGAGCG
+TGATGGCCGGACCGGTTTTACAAATCAGTAAGCAGGTCAGTGCGTACGCCATGGCCGGAGTGGCTCACAGTCGGTGGTCC
+GGCAGTACAATGGATTACCGTAAGACGGAAATCACTCCCGGGTATATGAAAGAGACGACCACTGCCAGGGACGAAAGTGC
+AATGCGGCATACCTCAGTGGCGTGGAGTGCAGGTATACAGATTAATCCGGCAGCGTCCGTCGTTGTTGATATTGCTTATG
+AAGGCTCCGGCAGTGGCGACTGGCGTACTGACGGATTCATCGTTGGGGTCGGTTATAAATTCTGATTAGCCAGGTAACAC
+AGTGTTATGACAGCCCGCCGGAACCGGTGGGCTTTTTTGTGGGGTGAATATGGCAGTAAAGATTTCAGGAGTCCTGAAAG
+ACGGCACAGGAAAACCGGTACAGAACTGCACCATTCAGCTGAAAGCCAGACGTAACAGCACCACGGTGGTGGTGAACACG
+GTGGGCTCAGAGAATCCGGATGAAGCCGGGCGTTACAGCATGGATGTGGAGTACGGTCAGTACAGTGTCATCCTGCAGGT
+TGACGGTTTTCCACCATCGCACGCCGGGACCATCACCGTGTATGAAGATTCACAACCGGGGACGCTGAATGATTTTCTCT
+GTGCCATGACGGAGGATGATGCCCGGCCGGAGGTGCTGCGTCGTCTTGAACTGATGGTGGAAGAGGTGGCGCGTAACGCG
+TCCGTGGTGGCACAGAGTACGGCAGACGCGAAGAAATCAGCCGGCGATGCCAGTGCATCAGCTGCTCAGGTCGCGGCCCT
+TGTGACTGATGCAACTGACTCAGCACGCGCCGCCAGCACGTCCGCCGGACAGGCTGCATCGTCAGCTCAGGAAGCGTCCT
+CCGGCGCAGAAGCGGCATCAGCAAAGGCCACTGAAGCGGAAAAAAGTGCCGCAGCCGCAGAGTCCTCAAAAAACGCGGCG
+GCCACCAGTGCCGGTGCGGCGAAAACGTCAGAAACGAATGCTGCAGCGTCACAACAATCAGCCGCCACGTCTGCCTCCAC
+CGCGGCCACGAAAGCGTCAGAGGCCGCCACTTCAGCACGAGATGCGGTGGCCTCAAAAGAGGCAGCAAAATCATCAGAAA
+CGAACGCATCATCAAGTGCCGGTCGTGCAGCTTCCTCGGCAACGGCGGCAGAAAATTCTGCCAGGGCGGCAAAAACGTCC
+GAGACGAATGCCAGGTCATCTGAAACAGCAGCGGAACGGAGCGCCTCTGCCGCGGCAGACGCAAAAACAGCGGCGGCGGG
+GAGTGCGTCAACGGCATCCACGAAGGCGACAGAGGCTGCGGGAAGTGCGGTATCAGCATCGCAGAGCAAAAGTGCGGCAG
+AAGCGGCGGCAATACGTGCAAAAAATTCGGCAAAACGTGCAGAAGATATAGCTTCAGCTGTCGCGCTTGAGGATGCGGAC
+ACAACGAGAAAGGGGATAGTGCAGCTCAGCAGTGCAACCAACAGCACGTCTGAAACGCTTGCTGCAACGCCAAAGGCGGT
+TAAGGTGGTAATGGATGAAACGAACAGAAAAGCCCACTGGACAGTCCGGCACTGACCGGAACGCCAACAGCACCAACCGC
+GCTCAGGGGAACAAACAATACCCAGATTGCGAACACCGCTTTTGTACTGGCCGCGATTGCAGATGTTATCGACGCGTCAC
+CTGACGCACTGAATACGCTGAATGAACTGGCCGCAGCGCTCGGGAATGATCCAGATTTTGCTACCACCATGACTAACGCG
+CTTGCGGGTAAACAACCGAAGAATGCGACACTGACGGCGCTGGCAGGGCTTTCCACGGCGAAAAATAAATTACCGTATTT
+TGCGGAAAATGATGCCGCCAGCCTGACTGAACTGACTCAGGTTGGCAGGGATATTCTGGCAAAAAATTCCGTTGCAGATG
+TTCTTGAATACCTTGGGGCCGGTGAGAATTCGGCCTTTCCGGCAGGTGCGCCGATCCCGTGGCCATCAGATATCGTTCCG
+TCTGGCTACGTCCTGATGCAGGGGCAGGCGTTTGACAAATCAGCCTACCCAAAACTTGCTGTCGCGTATCCATCGGGTGT
+GCTTCCTGATATGCGAGGCTGGACAATCAAGGGGAAACCCGCCAGCGGTCGTGCTGTATTGTCTCAGGAACAGGATGGAA
+TTAAGTCGCACACCCACAGTGCCAGTGCATCCGGTACGGATTTGGGGACGAAAACCACATCGTCGTTTGATTACGGGACG
+AAAACAACAGGCAGTTTCGATTACGGCACCAAATCGACGAATAACACGGGGGCTCATGCTCACAGTCTGAGCGGTTCAAC
+AGGGGCCGCGGGTGCTCATGCCCACACAAGTGGTTTAAGGATGAACAGTTCTGGCTGGAGTCAGTATGGAACAGCAACCA
+TTACAGGAAGTTTATCCACAGTTAAAGGAACCAGCACACAGGGTATTGCTTATTTATCGAAAACGGACAGTCAGGGCAGC
+CACAGTCACTCATTGTCCGGTACAGCCGTGAGTGCCGGTGCACATGCGCATACAGTTGGTATTGGTGCGCACCAGCATCC
+GGTTGTTATCGGTGCTCATGCCCATTCTTTCAGTATTGGTTCACACGGACACACCATCACCGTTAACGCTGCGGGTAACG
+CGGAAAACACCGTCAAAAACATTGCATTTAACTATATTGTGAGGCTTGCATAATGGCATTCAGAATGAGTGAACAACCAC
+GGACCATAAAAATTTATAATCTGCTGGCCGGAACTAATGAATTTATTGGTGAAGGTGACGCATATATTCCGCCTCATACC
+GGTCTGCCTGCAAACAGTACCGATATTGCACCGCCAGATATTCCGGCTGGCTTTGTGGCTGTTTTCAACAGTGATGAGGC
+ATCGTGGCATCTCGTTGAAGACCATCGGGGTAAAACCGTCTATGACGTGGCTTCCGGCGACGCGTTATTTATTTCTGAAC
+TCGGTCCGTTACCGGAAAATTTTACCTGGTTATCGCCGGGAGGGGAATATCAGAAGTGGAACGGCACAGCCTGGGTGAAG
+GATACGGAAGCAGAAAAACTGTTCCGGATCCGGGAGGCGGAAGAAACAAAAAAAAGCCTGATGCAGGTAGCCAGTGAGCA
+TATTGCGCCGCTTCAGGATGCTGCAGATCTGGAAATTGCAACGAAGGAAGAAACCTCGTTGCTGGAAGCCTGGAAGAAGT
+ATCGGGTGTTGCTGAACCGTGTTGATACATCAACTGCACCTGATATTGAGTGGCCTGCTGTCCCTGTTATGGAGTAATCG
+TTTTGTGATATGCCGCAGAAACGTTGTATGAAATAACGTTCTGCGGTTAGTTAGTATATTGTAAAGCTGAGTATTGGTTT
+ATTTGGCGATTATTATCTTCAGGAGAATAATGGAAGTTCTATGACTCAATTGTTCATAGTGTTTACATCACCGCCAATTG
+CTTTTAAGACTGAACGCATGAAATATGGTTTTTCGTCATGTTTTGAGTCTGCTGTTGATATTTCTAAAGTCGGTTTTTTT
+TCTTCGTTTTCTCTAACTATTTTCCATGAAATACATTTTTGATTATTATTTGAATCAATTCCAATTACCTGAAGTCTTTC
+ATCTATAATTGGCATTGTATGTATTGGTTTATTGGAGTAGATGCTTGCTTTTCTGAGCCATAGCTCTGATATCCAAATGA
+AGCCATAGGCATTTGTTATTTTGGCTCTGTCAGCTGCATAACGCCAAAAAATATATTTATCTGCTTGATCTTCAAATGTT
+GTATTGATTAAATCAATTGGATGGAATTGTTTATCATAAAAAATTAATGTTTGAATGTGATAACCGTCCTTTAAAAAAGT
+CGTTTCTGCAAGCTTGGCTGTATAGTCAACTAACTCTTCTGTCGAAGTGATATTTTTAGGCTTATCTACCAGTTTTAGAC
+GCTCTTTAATATCTTCAGGAATTATTTTATTGTCATATTGTATCATGCTAAATGACAATTTGCTTATGGAGTAATCTTTT
+AATTTTAAATAAGTTATTCTCCTGGCTTCATCAAATAAAGAGTCGAATGATGTTGGCGAAATCACATCGTCACCCATTGG
+ATTGTTTATTTGTATGCCAAGAGAGTTACAGCAGTTATACATTCTGCCATAGATTATAGCTAAGGCATGTAATAATTCGT
+AATCTTTTAGCGTATTAGCGACCCATCGTCTTTCTGATTTAATAATAGATGATTCAGTTAAATATGAAGGTAATTTCTTT
+TGTGCAAGTCTGACTAACTTTTTTATACCAATGTTTAACATACTTTCATTTGTAATAAACTCAATGTCATTTTCTTCAAT
+GTAAGATGAAATAAGAGTAGCCTTTGCCTCGCTATACATTTCTAAATCGCCTTGTTTTTCTATCGTATTGCGAGAATTTT
+TAGCCCAAGCCATTAATGGATCATTTTTCCATTTTTCAATAACATTATTGTTATACCAAATGTCATATCCTATAATCTGG
+TTTTTGTTTTTTTGAATAATAAATGTTACTGTTCTTGCGGTTTGGAGGAATTGATTCAAATTCAAGCGAAATAATTCAGG
+GTCAAAATATGTATCAATGCAGCATTTGAGCAAGTGCGATAAATCTTTAAGTCTTCTTTCCCATGGTTTTTTAGTCATAA
+AACTCTCCATTTTGATAGGTTGCATGCTAGATGCTGATATATTTTAGAGGTGATAAAATTAACTGCTTAACTGTCAATGT
+AATACAAGTTGTTTGATCTTTGCAATGATTCTTATCAGAAACCATATAGTAAATTAGTTACACAGGAAATTTTTAATATT
+ATTATTATCATTCATTATGTATTAAAATTAGAGTTGTGGCTTGGCTCTGCTAACACGTTGCTCATAGGAGATATGGTAGA
+GCCGCAGACACGTCGTATGCAGGAACGTGCTGCGGCTGGCTGGTGAACTTCCGATAGTGCGGGTGTTGAATGATTTCCAG
+TTGCTACCGATTTTACATATTTTTTGCATGAGAGAATTTGTACCACCTCCCACCGACCATCTATGACTGTACGCCACTGT
+CCCTAGGACTGCTATGTGCCGGAGCGGACATTACAAACGTCCTTCTCGGTGCATGCCACTGTTGCCAATGACCTGCCTAG
+GAATTGGTTAGCAAGTTACTACCGGATTTTGTAAAAACAGCCCTCCTCATATAAAAAGTATTCGTTCACTTCCGATAAGC
+GTCGTAATTTTCTATCTTTCATCATATTCTAGATCCCTCTGAAAAAATCTTCCGAGTTTGCTAGGCACTGATACATAACT
+CTTTTCCAATAATTGGGGAAGTCATTCAAATCTATAATAGGTTTCAGATTTGCTTCAATAAATTCTGACTGTAGCTGCTG
+AAACGTTGCGGTTGAACTATATTTCCTTATAACTTTTACGAAAGAGTTTCTTTGAGTAATCACTTCACTCAAGTGCTTCC
+CTGCCTCCAAACGATACCTGTTAGCAATATTTAATAGCTTGAAATGATGAAGAGCTCTGTGTTTGTCTTCCTGCCTCCAG
+TTCGCCGGGCATTCAACATAAAAACTGATAGCACCCGGAGTTCCGGAAACGAAATTTGCATATACCCATTGCTCACGAAA
+AAAAATGTCCTTGTCGATATAGGGATGAATCGCTTGGTGTACCTCATCTACTGCGAAAACTTGACCTTTCTCTCCCATAT
+TGCAGTCGCGGCACGATGGAACTAAATTAATAGGCATCACCGAAAATTCAGGATAATGTGCAATAGGAAGAAAATGATCT
+ATATTTTTTGTCTGTCCTATATCACCACAAAATGGACATTTTTCACCTGATGAAACAAGCATGTCATCGTAATATGTTCT
+AGCGGGTTTGTTTTTATCTCGGAGATTATTTTCATAAAGCTTTTCTAATTTAACCTTTGTCAGGTTACCAACTACTAAGG
+TTGTAGGCTCAAGAGGGTGTGTCCTGTCGTAGGTAAATAACTGACCTGTCGAGCTTAATATTCTATATTGTTGTTCTTTC
+TGCAAAAAAGTGGGGAAGTGAGTAATGAAATTATTTCTAACATTTATCTGCATCATACCTTCCGAGCATTTATTAAGCAT
+TTCGCTATAAGTTCTCGCTGGAAGAGGTAGTTTTTTCATTGTACTTTACCTTCATCTCTGTTCATTATCATCGCTTTTAA
+AACGGTTCGACCTTCTAATCCTATCTGACCATTATAATTTTTTAGAATGGTTTCATAAGAAAGCTCTGAATCAACGGACT
+GCGATAATAAGTGGTGGTATCCAGAATTTGTCACTTCAAGTAAAAACACCTCACGAGTTAAAACACCTAAGTTCTCACCG
+AATGTCTCAATATCCGGACGGATAATATTTATTGCTTCTCTTGACCGTAGGACTTTCCACATGCAGGATTTTGGAACCTC
+TTGCAGTACTACTGGGGAATGAGTTGCAATTATTGCTACACCATTGCGTGCATCGAGTAAGTCGCTTAATGTTCGTAAAA
+AAGCAGAGAGCAAAGGTGGATGCAGATGAACCTCTGGTTCATCGAATAAAACTAATGACTTTTCGCCAACGACATCTACT
+AATCTTGTGATAGTAAATAAAACAATTGCATGTCCAGAGCTCATTCGAAGCAGATATTTCTGGATATTGTCATAAAACAA
+TTTAGTGAATTTATCATCGTCCACTTGAATCTGTGGTTCATTACGTCTTAACTCTTCATATTTAGAAATGAGGCTGATGA
+GTTCCATATTTGAAAAGTTTTCATCACTACTTAGTTTTTTGATAGCTTCAAGCCAGAGTTGTCTTTTTCTATCTACTCTC
+ATACAACCAATAAATGCTGAAATGAATTCTAAGCGGAGATCGCCTAGTGATTTTAAACTATTGCTGGCAGCATTCTTGAG
+TCCAATATAAAAGTATTGTGTACCTTTTGCTGGGTCAGGTTGTTCTTTAGGAGGAGTAAAAGGATCAAATGCACTAAACG
+AAACTGAAACAAGCGATCGAAAATATCCCTTTGGGATTCTTGACTCGATAAGTCTATTATTTTCAGAGAAAAAATATTCA
+TTGTTTTCTGGGTTGGTGATTGCACCAATCATTCCATTCAAAATTGTTGTTTTACCACACCCATTCCGCCCGATAAAAGC
+ATGAATGTTCGTGCTGGGCATAGAATTAACCGTCACCTCAAAAGGTATAGTTAAATCACTGAATCCGGGAGCACTTTTTC
+TATTAAATGAAAAGTGGAAATCTGACAATTCTGGCAAACCATTTAACACACGTGCGAACTGTCCATGAATTTCTGAAAGA
+GTTACCCCTCTAAGTAATGAGGTGTTAAGGACGCTTTCATTTTCAATGTCGGCTAATCGATTTGGCCATACTACTAAATC
+CTGAATAGCTTTAAGAAGGTTATGTTTAAAACCATCGCTTAATTTGCTGAGATTAACATAGTAGTCAATGCTTTCACCTA
+AGGAAAAAAACATTTCAGGGAGTTGACTGAATTTTTTATCTATTAATGAATAAGTGCTTACTTCTTCTTTTTGACCTACA
+AAACCAATTTTAACATTTCCGATATCGCATTTTTCACCATGCTCATCAAAGACAGTAAGATAAAACATTGTAACAAAGGA
+ATAGTCATTCCAACCATCTGCTCGTAGGAATGCCTTATTTTTTTCTACTGCAGGAATATACCCGCCTCTTTCAATAACAC
+TAAACTCCAACATATAGTAACCCTTAATTTTATTAAAATAACCGCAATTTATTTGGCGGCAACACAGGATCTCTCTTTTA
+AGTTACTCTCTATTACATACGTTTTCCATCTAAAAATTAGTAGTATTGAACTTAACGGGGCATCGTATTGTAGTTTTCCA
+TATTTAGCTTTCTGCTTCCTTTTGGATAACCCACTGTTATTCATGTTGCATGGTGCACTGTTTATACCAACGATATAGTC
+TATTAATGCATATATAGTATCGCCGAACGATTAGCTCTTCAGGCTTCTGAAGAAGCGTTTCAAGTACTAATAAGCCGATA
+GATAGCCACGGACTTCGTAGCCATTTTTCATAAGTGTTAACTTCCGCTCCTCGCTCATAACAGACATTCACTACAGTTAT
+GGCGGAAAGGTATGCATGCTGGGTGTGGGGAAGTCGTGAAAGAAAAGAAGTCAGCTGCGTCGTTTGACATCACTGCTATC
+TTCTTACTGGTTATGCAGGTCGTAGTGGGTGGCACACAAAGCTTTGCACTGGATTGCGAGGCTTTGTGCTTCTCTGGAGT
+GCGACAGGTTTGATGACAAAAAATTAGCGCAAGAAGACAAAAATCACCTTGCGCTAATGCTCTGTTACAGGTCACTAATA
+CCATCTAAGTAGTTGATTCATAGTGACTGCATATGTTGTGTTTTACAGTATTATGTAGTCTGTTTTTTATGCAAAATCTA
+ATTTAATATATTGATATTTATATCATTTTACGTTTCTCGTTCAGCTTTTTTATACTAAGTTGGCATTATAAAAAAGCATT
+GCTTATCAATTTGTTGCAACGAACAGGTCACTATCAGTCAAAATAAAATCATTATTTGATTTCAATTTTGTCCCACTCCC
+TGCCTCTGTCATCACGATACTGTGATGCCATGGTGTCCGACTTATGCCCGAGAAGATGTTGAGCAAACTTATCGCTTATC
+TGCTTCTCATAGAGTCTTGCAGACAAACTGCGCAACTCGTGAAAGGTAGGCGGATCCCCTTCGAAGGAAAGACCTGATGC
+TTTTCGTGCGCGCATAAAATACCTTGATACTGTGCCGGATGAAAGCGGTTCGCGACGAGTAGATGCAATTATGGTTTCTC
+CGCCAAGAATCTCTTTGCATTTATCAAGTGTTTCCTTCATTGATATTCCGAGAGCATCAATATGCAATGCTGTTGGGATG
+GCAATTTTTACGCCTGTTTTGCTTTGCTCGACATAAAGATATCCATCTACGATATCAGACCACTTCATTTCGCATAAATC
+ACCAACTCGTTGCCCGGTAACAACAGCCAGTTCCATTGCAAGTCTGAGCCAACATGGTGATGATTCTGCTGCTTGATAAA
+TTTTCAGGTATTCGTCAGCCGTAAGTCTTGATCTCCTTACCTCTGATTTTGCTGCGCGAGTGGCAGCGACATGGTTTGTT
+GTTATATGGCCTTCAGCTATTGCCTCTCGGAATGCATCGCTCAGTGTTGATCTGATTAACTTGGCTGACGCCGCCTTGCC
+CTCGTCTATGTATCCATTGAGCATTGCCGCAATTTCTTTTGTGGTGATGTCTTCAAGTGGAGCATCAGGCAGACCCCTCC
+TTATTGCTTTAATTTTGCTCATGTAATTTATGAGTGTCTTCTGCTTGATTCCTCTGCTGGCCAGGATTTTTTCGTAGCGA
+TCAAGCCATGAATGTAACGTAACGGAATTATCACTGTTGATTCTCGCTGTCAGAGGCTTGTGTTTGTGTCCTGAAAATAA
+CTCAATGTTGGCCTGTATAGCTTCAGTGATTGCGATTCGCCTGTCTCTGCCTAATCCAAACTCTTTACCCGTCCTTGGGT
+CCCTGTAGCAGTAATATCCATTGTTTCTTATATAAAGGTTAGGGGGTAAATCCCGGCGCTCATGACTTCGCCTTCTTCCC
+ATTTCTGATCCTCTTCAAAAGGCCACCTGTTACTGGTCGATTTAAGTCAACCTTTACCGCTGATTCGTGGAACAGATACT
+CTCTTCCATCCTTAACCGGAGGTGGGAATATCCTGCATTCCCGAACCCATCGACGAACTGTTTCAAGGCTTCTTGGACGT
+CGCTGGCGTGCGTTCCACTCCTGAAGTGTCAAGTACATCGCAAAGTCTCCGCAATTACACGCAAGAAAAAACCGCCATCA
+GGCGGCTTGGTGTTCTTTCAGTTCTTCAATTCGAATATTGGTTACGTCTGCATGTGCTATCTGCGCCCATATCATCCAGT
+GGTCGTAGCAGTCGTTGATGTTCTCCGCTTCGATAACTCTGTTGAATGGCTCTCCATTCCATTCTCCTGTGACTCGGAAG
+TGCATTTATCATCTCCATAAAACAAAACCCGCCGTAGCGAGTTCAGATAAAATAAATCCCCGCGAGTGCGAGGATTGTTA
+TGTAATATTGGGTTTAATCATCTATATGTTTTGTACAGAGAGGGCAAGTATCGTTTCCACCGTACTCGTGATAATAATTT
+TGCACGGTATCAGTCATTTCTCGCACATTGCAGAATGGGGATTTGTCTTCATTAGACTTATAAACCTTCATGGAATATTT
+GTATGCCGACTCTATATCTATACCTTCATCTACATAAACACCTTCGTGATGTCTGCATGGAGACAAGACACCGGATCTGC
+ACAACATTGATAACGCCCAATCTTTTTGCTCAGACTCTAACTCATTGATACTCATTTATAAACTCCTTGCAATGTATGTC
+GTTTCAGCTAAACGGTATCAGCAATGTTTATGTAAAGAAACAGTAAGATAATACTCAACCCGATGTTTGAGTACGGTCAT
+CATCTGACACTACAGACTCTGGCATCGCTGTGAAGACGACGCGAAATTCAGCATTTTCACAAGCGTTATCTTTTACAAAA
+CCGATCTCACTCTCCTTTGATGCGAATGCCAGCGTCAGACATCATATGCAGATACTCACCTGCATCCTGAACCCATTGAC
+CTCCAACCCCGTAATAGCGATGCGTAATGATGTCGATAGTTACTAACGGGTCTTGTTCGATTAACTGCCGCAGAAACTCT
+TCCAGGTCACCAGTGCAGTGCTTGATAACAGGAGTCTTCCCAGGATGGCGAACAACAAGAAACTGGTTTCCGTCTTCACG
+GACTTCGTTGCTTTCCAGTTTAGCAATACGCTTACTCCCATCCGAGATAACACCTTCGTAATACTCACGCTGCTCGTTGA
+GTTTTGATTTTGCTGTTTCAAGCTCAACACGCAGTTTCCCTACTGTTAGCGCAATATCCTCGTTCTCCTGGTCGCGGCGT
+TTGATGTATTGCTGGTTTCTTTCCCGTTCATCCAGCAGTTCCAGCACAATCGATGGTGTTACCAATTCATGGAAAAGGTC
+TGCGTCAAATCCCCAGTCGTCATGCATTGCCTGCTCTGCCGCTTCACGCAGTGCCTGAGAGTTAATTTCGCTCACTTCGA
+ACCTCTCTGTTTACTGATAAGTTCCAGATCCTCCTGGCAACTTGCACAAGTCCGACAACCCTGAACGACCAGGCGTCTTC
+GTTCATCTATCGGATCGCCACACTCACAACAATGAGTGGCAGATATAGCCTGGTGGTTCAGGCGGCGCATTTTTATTGCT
+GTGTTGCGCTGTAATTCTTCTATTTCTGATGCTGAATCAATGATGTCTGCCATCTTTCATTAATCCCTGAACTGTTGGTT
+AATACGCTTGAGGGTGAATGCGAATAATAAAAAAGGAGCCTGTAGCTCCCTGATGATTTTGCTTTTCATGTTCATCGTTC
+CTTAAAGACGCCGTTTAACATGCCGATTGCCAGGCTTAAATGAGTCGGTGTGAATCCCATCAGCGTTACCGTTTCGCGGT
+GCTTCTTCAGTACGCTACGGCAAATGTCATCGACGTTTTTATCCGGAAACTGCTGTCTGGCTTTTTTTGATTTCAGAATT
+AGCCTGACGGGCAATGCTGCGAAGGGCGTTTTCCTGCTGAGGTGTCATTGAACAAGTCCCATGTCGGCAAGCATAAGCAC
+ACAGAATATGAAGCCCGCTGCCAGAAAAATGCATTCCGTGGTTGTCATACCTGGTTTCTCTCATCTGCTTCTGCTTTCGC
+CACCATCATTTCCAGCTTTTGTGAAAGGGATGCGGCTAACGTATGAAATTCTTCGTCTGTTTCTACTGGTATTGGCACAA
+ACCTGATTCCAATTTGAGCAAGGCTATGTGCCATCTCGATACTCGTTCTTAACTCAACAGAAGATGCTTTGTGCATACAG
+CCCCTCGTTTATTATTTATCTCCTCAGCCAGCCGCTGTGCTTTCAGTGGATTTCGGATAACAGAAAGGCCGGGAAATACC
+CAGCCTCGCTTTGTAACGGAGTAGACGAAAGTGATTGCGCCTACCCGGATATTATCGTGAGGATGCGTCATCGCCATTGC
+TCCCCAAATACAAAACCAATTTCAGCCAGTGCCTCGTCCATTTTTTCGATGAACTCCGGCACGATCTCGTCAAAACTCGC
+CATGTACTTTTCATCCCGCTCAATCACGACATAATGCAGGCCTTCACGCTTCATACGCGGGTCATAGTTGGCAAAGTACC
+AGGCATTTTTTCGCGTCACCCACATGCTGTACTGCACCTGGGCCATGTAAGCTGACTTTATGGCCTCGAAACCACCGAGC
+CGGAACTTCATGAAATCCCGGGAGGTAAACGGGCATTTCAGTTCAAGGCCGTTGCCGTCACTGCATAAACCATCGGGAGA
+GCAGGCGGTACGCATACTTTCGTCGCGATAGATGATCGGGGATTCAGTAACATTCACGCCGGAAGTGAATTCAAACAGGG
+TTCTGGCGTCGTTCTCGTACTGTTTTCCCCAGGCCAGTGCTTTAGCGTTAACTTCCGGAGCCACACCGGTGCAAACCTCA
+GCAAGCAGGGTGTGGAAGTAGGACATTTTCATGTCAGGCCACTTCTTTCCGGAGCGGGGTTTTGCTATCACGTTGTGAAC
+TTCTGAAGCGGTGATGACGCCGAGCCGTAATTTGTGCCACGCATCATCCCCCTGTTCGACAGCTCTCACATCGATCCCGG
+TACGCTGCAGGATAATGTCCGGTGTCATGCTGCCACCTTCTGCTCTGCGGCTTTCTGTTTCAGGAATCCAAGAGCTTTTA
+CTGCTTCGGCCTGTGTCAGTTCTGACGATGCACGAATGTCGCGGCGAAATATCTGGGAACAGAGCGGCAATAAGTCGTCA
+TCCCATGTTTTATCCAGGGCGATCAGCAGAGTGTTAATCTCCTGCATGGTTTCATCGTTAACCGGAGTGATGTCGCGTTC
+CGGCTGACGTTCTGCAGTGTATGCAGTATTTTCGACAATGCGCTCGGCTTCATCCTTGTCATAGATACCAGCAAATCCGA
+AGGCCAGACGGGCACACTGAATCATGGCTTTATGACGTAACATCCGTTTGGGATGCGACTGCCACGGCCCCGTGATTTCT
+CTGCCTTCGCGAGTTTTGAATGGTTCGCGGCGGCATTCATCCATCCATTCGGTAACGCAGATCGGATGATTACGGTCCTT
+GCGGTAAATCCGGCATGTACAGGATTCATTGTCCTGCTCAAAGTCCATGCCATCAAACTGCTGGTTTTCATTGATGATGC
+GGGACCAGCCATCAACGCCCACCACCGGAACGATGCCATTCTGCTTATCAGGAAAGGCGTAAATTTCTTTCGTCCACGGA
+TTAAGGCCGTACTGGTTGGCAACGATCAGTAATGCGATGAACTGCGCATCGCTGGCATCACCTTTAAATGCCGTCTGGCG
+AAGAGTGGTGATCAGTTCCTGTGGGTCGACAGAATCCATGCCGACACGTTCAGCCAGCTTCCCAGCCAGCGTTGCGAGTG
+CAGTACTCATTCGTTTTATACCTCTGAATCAATATCAACCTGGTGGTGAGCAATGGTTTCAACCATGTACCGGATGTGTT
+CTGCCATGCGCTCCTGAAACTCAACATCGTCATCAAACGCACGGGTAATGGATTTTTTGCTGGCCCCGTGGCGTTGCAAA
+TGATCGATGCATAGCGATTCAAACAGGTGCTGGGGCAGGCCTTTTTCCATGTCGTCTGCCAGTTCTGCCTCTTTCTCTTC
+ACGGGCGAGCTGCTGGTAGTGACGCGCCCAGCTCTGAGCCTCAAGACGATCCTGAATGTAATAAGCGTTCATGGCTGAAC
+TCCTGAAATAGCTGTGAAAATATCGCCCGCGAAATGCCGGGCTGATTAGGAAAACAGGAAAGGGGGTTAGTGAATGCTTT
+TGCTTGATCTCAGTTTCAGTATTAATATCCATTTTTTATAAGCGTCGACGGCTTCACGAAACATCTTTTCATCGCCAATA
+AAAGTGGCGATAGTGAATTTAGTCTGGATAGCCATAAGTGTTTGATCCATTCTTTGGGACTCCTGGCTGATTAAGTATGT
+CGATAAGGCGTTTCCATCCGTCACGTAATTTACGGGTGATTCGTTCAAGTAAAGATTCGGAAGGGCAGCCAGCAACAGGC
+CACCCTGCAATGGCATATTGCATGGTGTGCTCCTTATTTATACATAACGAAAAACGCCTCGAGTGAAGCGTTATTGGTAT
+GCGGTAAAACCGCACTCAGGCGGCCTTGATAGTCATATCATCTGAATCAAATATTCCTGATGTATCGATATCGGTAATTC
+TTATTCCTTCGCTACCATCCATTGGAGGCCATCCTTCCTGACCATTTCCATCATTCCAGTCGAACTCACACACAACACCA
+TATGCATTTAAGTCGCTTGAAATTGCTATAAGCAGAGCATGTTGCGCCAGCATGATTAATACAGCATTTAATACAGAGCC
+GTGTTTATTGAGTCGGTATTCAGAGTCTGACCAGAAATTATTAATCTGGTGAAGTTTTTCCTCTGTCATTACGTCATGGT
+CGATTTCAATTTCTATTGATGCTTTCCAGTCGTAATCAATGATGTATTTTTTGATGTTTGACATCTGTTCATATCCTCAC
+AGATAAAAAATCGCCCTCACACTGGAGGGCAAAGAAGATTTCCAATAATCAGAACAAGTCGGCTCCTGTTTAGTTACGAG
+CGACATTGCTCCGTGTATTCACTCGTTGGAATGAATACACAGTGCAGTGTTTATTCTGTTATTTATGCCAAAAATAAAGG
+CCACTATCAGGCAGCTTTGTTGTTCTGTTTACCAAGTTCTCTGGCAATCATTGCCGTCGTTCGTATTGCCCATTTATCGA
+CATATTTCCCATCTTCCATTACAGGAAACATTTCTTCAGGCTTAACCATGCATTCCGATTGCAGCTTGCATCCATTGCAT
+CGCTTGAATTGTCCACACCATTGATTTTTATCAATAGTCGTAGTCATACGGATAGTCCTGGTATTGTTCCATCACATCCT
+GAGGATGCTCTTCGAACTCTTCAAATTCTTCTTCCATATATCACCTTAAATAGTGGATTGCGGTAGTAAAGATTGTGCCT
+GTCTTTTAACCACATCAGGCTCGGTGGTTCTCGTGTACCCCTACAGCGAGAAATCGGATAAACTATTACAACCCCTACAG
+TTTGATGAGTATAGAAATGGATCCACTCGTTATTCTCGGACGAGTGTTCAGTAATGAACCTCTGGAGAGAACCATGTATA
+TGATCGTTATCTGGGTTGGACTTCTGCTTTTAAGCCCAGATAACTGGCCTGAATATGTTAATGAGAGAATCGGTATTCCT
+CATGTGTGGCATGTTTTCGTCTTTGCTCTTGCATTTTCGCTAGCAATTAATGTGCATCGATTATCAGCTATTGCCAGCGC
+CAGATATAAGCGATTTAAGCTAAGAAAACGCATTAAGATGCAAAACGATAAAGTGCGATCAGTAATTCAAAACCTTACAG
+AAGAGCAATCTATGGTTTTGTGCGCAGCCCTTAATGAAGGCAGGAAGTATGTGGTTACATCAAAACAATTCCCATACATT
+AGTGAGTTGATTGAGCTTGGTGTGTTGAACAAAACTTTTTCCCGATGGAATGGAAAGCATATATTATTCCCTATTGAGGA
+TATTTACTGGACTGAATTAGTTGCCAGCTATGATCCATATAATATTGAGATAAAGCCAAGGCCAATATCTAAGTAACTAG
+ATAAGAGGAATCGATTTTCCCTTAATTTTCTGGCGTCCACTGCATGTTATGCCGCGTTCGCCAGGCTTGCTGTACCATGT
+GCGCTGATTCTTGCGCTCAATACGTTGCAGGTTGCTTTCAATCTGTTTGTGGTATTCAGCCAGCACTGTAAGGTCTATCG
+GATTTAGTGCGCTTTCTACTCGTGATTTCGGTTTGCGATTCAGCGAGAGAATAGGGCGGTTAACTGGTTTTGCGCTTACC
+CCAACCAACAGGGGATTTGCTGCTTTCCATTGAGCCTGTTTCTCTGCGCGACGTTCGCGGCGGCGTGTTTGTGCATCCAT
+CTGGATTCTCCTGTCAGTTAGCTTTGGTGGTGTGTGGCAGTTGTAGTCCTGAACGAAAACCCCCCGCGATTGGCACATTG
+GCAGCTAATCCGGAATCGCACTTACGGCCAATGCTTCGTTTCGTATCACACACCCCAAAGCCTTCTGCTTTGAATGCTGC
+CCTTCTTCAGGGCTTAATTTTTAAGAGCGTCACCTTCATGGTGGTCAGTGCGTCCTGCTGATGTGCTCAGTATCACCGCC
+AGTGGTATTTATGTCAACACCGCCAGAGATAATTTATCACCGCAGATGGTTATCTGTATGTTTTTTATATGAATTTATTT
+TTTGCAGGGGGGCATTGTTTGGTAGGTGAGAGATCTGAATTGCTATGTTTAGTGAGTTGTATCTATTTATTTTTCAATAA
+ATACAATTGGTTATGTGTTTTGGGGGCGATCGTGAGGCAAAGAAAACCCGGCGCTGAGGCCGGGTTATTCTTGTTCTCTG
+GTCAAATTATATAGTTGGAAAACAAGGATGCATATATGAATGAACGATGCAGAGGCAATGCCGATGGCGATAGTGGGTAT
+CATGTAGCCGCTTATGCTGGAAAGAAGCAATAACCCGCAGAAAAACAAAGCTCCAAGCTCAACAAAACTAAGGGCATAGA
+CAATAACTACCGATGTCATATACCCATACTCTCTAATCTTGGCCAGTCGGCGCGTTCTGCTTCCGATTAGAAACGTCAAG
+GCAGCAATCAGGATTGCAATCATGGTTCCTGCATATGATGACAATGTCGCCCCAAGACCATCTCTATGAGCTGAAAAAGA
+AACACCAGGAATGTAGTGGCGGAAAAGGAGATAGCAAATGCTTACGATAACGTAAGGAATTATTACTATGTAAACACCAG
+GCATGATTCTGTTCCGCATAATTACTCCTGATAATTAATCCTTAACTTTGCCCACCTGCCTTTTAAAACATTCCAGTATA
+TCACTTTTCATTCTTGCGTAGCAATATGCCATCTCTTCAGCTATCTCAGCATTGGTGACCTTGTTCAGAGGCGCTGAGAG
+ATGGCCTTTTTCTGATAGATAATGTTCTGTTAAAATATCTCCGGCCTCATCTTTTGCCCGCAGGCTAATGTCTGAAAATT
+GAGGTGACGGGTTAAAAATAATATCCTTGGCAACCTTTTTTATATCCCTTTTAAATTTTGGCTTAATGACTATATCCAAT
+GAGTCAAAAAGCTCCCCTTCAATATCTGTTGCCCCTAAGACCTTTAATATATCGCCAAATACAGGTAGCTTGGCTTCTAC
+CTTCACCGTTGTTCGGCCGATGAAATGCATATGCATAACATCGTCTTTGGTGGTTCCCCTCATCAGTGGCTCTATCTGAA
+CGCGCTCTCCACTGCTTAATGACATTCCTTTCCCGATTAAAAAATCTGTCAGATCGGATGTGGTCGGCCCGAAAACAGTT
+CTGGCAAAACCAATGGTGTCGCCTTCAACAAACAAAAAAGATGGGAATCCCAATGATTCGTCATCTGCGAGGCTGTTCTT
+AATATCTTCAACTGAAGCTTTAGAGCGATTTATCTTCTGAACCAGACTCTTGTCATTTGTTTTGGTAAAGAGAAAAGTTT
+TTCCATCGATTTTATGAATATACAAATAATTGGAGCCAACCTGCAGGTGATGATTATCAGCCAGCAGAGAATTAAGGAAA
+ACAGACAGGTTTATTGAGCGCTTATCTTTCCCTTTATTTTTGCTGCGGTAAGTCGCATAAAAACCATTCTTCATAATTCA
+ATCCATTTACTATGTTATGTTCTGAGGGGAGTGAAAATTCCCCTAATTCGATGAAGATTCTTGCTCAATTGTTATCAGCT
+ATGCGCCGACCAGAACACCTTGCCGATCAGCCAAACGTCTCTTCAGGCCACTGACTAGCGATAACTTTCCCCACAACGGA
+ACAACTCTCATTGCATGGGATCATTGGGTACTGTGGGTTTAGTGGTTGTAAAAACACCTGACCGCTATCCCTGATCAGTT
+TCTTGAAGGTAAACTCATCACCCCCAAGTCTGGCTATGCAGAAATCACCTGGCTCAACAGCCTGCTCAGGGTCAACGAGA
+ATTAACATTCCGTCAGGAAAGCTTGGCTTGGAGCCTGTTGGTGCGGTCATGGAATTACCTTCAACCTCAAGCCAGAATGC
+AGAATCACTGGCTTTTTTGGTTGTGCTTACCCATCTCTCCGCATCACCTTTGGTAAAGGTTCTAAGCTTAGGTGAGAACA
+TCCCTGCCTGAACATGAGAAAAAACAGGGTACTCATACTCACTTCTAAGTGACGGCTGCATACTAACCGCTTCATACATC
+TCGTAGATTTCTCTGGCGATTGAAGGGCTAAATTCTTCAACGCTAACTTTGAGAATTTTTGTAAGCAATGCGGCGTTATA
+AGCATTTAATGCATTGATGCCATTAAATAAAGCACCAACGCCTGACTGCCCCATCCCCATCTTGTCTGCGACAGATTCCT
+GGGATAAGCCAAGTTCATTTTTCTTTTTTTCATAAATTGCTTTAAGGCGACGTGCGTCCTCAAGCTGCTCTTGTGTTAAT
+GGTTTCTTTTTTGTGCTCATACGTTAAATCTATCACCGCAAGGGATAAATATCTAACACCGTGCGTGTTGACTATTTTAC
+CTCTGGCGGTGATAATGGTTGCATGTACTAAGGAGGTTGTATGGAACAACGCATAACCCTGAAAGATTATGCAATGCGCT
+TTGGGCAAACCAAGACAGCTAAAGATCTCGGCGTATATCAAAGCGCGATCAACAAGGCCATTCATGCAGGCCGAAAGATT
+TTTTTAACTATAAACGCTGATGGAAGCGTTTATGCGGAAGAGGTAAAGCCCTTCCCGAGTAACAAAAAAACAACAGCATA
+AATAACCCCGCTCTTACACATTCCAGCCCTGAAAAAGGGCATCAAATTAAACCACACCTATGGTGTATGCATTTATTTGC
+ATACATTCAATCAATTGTTATCTAAGGAAATACTTACATATGGTTCGTGCAAACAAACGCAACGAGGCTCTACGAATCGA
+GAGTGCGTTGCTTAACAAAATCGCAATGCTTGGAACTGAGAAGACAGCGGAAGCTGTGGGCGTTGATAAGTCGCAGATCA
+GCAGGTGGAAGAGGGACTGGATTCCAAAGTTCTCAATGCTGCTTGCTGTTCTTGAATGGGGGGTCGTTGACGACGACATG
+GCTCGATTGGCGCGACAAGTTGCTGCGATTCTCACCAATAAAAAACGCCCGGCGGCAACCGAGCGTTCTGAACAAATCCA
+GATGGAGTTCTGAGGTCATTACTGGATCTATCAACAGGAGTCATTATGACAAATACAGCAAAAATACTCAACTTCGGCAG
+AGGTAACTTTGCCGGACAGGAGCGTAATGTGGCAGATCTCGATGATGGTTACGCCAGACTATCAAATATGCTGCTTGAGG
+CTTATTCGGGCGCAGATCTGACCAAGCGACAGTTTAAAGTGCTGCTTGCCATTCTGCGTAAAACCTATGGGTGGAATAAA
+CCAATGGACAGAATCACCGATTCTCAACTTAGCGAGATTACAAAGTTACCTGTCAAACGGTGCAATGAAGCCAAGTTAGA
+ACTCGTCAGAATGAATATTATCAAGCAGCAAGGCGGCATGTTTGGACCAAATAAAAACATCTCAGAATGGTGCATCCCTC
+AAAACGAGGGAAAATCCCCTAAAACGAGGGATAAAACATCCCTCAAATTGGGGGATTGCTATCCCTCAAAACAGGGGGAC
+ACAAAAGACACTATTACAAAAGAAAAAAGAAAAGATTATTCGTCAGAGAATTCTGGCGAATCCTCTGACCAGCCAGAAAA
+CGACCTTTCTGTGGTGAAACCGGATGCTGCAATTCAGAGCGGCAGCAAGTGGGGGACAGCAGAAGACCTGACCGCCGCAG
+AGTGGATGTTTGACATGGTGAAGACTATCGCACCATCAGCCAGAAAACCGAATTTTGCTGGGTGGGCTAACGATATCCGC
+CTGATGCGTGAACGTGACGGACGTAACCACCGCGACATGTGTGTGCTGTTCCGCTGGGCATGCCAGGACAACTTCTGGTC
+CGGTAACGTGCTGAGCCCGGCCAAACTCCGCGATAAGTGGACCCAACTCGAAATCAACCGTAACAAGCAACAGGCAGGCG
+TGACAGCCAGCAAACCAAAACTCGACCTGACAAACACAGACTGGATTTACGGGGTGGATCTATGAAAAACATCGCCGCAC
+AGATGGTTAACTTTGACCGTGAGCAGATGCGTCGGATCGCCAACAACATGCCGGAACAGTACGACGAAAAGCCGCAGGTA
+CAGCAGGTAGCGCAGATCATCAACGGTGTGTTCAGCCAGTTACTGGCAACTTTCCCGGCGAGCCTGGCTAACCGTGACCA
+GAACGAAGTGAACGAAATCCGTCGCCAGTGGGTTCTGGCTTTTCGGGAAAACGGGATCACCACGATGGAACAGGTTAACG
+CAGGAATGCGCGTAGCCCGTCGGCAGAATCGACCATTTCTGCCATCACCCGGGCAGTTTGTTGCATGGTGCCGGGAAGAA
+GCATCCGTTACCGCCGGACTGCCAAACGTCAGCGAGCTGGTTGATATGGTTTACGAGTATTGCCGGAAGCGAGGCCTGTA
+TCCGGATGCGGAGTCTTATCCGTGGAAATCAAACGCGCACTACTGGCTGGTTACCAACCTGTATCAGAACATGCGGGCCA
+ATGCGCTTACTGATGCGGAATTACGCCGTAAGGCCGCAGATGAGCTTGTCCATATGACTGCGAGAATTAACCGTGGTGAG
+GCGATCCCTGAACCAGTAAAACAACTTCCTGTCATGGGCGGTAGACCTCTAAATCGTGCACAGGCTCTGGCGAAGATCGC
+AGAAATCAAAGCTAAGTTCGGACTGAAAGGAGCAAGTGTATGACGGGCAAAGAGGCAATTATTCATTACCTGGGGACGCA
+TAATAGCTTCTGTGCGCCGGACGTTGCCGCGCTAACAGGCGCAACAGTAACCAGCATAAATCAGGCCGCGGCTAAAATGG
+CACGGGCAGGTCTTCTGGTTATCGAAGGTAAGGTCTGGCGAACGGTGTATTACCGGTTTGCTACCAGGGAAGAACGGGAA
+GGAAAGATGAGCACGAACCTGGTTTTTAAGGAGTGTCGCCAGAGTGCCGCGATGAAACGGGTATTGGCGGTATATGGAGT
+TAAAAGATGACCATCTACATTACTGAGCTAATAACAGGCCTGCTGGTAATCGCAGGCCTTTTTATTTGGGGGAGAGGGAA
+GTCATGAAAAAACTAACCTTTGAAATTCGATCTCCAGCACATCAGCAAAACGCTATTCACGCAGTACAGCAAATCCTTCC
+AGACCCAACCAAACCAATCGTAGTAACCATTCAGGAACGCAACCGCAGCTTAGACCAAAACAGGAAGCTATGGGCCTGCT
+TAGGTGACGTCTCTCGTCAGGTTGAATGGCATGGTCGCTGGCTGGATGCAGAAAGCTGGAAGTGTGTGTTTACCGCAGCA
+TTAAAGCAGCAGGATGTTGTTCCTAACCTTGCCGGGAATGGCTTTGTGGTAATAGGCCAGTCAACCAGCAGGATGCGTGT
+AGGCGAATTTGCGGAGCTATTAGAGCTTATACAGGCATTCGGTACAGAGCGTGGCGTTAAGTGGTCAGACGAAGCGAGAC
+TGGCTCTGGAGTGGAAAGCGAGATGGGGAGACAGGGCTGCATGATAAATGTCGTTAGTTTCTCCGGTGGCAGGACGTCAG
+CATATTTGCTCTGGCTAATGGAGCAAAAGCGACGGGCAGGTAAAGACGTGCATTACGTTTTCATGGATACAGGTTGTGAA
+CATCCAATGACATATCGGTTTGTCAGGGAAGTTGTGAAGTTCTGGGATATACCGCTCACCGTATTGCAGGTTGATATCAA
+CCCGGAGCTTGGACAGCCAAATGGTTATACGGTATGGGAACCAAAGGATATTCAGACGCGAATGCCTGTTCTGAAGCCAT
+TTATCGATATGGTAAAGAAATATGGCACTCCATACGTCGGCGGCGCGTTCTGCACTGACAGATTAAAACTCGTTCCCTTC
+ACCAAATACTGTGATGACCATTTCGGGCGAGGGAATTACACCACGTGGATTGGCATCAGAGCTGATGAACCGAAGCGGCT
+AAAGCCAAAGCCTGGAATCAGATATCTTGCTGAACTGTCAGACTTTGAGAAGGAAGATATCCTCGCATGGTGGAAGCAAC
+AACCATTCGATTTGCAAATACCGGAACATCTCGGTAACTGCATATTCTGCATTAAAAAATCAACGCAAAAAATCGGACTT
+GCCTGCAAAGATGAGGAGGGATTGCAGCGTGTTTTTAATGAGGTCATCACGGGATCCCATGTGCGTGACGGACATCGGGA
+AACGCCAAAGGAGATTATGTACCGAGGAAGAATGTCGCTGGACGGTATCGCGAAAATGTATTCAGAAAATGATTATCAAG
+CCCTGTATCAGGACATGGTACGAGCTAAAAGATTCGATACCGGCTCTTGTTCTGAGTCATGCGAAATATTTGGAGGGCAG
+CTTGATTTCGACTTCGGGAGGGAAGCTGCATGATGCGATGTTATCGGTGCGGTGAATGCAAAGAAGATAACCGCTTCCGA
+CCAAATCAACCTTACTGGAATCGATGGTGTCTCCGGTGTGAAAGAACACCAACAGGGGTGTTACCACTACCGCAGGAAAA
+GGAGGACGTGTGGCGAGACAGCGACGAAGTATCACCGACATAATCTGCGAAAACTGCAAATACCTTCCAACGAAACGCAC
+CAGAAATAAACCCAAGCCAATCCCAAAAGAATCTGACGTAAAAACCTTCAACTACACGGCTCACCTGTGGGATATCCGGT
+GGCTAAGACGTCGTGCGAGGAAAACAAGGTGATTGACCAAAATCGAAGTTACGAACAAGAAAGCGTCGAGCGAGCTTTAA
+CGTGCGCTAACTGCGGTCAGAAGCTGCATGTGCTGGAAGTTCACGTGTGTGAGCACTGCTGCGCAGAACTGATGAGCGAT
+CCGAATAGCTCGATGCACGAGGAAGAAGATGATGGCTAAACCAGCGCGAAGACGATGTAAAAACGATGAATGCCGGGAAT
+GGTTTCACCCTGCATTCGCTAATCAGTGGTGGTGCTCTCCAGAGTGTGGAACCAAGATAGCACTCGAACGACGAAGTAAA
+GAACGCGAAAAAGCGGAAAAAGCAGCAGAGAAGAAACGACGACGAGAGGAGCAGAAACAGAAAGATAAACTTAAGATTCG
+AAAACTCGCCTTAAAGCCCCGCAGTTACTGGATTAAACAAGCCCAACAAGCCGTAAACGCCTTCATCAGAGAAAGAGACC
+GCGACTTACCATGTATCTCGTGCGGAACGCTCACGTCTGCTCAGTGGGATGCCGGACATTACCGGACAACTGCTGCGGCA
+CCTCAACTCCGATTTAATGAACGCAATATTCACAAGCAATGCGTGGTGTGCAACCAGCACAAAAGCGGAAATCTCGTTCC
+GTATCGCGTCGAACTGATTAGCCGCATCGGGCAGGAAGCAGTAGACGAAATCGAATCAAACCATAACCGCCATCGCTGGA
+CTATCGAAGAGTGCAAGGCGATCAAGGCAGAGTACCAACAGAAACTCAAAGACCTGCGAAATAGCAGAAGTGAGGCCGCA
+TGACGTTCTCAGTAAAAACCATTCCAGACATGCTCGTTGAAACATACGGAAATCAGACAGAAGTAGCACGCAGACTGAAA
+TGTAGTCGCGGTACGGTCAGAAAATACGTTGATGATAAAGACGGGAAAATGCACGCCATCGTCAACGACGTTCTCATGGT
+TCATCGCGGATGGAGTGAAAGAGATGCGCTATTACGAAAAAATTGATGGCAGCAAATACCGAAATATTTGGGTAGTTGGC
+GATCTGCACGGATGCTACACGAACCTGATGAACAAACTGGATACGATTGGATTCGACAACAAAAAAGACCTGCTTATCTC
+GGTGGGCGATTTGGTTGATCGTGGTGCAGAGAACGTTGAATGCCTGGAATTAATCACATTCCCCTGGTTCAGAGCTGTAC
+GTGGAAACCATGAGCAAATGATGATTGATGGCTTATCAGAGCGTGGAAACGTTAATCACTGGCTGCTTAATGGCGGTGGC
+TGGTTCTTTAATCTCGATTACGACAAAGAAATTCTGGCTAAAGCTCTTGCCCATAAAGCAGATGAACTTCCGTTAATCAT
+CGAACTGGTGAGCAAAGATAAAAAATATGTTATCTGCCACGCCGATTATCCCTTTGACGAATACGAGTTTGGAAAGCCAG
+TTGATCATCAGCAGGTAATCTGGAACCGCGAACGAATCAGCAACTCACAAAACGGGATCGTGAAAGAAATCAAAGGCGCG
+GACACGTTCATCTTTGGTCATACGCCAGCAGTGAAACCACTCAAGTTTGCCAACCAAATGTATATCGATACCGGCGCAGT
+GTTCTGCGGAAACCTAACATTGATTCAGGTACAGGGAGAAGGCGCATGAGACTCGAAAGCGTAGCTAAATTTCATTCGCC
+AAAAAGCCCGATGATGAGCGACTCACCACGGGCCACGGCTTCTGACTCTCTTTCCGGTACTGATGTGATGGCTGCTATGG
+GGATGGCGCAATCACAAGCCGGATTCGGTATGGCTGCATTCTGCGGTAAGCACGAACTCAGCCAGAACGACAAACAAAAG
+GCTATCAACTATCTGATGCAATTTGCACACAAGGTATCGGGGAAATACCGTGGTGTGGCAAAGCTTGAAGGAAATACTAA
+GGCAAAGGTACTGCAAGTGCTCGCAACATTCGCTTATGCGGATTATTGCCGTAGTGCCGCGACGCCGGGGGCAAGATGCA
+GAGATTGCCATGGTACAGGCCGTGCGGTTGATATTGCCAAAACAGAGCTGTGGGGGAGAGTTGTCGAGAAAGAGTGCGGA
+AGATGCAAAGGCGTCGGCTATTCAAGGATGCCAGCAAGCGCAGCATATCGCGCTGTGACGATGCTAATCCCAAACCTTAC
+CCAACCCACCTGGTCACGCACTGTTAAGCCGCTGTATGACGCTCTGGTGGTGCAATGCCACAAAGAAGAGTCAATCGCAG
+ACAACATTTTGAATGCGGTCACACGTTAGCAGCATGATTGCCACGGATGGCAACATATTAACGGCATGATATTGACTTAT
+TGAATAAAATTGGGTAAATTTGACTCAACGATGGGTTAATTCGCTCGTTGTGGTAGTGAGATGAAAAGAGGCGGCGCTTA
+CTACCGATTCCGCCTAGTTGGTCACTTCGACGTATCGTCTGGAACTCCAACCATCGCAGGCAGAGAGGTCTGCAAAATGC
+AATCCCGAAACAGTTCGCAGGTAATAGTTAGAGCCTGCATAACGGTTTCGGGATTTTTTATATCTGCACAACAGGTAAGA
+GCATTGAGTCGATAATCGTGAAGAGTCGGCGAGCCTGGTTAGCCAGTGCTCTTTCCGTTGTGCTGAATTAAGCGAATACC
+GGAAGCAGAACCGGATCACCAAATGCGTACAGGCGTCATCGCCGCCCAGCAACAGCACAACCCAAACTGAGCCGTAGCCA
+CTGTCTGTCCTGAATTCATTAGTAATAGTTACGCTGCGGCCTTTTACACATGACCTTCGTGAAAGCGGGTGGCAGGAGGT
+CGCGCTAACAACCTCCTGCCGTTTTGCCCGTGCATATCGGTCACGAACAAATCTGATTACTAAACACAGTAGCCTGGATT
+TGTTCTATCAGTAATCGACCTTATTCCTAATTAAATAGAGCAAATCCCCTTATTGGGGGTAAGACATGAAGATGCCAGAA
+AAACATGACCTGTTGGCCGCCATTCTCGCGGCAAAGGAACAAGGCATCGGGGCAATCCTTGCGTTTGCAATGGCGTACCT
+TCGCGGCAGATATAATGGCGGTGCGTTTACAAAAACAGTAATCGACGCAACGATGTGCGCCATTATCGCCTAGTTCATTC
+GTGACCTTCTCGACTTCGCCGGACTAAGTAGCAATCTCGCTTATATAACGAGCGTGTTTATCGGCTACATCGGTACTGAC
+TCGATTGGTTCGCTTATCAAACGCTTCGCTGCTAAAAAAGCCGGAGTAGAAGATGGTAGAAATCAATAATCAACGTAAGG
+CGTTCCTCGATATGCTGGCGTGGTCGGAGGGAACTGATAACGGACGTCAGAAAACCAGAAATCATGGTTATGACGTCATT
+GTAGGCGGAGAGCTATTTACTGATTACTCCGATCACCCTCGCAAACTTGTCACGCTAAACCCAAAACTCAAATCAACAGG
+CGCCGGACGCTACCAGCTTCTTTCCCGTTGGTGGGATGCCTACCGCAAGCAGCTTGGCCTGAAAGACTTCTCTCCGAAAA
+GTCAGGACGCTGTGGCATTGCAGCAGATTAAGGAGCGTGGCGCTTTACCTATGATTGATCGTGGTGATATCCGTCAGGCA
+ATCGACCGTTGCAGCAATATCTGGGCTTCACTGCCGGGCGCTGGTTATGGTCAGTTCGAGCATAAGGCTGACAGCCTGAT
+TGCAAAATTCAAAGAAGCGGGCGGAACGGTCAGAGAGATTGATGTATGAGCAGAGTCACCGCGATTATCTCCGCTCTGGT
+TATCTGCATCATCGTCTGCCTGTCATGGGCTGTTAATCATTACCGTGATAACGCCATTACCTACAAAGCCCAGCGCGACA
+AAAATGCCAGAGAACTGAAGCTGGCGAACGCGGCAATTACTGACATGCAGATGCGTCAGCGTGATGTTGCTGCGCTCGAT
+GCAAAATACACGAAGGAGTTAGCTGATGCTAAAGCTGAAAATGATGCTCTGCGTGATGATGTTGCCGCTGGTCGTCGTCG
+GTTGCACATCAAAGCAGTCTGTCAGTCAGTGCGTGAAGCCACCACCGCCTCCGGCGTGGATAATGCAGCCTCCCCCCGAC
+TGGCAGACACCGCTGAACGGGATTATTTCACCCTCAGAGAGAGGCTGATCACTATGCAAAAACAACTGGAAGGAACCCAG
+AAGTATATTAATGAGCAGTGCAGATAGAGTTGCCCATATCGATGGGCAACTCATGCAATTATTGTGAGCAATACACACGC
+GCTTCCAGCGGAGTATAAATGCCTAAAGTAATAAAACCGAGCAATCCATTTACGAATGTTTGCTGGGTTTCTGTTTTAAC
+AACATTTTCTGCGCCGCCACAAATTTTGGCTGCATCGACAGTTTTCTTCTGCCCAATTCCAGAAACGAAGAAATGATGGG
+TGATGGTTTCCTTTGGTGCTACTGCTGCCGGTTTGTTTTGAACAGTAAACGTCTGTTGAGCACATCCTGTAATAAGCAGG
+GCCAGCGCAGTAGCGAGTAGCATTTTTTTCATGGTGTTATTCCCGATGCTTTTTGAAGTTCGCAGAATCGTATGTGTAGA
+AAATTAAACAAACCCTAAACAATGAGTTGAAATTTCATATTGTTAATATTTATTAATGTATGTCAGGTGCGATGAATCGT
+CATTGTATTCCCGGATTAACTATGTCCACAGCCCTGACGGGGAACTTCTCTGCGGGAGTGTCCGGGAATAATTAAAACGA
+TGCACACAGGGTTTAGCGCGTACACGTATTGCATTATGCCAACGCCCCGGTGCTGACACGGAAGAAACCGGACGTTATGA
+TTTAGCGTGGAAAGATTTGTGTAGTGTTCTGAATGCTCTCAGTAAATAGTAATGAATTATCAAAGGTATAGTAATATCTT
+TTATGTTCATGGATATTTGTAACCCATCGGAAAACTCCTGCTTTAGCAAGATTTTCCCTGTATTGCTGAAATGTGATTTC
+TCTTGATTTCAACCTATCATAGGACGTTTCTATAAGATGCGTGTTTCTTGAGAATTTAACATTTACAACCTTTTTAAGTC
+CTTTTATTAACACGGTGTTATCGTTTTCTAACACGATGTGAATATTATCTGTGGCTAGATAGTAAATATAATGTGAGACG
+TTGTGACGTTTTAGTTCAGAATAAAACAATTCACAGTCTAAATCTTTTCGCACTTGATCGAATATTTCTTTAAAAATGGC
+AACCTGAGCCATTGGTAAAACCTTCCATGTGATACGAGGGCGCGTAGTTTGCATTATCGTTTTTATCGTTTCAATCTGGT
+CTGACCTCCTTGTGTTTTGTTGATGATTTATGTCAAATATTAGGAATGTTTTCACTTAATAGTATTGGTTGCGTAACAAA
+GTGCGGTCCTGCTGGCATTCTGGAGGGAAATACAACCGACAGATGTATGTAAGGCCAACGTGCTCAAATCTTCATACAGA
+AAGATTTGAAGTAATATTTTAACCGCTAGATGAAGAGCAAGCGCATGGAGCGACAAAATGAATAAAGAACAATCTGCTGA
+TGATCCCTCCGTGGATCTGATTCGTGTAAAAAATATGCTTAATAGCACCATTTCTATGAGTTACCCTGATGTTGTAATTG
+CATGTATAGAACATAAGGTGTCTCTGGAAGCATTCAGAGCAATTGAGGCAGCGTTGGTGAAGCACGATAATAATATGAAG
+GATTATTCCCTGGTGGTTGACTGATCACCATAACTGCTAATCATTCAAACTATTTAGTCTGTGACAGAGCCAACACGCAG
+TCTGTCACTGTCAGGAAAGTGGTAAAACTGCAACTCAATTACTGCAATGCCCTCGTAATTAAGTGAATTTACAATATCGT
+CCTGTTCGGAGGGAAGAACGCGGGATGTTCATTCTTCATCACTTTTAATTGATGTATATGCTCTCTTTTCTGACGTTAGT
+CTCCGACGGCAGGCTTCAATGACCCAGGCTGAGAAATTCCCGGACCCTTTTTGCTCAAGAGCGATGTTAATTTGTTCAAT
+CATTTGGTTAGGAAAGCGGATGTTGCGGGTTGTTGTTCTGCGGGTTCTGTTCTTCGTTGACATGAGGTTGCCCCGTATTC
+AGTGTCGCTGATTTGTATTGTCTGAAGTTGTTTTTACGTTAAGTTGATGCAGATCAATTAATACGATACCTGCGTCATAA
+TTGATTATTTGACGTGGTTTGATGGCCTCCACGCACGTTGTGATATGTAGATGATAATCATTATCACTTTACGGGTCCTT
+TCCGGTGATCCGACAGGTTACG
diff --git a/pbcore/data/lambdaNEB.fa.fai b/pbcore/data/lambdaNEB.fa.fai
new file mode 100644
index 0000000..064af36
--- /dev/null
+++ b/pbcore/data/lambdaNEB.fa.fai
@@ -0,0 +1 @@
+lambda_NEB3011	48502	16	80	81
diff --git a/pbcore/data/m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5 b/pbcore/data/m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5
new file mode 100644
index 0000000..34df9ed
Binary files /dev/null and b/pbcore/data/m110818_075520_42141_c100129202555500000315043109121112_s1_p0.bas.h5 differ
diff --git a/pbcore/data/m110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5 b/pbcore/data/m110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5
new file mode 100644
index 0000000..5e254dc
Binary files /dev/null and b/pbcore/data/m110818_075520_42141_c100129202555500000315043109121112_s2_p0.bas.h5 differ
diff --git a/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.1.bax.h5 b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.1.bax.h5
new file mode 100755
index 0000000..60eb003
Binary files /dev/null and b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.1.bax.h5 differ
diff --git a/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.2.bax.h5 b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.2.bax.h5
new file mode 100755
index 0000000..ebf625f
Binary files /dev/null and b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.2.bax.h5 differ
diff --git a/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.3.bax.h5 b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.3.bax.h5
new file mode 100755
index 0000000..0a2f3ac
Binary files /dev/null and b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.3.bax.h5 differ
diff --git a/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.bas.h5 b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.bas.h5
new file mode 100755
index 0000000..441e404
Binary files /dev/null and b/pbcore/data/m130522_092457_42208_c100497142550000001823078008081323_s1_p0.bas.h5 differ
diff --git a/pbcore/data/m130727_114215_42211_c100569412550000001823090301191423_s1_p0.1.ccs.h5 b/pbcore/data/m130727_114215_42211_c100569412550000001823090301191423_s1_p0.1.ccs.h5
new file mode 100755
index 0000000..3097c4d
Binary files /dev/null and b/pbcore/data/m130727_114215_42211_c100569412550000001823090301191423_s1_p0.1.ccs.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.bax.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.bax.h5
new file mode 100755
index 0000000..e8904f6
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.bax.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.rgn.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.rgn.h5
new file mode 100644
index 0000000..00b0bb4
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.1.rgn.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.bax.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.bax.h5
new file mode 100755
index 0000000..c603d36
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.bax.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.rgn.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.rgn.h5
new file mode 100644
index 0000000..6abd850
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.2.rgn.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.bax.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.bax.h5
new file mode 100755
index 0000000..3b27701
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.bax.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.rgn.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.rgn.h5
new file mode 100644
index 0000000..6fa5269
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.3.rgn.h5 differ
diff --git a/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.bas.h5 b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.bas.h5
new file mode 100755
index 0000000..eaf01e9
Binary files /dev/null and b/pbcore/data/m130731_192718_42129_c100564662550000001823085912221321_s1_p0.bas.h5 differ
diff --git a/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.1.bc.h5 b/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.1.bc.h5
new file mode 100644
index 0000000..479164c
Binary files /dev/null and b/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.1.bc.h5 differ
diff --git a/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.2.bc.h5 b/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.2.bc.h5
new file mode 100644
index 0000000..dcc9265
Binary files /dev/null and b/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.2.bc.h5 differ
diff --git a/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.3.bc.h5 b/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.3.bc.h5
new file mode 100644
index 0000000..3b26e19
Binary files /dev/null and b/pbcore/data/m140307_221913_42203_c100626172550000001823119008061414_s1_p0.3.bc.h5 differ
diff --git a/pbcore/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 b/pbcore/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5
new file mode 100644
index 0000000..f50c379
Binary files /dev/null and b/pbcore/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.1.bax.h5 differ
diff --git a/pbcore/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.subreads.bam b/pbcore/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.subreads.bam
new file mode 100644
index 0000000..9697285
Binary files /dev/null and b/pbcore/data/m140905_042212_sidney_c100564852550000001823085912221377_s1_X0.subreads.bam differ
diff --git a/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.1.bax.h5 b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.1.bax.h5
new file mode 100644
index 0000000..31e8768
Binary files /dev/null and b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.1.bax.h5 differ
diff --git a/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.2.bax.h5 b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.2.bax.h5
new file mode 100644
index 0000000..c4b117f
Binary files /dev/null and b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.2.bax.h5 differ
diff --git a/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.3.bax.h5 b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.3.bax.h5
new file mode 100644
index 0000000..e0bfcfb
Binary files /dev/null and b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.3.bax.h5 differ
diff --git a/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.bas.h5 b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.bas.h5
new file mode 100755
index 0000000..1056612
Binary files /dev/null and b/pbcore/data/m140912_020930_00114_c100702482550000001823141103261590_s1_p0.bas.h5 differ
diff --git a/pbcore/data/variants.gff b/pbcore/data/variants.gff
new file mode 100644
index 0000000..5e15571
--- /dev/null
+++ b/pbcore/data/variants.gff
@@ -0,0 +1,11 @@
+##gff-version 3
+##pacbio-variant-version 2.1
+##date Sat Mar 22 12:16:13 2014
+##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12
+##source GenomicConsensus 0.8.0
+##source-commandline /Users/dalexander/.virtualenvs/VE/bin/variantCaller.py --algorithm=plurality -q20 -x5 pbcore/data/aligned_reads_1.cmp.h5 -r /Users/dalexander/Data/lambdaNEB.fa -o /tmp/v.gff
+##source-alignment-file /Users/dalexander/Dropbox/Sources/git/pbcore/pbcore/data/aligned_reads_1.cmp.h5
+##source-reference-file /Users/dalexander/Data/lambdaNEB.fa
+##sequence-region lambda_NEB3011 1 48502
+lambda_NEB3011	.	deletion	30890	30890	.	.	.	reference=G;variantSeq=.;frequency=2;coverage=5;confidence=25
+lambda_NEB3011	.	insertion	30924	30924	.	.	.	reference=.;variantSeq=G;frequency=2;coverage=5;confidence=25
diff --git a/pbcore/io/BarcodeH5Reader.py b/pbcore/io/BarcodeH5Reader.py
new file mode 100644
index 0000000..c6c1fad
--- /dev/null
+++ b/pbcore/io/BarcodeH5Reader.py
@@ -0,0 +1,374 @@
+#################################################################################$$
+# Copyright (c) 2011,2012, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright notice, this
+#   list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its contributors
+#   may be used to endorse or promote products derived from this software
+#   without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#################################################################################$$
+
+import h5py as h5
+import numpy as n
+
+from pbcore.io.FofnIO import readFofn
+
+BARCODE_DELIMITER = "--"
+BC_DS_PATH        = "BarcodeCalls/best"
+BC_DS_ALL_PATH    = "BarcodeCalls/all"
+
+
+class LabeledZmw(object):
+    """A scored ZMW represents a ZMW object and its corresponding
+    barcode scores. Some fields are considered optional"""
+    def __init__(self, holeNumber, nScored, bestIdx, bestScore,
+                 secondBestIdx = -1, secondBestScore = 0,
+                 allScores = None):
+        self._holeNumber = holeNumber
+        self._nScored = nScored
+        self._bestIdx = bestIdx
+        self._bestScore = bestScore
+        self._secondBestIdx = secondBestIdx
+        self._secondBestScore = secondBestScore
+        self._allScores = allScores
+
+    def toBestRecord(self):
+        """Return a summary record suitable for storage"""
+        return (self.holeNumber, self.nScored, self.bestIdx,
+                self.bestScore, self.secondBestIdx, self.secondBestScore)
+
+    @staticmethod
+    def fromBestRecord(npRow):
+        return LabeledZmw(npRow[0], npRow[1], npRow[2],
+                          npRow[3], npRow[4], npRow[5], None)
+    @property
+    def holeNumber(self):
+        return self._holeNumber
+    @property
+    def nScored(self):
+        return self._nScored
+    @property
+    def bestIdx(self):
+        return self._bestIdx
+    @property
+    def bestScore(self):
+        return self._bestScore
+    @property
+    def averageScore(self):
+        return 0 if self.nScored <= 0 else self.bestScore/self.nScored
+    @property
+    def scoreRatio(self):
+        return 1 if self.secondBestScore == 0 or self.bestScore == 0 else \
+            self.bestScore/(1.0 * self.secondBestScore)
+    @property
+    def secondBestIdx(self):
+        return self._secondBestIdx
+    @property
+    def secondBestScore(self):
+        return self._secondBestScore
+    @property
+    def allScores(self):
+        return self._allScores
+
+    def __repr__(self):
+        return "(holeNumber = %d, nScored = %d, bestIdx = %d, bestScore = %d, averageScore = %d)" % \
+            (self.holeNumber, self.nScored, self.bestIdx, self.bestScore, self.averageScore)
+
+
+def writeBarcodeH5(labeledZmws, labeler, outFile,
+                   writeExtendedInfo = False):
+    """Write a barcode file from a list of labeled ZMWs. In addition
+    to labeledZmws, this function takes a
+    pbbarcode.BarcodeLabeler."""
+    bestScores = map(lambda z: z.toBestRecord(), labeledZmws)
+    outDta = n.vstack(bestScores)
+    outH5 = h5.File(outFile, 'a')
+
+    if BC_DS_PATH in outH5:
+        del outH5[BC_DS_PATH]
+
+    bestDS = outH5.create_dataset(BC_DS_PATH, data = outDta, dtype = "int32")
+    bestDS.attrs['movieName'] = labeler.movieName
+    bestDS.attrs['barcodes'] = n.array(labeler.barcodeLabels, dtype = h5.new_vlen(str))
+    bestDS.attrs['columnNames'] = n.array(['holeNumber', 'nAdapters', 'barcodeIdx1',
+                                           'barcodeScore1', 'barcodeIdx2', 'barcodeScore2'],
+                                          dtype = h5.new_vlen(str))
+    bestDS.attrs['scoreMode'] = labeler.scoreMode
+
+    if writeExtendedInfo:
+        # here we use the 'names' because each barcode is scored
+        # individually.
+        nBarcodes = len(labeler.barcodeNames)
+
+        def makeArray(l, v):
+            a = n.zeros(l, dtype = type(v))
+            a.fill(v)
+            return a
+
+        def makeRecord(lZmw):
+            zmws = makeArray(nBarcodes * lZmw.nScored, lZmw.holeNumber)
+            adapters = n.concatenate([makeArray(nBarcodes, i) for i in \
+                                          xrange(1, lZmw.nScored + 1)])
+            idxs = n.concatenate([range(0, nBarcodes) for i in \
+                                      xrange(0, lZmw.nScored)])
+            scores = n.concatenate(lZmw.allScores)
+            return n.transpose(n.vstack((zmws, adapters, idxs, scores)))
+
+        records = [makeRecord(lZmw) for lZmw in labeledZmws if lZmw.allScores]
+        records = n.vstack(records)
+
+        if BC_DS_ALL_PATH in outH5:
+            del outH5[BC_DS_ALL_PATH]
+        allDS = outH5.create_dataset(BC_DS_ALL_PATH, data = records, dtype = 'int32')
+        allDS.attrs['movieName'] = labeler.movieName
+        # note names versus labels.
+        allDS.attrs['barcodes'] = n.array(labeler.barcodeNames, dtype = h5.new_vlen(str))
+        allDS.attrs['columnNames'] = n.array(['holeNumber', 'adapter', 'barcodeIdx', 'score'],
+                                             dtype = h5.new_vlen(str))
+    # close the file at the very end.
+    outH5.close()
+
+
+class BarcodeH5Reader(object):
+    def __init__(self, fname):
+
+        try:
+            self.h5File = h5.File(fname, "r")
+        except IOError:
+            raise IOError("Invalid or nonexistent bc file %s" % fname)
+
+        self.bestDS = self.h5File[BC_DS_PATH]
+
+        self._scoreMode = self.bestDS.attrs['scoreMode']
+        self._barcodeLabels = self.bestDS.attrs['barcodes']
+        self._movieName = self.bestDS.attrs['movieName']
+        # zmw => LabeledZmw
+        labeledZmws = [LabeledZmw.fromBestRecord(self.bestDS[i,:]) for i in
+                       xrange(0, self.bestDS.shape[0])]
+        self.labeledZmws = dict([(lZmw.holeNumber, lZmw) for lZmw in labeledZmws])
+
+        # barcode => LabeledZmws
+        self.bcLabelToLabeledZmws = {l:[] for l in self.barcodeLabels}
+        for lZmw in self.labeledZmws.values():
+            d = self.bcLabelToLabeledZmws[self.barcodeLabels[lZmw.bestIdx]]
+            d.append(lZmw)
+
+    @property
+    def holeNumbers(self):
+        return sorted(self.labeledZmws.keys())
+    @property
+    def barcodeLabels(self):
+        return self._barcodeLabels
+    @property
+    def scoreMode(self):
+        """String specifying whether the barcodes were scored symmetrically or in pairs"""
+        return self._scoreMode
+    @property
+    def movieName(self):
+        return self._movieName
+
+    def labeledZmwFromHoleNumber(self, holeNumber):
+        """Returns a LabeledZmw object from the holeNumber"""
+        try:
+            return self.labeledZmws[holeNumber]
+        except KeyError:
+            raise KeyError("holeNumber %d not labeled" % holeNumber)
+
+    def labeledZmwsFromBarcodeLabel(self, bcLabel):
+        """Returns a list of LabeledZmw objects for the particular
+        barcode label, an empty list if there are no ZMWs for this
+        barcode."""
+        return self.bcLabelToLabeledZmws[bcLabel]
+
+    def __iter__(self):
+        for key in self.holeNumbers:
+            yield self.labeledZmws[key]
+
+
+class MPBarcodeH5Reader(object):
+    def __init__(self, parts):
+        self._parts = parts
+        def rng(x):
+            return (n.min(x), n.max(x))
+        # these aren't the ranges of ZMWs, but the ranges for the
+        # scored ZMWs.
+        self._bins = map(lambda z : rng(z.holeNumbers), self._parts)
+
+    def choosePart(self, holeNumber):
+        for i,b in enumerate(self._bins):
+            if holeNumber >= b[0] and holeNumber <= b[1]:
+                return self._parts[i]
+        # Return None meaning the zmw is out of the range of
+        # the scored ZMWs for all parts.
+        return None
+
+    @property
+    def barcodeLabels(self):
+        return self._parts[0].barcodeLabels
+    @property
+    def scoreMode(self):
+        """String specifying whether the barcodes were scored symmetrically or in pairs"""
+        return self._parts[0].scoreMode
+
+    def labeledZmwFromHoleNumber(self, holeNumber):
+        """Returns a LabeledZmw object from the holeNumber"""
+        part = self.choosePart(holeNumber)
+        if part:
+            return part.labeledZmwFromHoleNumber(holeNumber)
+        else:
+            raise KeyError("holeNumber: %d not labeled" % holeNumber)
+
+    def labeledZmwsFromBarcodeLabel(self, bcLabel):
+        lzmws = reduce(lambda x,y: x + y,
+                      map(lambda z: z.labeledZmwsFromBarcodeLabel(bcLabel),
+                          self._parts))
+        return sorted(lzmws, key=lambda z: z.holeNumber)
+
+    def __iter__(self):
+        for reader in self._parts:
+            for labeledZmw in reader:
+                yield labeledZmw
+
+    def __getitem__(self, item):
+        """Look up by hole number, barcode label, slice, or list/array."""
+        if isinstance(item, int) or issubclass(type(item), n.integer):
+            return self.labeledZmwFromHoleNumber(item)
+        elif isinstance(item, str):
+            return self.labeledZmwsFromBarcodeLabel(item)
+        elif isinstance(item, slice):
+            # No __len__ here: bound by the largest scored hole number.
+            stop = max(b[1] for b in self._bins) + 1
+            return [ self.labeledZmwFromHoleNumber(r) for r in xrange(*item.indices(stop)) ]
+        elif isinstance(item, list) or isinstance(item, n.ndarray):
+            if len(item) == 0:
+                return []
+            entryType = type(item[0])
+            if entryType == int or issubclass(entryType, n.integer):
+                return [ self.labeledZmwFromHoleNumber(r) for r in item ]
+            elif entryType == bool or issubclass(entryType, n.bool_):
+                return [ self.labeledZmwFromHoleNumber(r) for r in n.flatnonzero(item) ]
+        raise TypeError("Invalid type for MPBarcodeH5Reader slicing")
+
+
+class BarcodeH5Fofn(object):
+    def __init__(self, *args):
+
+        bcFilenames = []
+        for arg in args:
+            if arg.endswith(".fofn"):
+                for fn in readFofn(arg):
+                    bcFilenames.append(fn)
+            else:
+                bcFilenames.append(arg)
+
+        self._bcH5s = [BarcodeH5Reader(fname) for fname in
+                       bcFilenames]
+        self._byMovie = {}
+        for bc in self._bcH5s:
+            if bc.movieName not in self._byMovie:
+                self._byMovie[bc.movieName] = [bc]
+            else:
+                self._byMovie[bc.movieName].append(bc)
+
+        self.mpReaders = { movieName: parts[0] if len(parts) == 1 else MPBarcodeH5Reader(parts)
+                           for movieName, parts in self._byMovie.iteritems() }
+
+    @property
+    def holeNumbers(self):
+        return sorted([hn for reader in self._bcH5s
+                          for hn in reader.holeNumbers])
+    @property
+    def movieNames(self):
+        return self.mpReaders.keys()
+    @property
+    def barcodeLabels(self):
+        return self._bcH5s[0].barcodeLabels
+    @property
+    def scoreMode(self):
+        """String specifying whether the barcodes were scored symmetrically or in pairs"""
+        return self._bcH5s[0].scoreMode
+
+    def labeledZmwsFromBarcodeLabel(self, item):
+        lzmws = reduce(lambda x,y: x + y,
+                      map(lambda z: z.labeledZmwsFromBarcodeLabel(item),
+                          self._bcH5s))
+        return sorted(lzmws, key=lambda z: z.holeNumber )
+
+    def labeledZmwFromName(self, item):
+        """Resolve "movie[/holeNumber-or-barcodeLabel]" to a reader,
+        a LabeledZmw, or a list of LabeledZmws."""
+        indices = item.rstrip("/").split("/")
+        # str.split always yields at least one element; indices[0] is the movie.
+        result = self.readerForMovie(indices[0])
+        if len(indices) >= 2:
+            if indices[1] in self.barcodeLabels:
+                return result.labeledZmwsFromBarcodeLabel(indices[1])
+            try:
+                indexNum = int(indices[1])
+            except ValueError:
+                raise ValueError("Invalid hole number or barcode name {0} as second index".format(indices[1]))
+            # Both reader classes expose this method; BarcodeH5Reader has
+            # no __getitem__, so indexing would fail for one-part movies.
+            result = result.labeledZmwFromHoleNumber(indexNum)
+        return result
+
+    def labeledZmwFromHoleNumber(self, item):
+        if len(self.movieNames) > 1:
+            raise ValueError("Cannot slice by holeNumber with multiple movies")
+        reader = self.mpReaders[self.movieNames[0]]
+        # Single-part movies map to a plain BarcodeH5Reader, which has no
+        # __getitem__; only MPBarcodeH5Reader supports indexing.
+        return reader[item] if hasattr(reader, "__getitem__") else reader.labeledZmwFromHoleNumber(item)
+
+    def readerForMovie(self, movieName):
+        """Return a BarcodeH5Reader for a movieName"""
+        return self.mpReaders[movieName]
+
+    def __iter__(self):
+        for reader in self._bcH5s:
+            for labeledZmw in reader:
+                yield labeledZmw
+
+    def __getitem__(self, item):
+        """
+        Get a BarcodeH5Reader or LabeledZmw by movie name, zmw name, subread id,
+        or ccs id, using standard PacBio naming conventions.  Examples:
+
+          - ["F3--R3"]                        -> List of LabeledZmws
+          - ["m110818_..._s1_p0"]             -> BarcodeH5Reader
+          - ["m110818_..._s1_p0/F3--R3"]      -> List of LabeledZmws
+          - ["m110818_..._s1_p0/24480"]       -> LabeledZmw
+          - ["m110818_..._s1_p0/24480/20_67"] -> LabeledZmw
+        """
+
+        if (isinstance(item, int) or
+            issubclass(type(item), n.integer)):
+            return self.labeledZmwFromHoleNumber(item)
+        elif isinstance(item, str):
+            if item in self.barcodeLabels:
+                return self.labeledZmwsFromBarcodeLabel(item)
+            elif item in self.movieNames:
+                return self.readerForMovie(item)
+            else:
+                return self.labeledZmwFromName(item)
+        else:
+            raise ValueError("BcH5Fofn slice must be a barcode, name or hole number")
diff --git a/pbcore/io/BasH5IO.py b/pbcore/io/BasH5IO.py
new file mode 100644
index 0000000..2dd4b2e
--- /dev/null
+++ b/pbcore/io/BasH5IO.py
@@ -0,0 +1,1026 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Authors: David Alexander, Jim Bullard
+
+__all__ = [ "BasH5Reader"     ,
+            "BaxH5Reader"     ,
+            "BasH5Collection" ]
+
+import h5py, numpy as np, os.path as op
+from bisect import bisect_left, bisect_right
+from operator import getitem
+from itertools import groupby
+from collections import OrderedDict
+
+from pbcore.io.FofnIO import readFofn
+from pbcore.chemistry import (decodeTriple,
+                              tripleFromMetadataXML,
+                              ChemistryLookupError)
+from ._utils import arrayFromDataset, CommonEqualityMixin
+
+
+def intersectRanges(r1, r2):
+    b1, e1 = r1
+    b2, e2 = r2
+    b, e = max(b1, b2), min(e1, e2)
+    return (b, e) if (b < e) else None
+
+def rangeLength(r):
+    b, e = r
+    return e - b
+
+def removeNones(lst):
+    return filter(lambda x: x!=None, lst)
+
+# ZMW hole Types
+SEQUENCING_ZMW = 0
+
+# Region types
+ADAPTER_REGION = 0
+INSERT_REGION  = 1
+HQ_REGION      = 2
+
+# This seems to be the magic incantation to get a RecArray that can be
+# indexed to yield a record that can then be accessed using dot
+# notation.
+def toRecArray(dtype, arr):
+    return np.rec.array(arr, dtype=dtype).flatten()
+
+REGION_TABLE_DTYPE = [("holeNumber",  np.int32),
+                      ("regionType",  np.int32),
+                      ("regionStart", np.int32),
+                      ("regionEnd",   np.int32),
+                      ("regionScore", np.int32) ]
+
+def _makeQvAccessor(featureName):
+    def f(self):
+        return self.qv(featureName)
+    return f
+
+class Zmw(CommonEqualityMixin):
+    """
+    A Zmw represents all data from a ZMW (zero-mode waveguide) hole
+    within a bas.h5 movie file.  Accessor methods provide convenient
+    access to the read (or subreads), and to the region table entries
+    for this hole.
+    """
+    __slots__ = [ "baxH5", "holeNumber", "index"]
+
+    def __init__(self, baxH5, holeNumber):
+        self.baxH5               = baxH5
+        self.holeNumber          = holeNumber
+        self.index               = self.baxH5._holeNumberToIndex[holeNumber]
+
+    @property
+    def regionTable(self):
+        if self.holeNumber in self.baxH5._regionTableIndex:
+            startRow, endRow = self.baxH5._regionTableIndex[self.holeNumber]
+            return self.baxH5.regionTable[startRow:endRow]
+        else:
+            # Broken region table---primary pipeline bug (see bugs
+            # 23585, 25273).  Work around this by returning a fake
+            # regiontable consisting of an empty HQ region
+            return toRecArray(REGION_TABLE_DTYPE,
+                              [ (self.holeNumber, HQ_REGION, 0, 0, 0) ])
+
+    #
+    # The following "region" calls return one or more intervals ((int, int)).
+    #  - The default implementations perform clipping to the hqRegion.
+    #  - The "unclipped" implementations entail no clipping
+    #
+    @property
+    def adapterRegionsNoQC(self):
+        """
+        Get adapter regions as intervals, without clipping to the HQ
+        region.  Don't use this unless you know what you're doing.
+        """
+        return [ (region.regionStart, region.regionEnd)
+                 for region in self.regionTable
+                 if region.regionType == ADAPTER_REGION ]
+
+    @property
+    def adapterRegions(self):
+        """
+        Get adapter regions as intervals, performing clipping to the HQ region
+        """
+        hqRegion = self.hqRegion
+        return removeNones([ intersectRanges(hqRegion, region)
+                             for region in self.adapterRegionsNoQC ])
+
+    @property
+    def insertRegionsNoQC(self):
+        """
+        Get insert regions as intervals, without clipping to the HQ
+        region.  Don't use this unless you know what you're doing.
+        """
+        return [ (region.regionStart, region.regionEnd)
+                 for region in self.regionTable
+                 if region.regionType == INSERT_REGION ]
+
+    @property
+    def insertRegions(self):
+        """
+        Get insert regions as intervals, clipped to the HQ region
+        """
+        hqRegion = self.hqRegion
+        return removeNones([ intersectRanges(hqRegion, region)
+                             for region in self.insertRegionsNoQC ])
+    @property
+    def hqRegion(self):
+        """
+        Return the HQ region interval.
+
+        The HQ region is an interval of basecalls where the basecaller has
+        inferred that a single sequencing reaction is taking place.
+        Secondary analysis should only use subreads within the HQ
+        region.  Methods in this class, with the exception of the
+        "NoQC" methods, return data appropriately clipped/filtered to
+        the HQ region.
+        """
+        rt = self.regionTable
+        hqRows = rt[rt.regionType == HQ_REGION]
+        assert len(hqRows) == 1
+        hqRow = hqRows[0]
+        return hqRow.regionStart, hqRow.regionEnd
+
+    @property
+    def readScore(self):
+        """
+        Return the "read score", a prediction of the accuracy (between 0 and 1) of the
+        basecalls from this ZMW, from the `ReadScore` dataset in the
+        file
+        """
+        return self.zmwMetric("ReadScore")
+
+    @property
+    def productivity(self):
+        """
+        Return the 'productivity' of this ZMW, which is the estimated
+        number of polymerase reactions taking place within it.  For
+        example, a doubly-loaded ZMW would have productivity 2.
+        """
+        return self.zmwMetric("Productivity")
+
+    @property
+    def hqRegionSnr(self):
+        """
+        Return the SNRs, as a vector by channel.
+        """
+        return self.zmwMetric("HQRegionSNR")
+
+    def zmwMetric(self, name):
+        """
+        Return the value of metric 'name' from the ZMW metrics.
+        """
+        return self.baxH5.zmwMetric(name, self.index)
+
+    def listZmwMetrics(self):
+        """
+        List the available ZMW metrics for this bax.h5 file.
+        """
+        return self.baxH5.listZmwMetrics()
+
+    @property
+    def numPasses(self):
+        """
+        Return the number of passes (forward + back) across the SMRTbell
+        insert, used in forming the CCS consensus.
+        """
+        if not self.baxH5.hasConsensusBasecalls:
+            raise ValueError, "No CCS reads in this file"
+        return self.baxH5._ccsNumPasses[self.index]
+
+    #
+    # The following calls return one or more ZmwRead objects.
+    #
+    def read(self, readStart=None, readEnd=None):
+        """
+        Given no arguments, returns the entire (HQ-clipped) polymerase
+        read.  With readStart, readEnd arguments, returns the
+        specified extent of the polymerase read.
+        """
+        if not self.baxH5.hasRawBasecalls:
+            raise ValueError, "No raw reads in this file"
+        hqStart, hqEnd = self.hqRegion
+        readStart = hqStart if readStart is None else readStart
+        readEnd   = hqEnd if readEnd is None else readEnd
+        return ZmwRead(self.baxH5, self.holeNumber, readStart, readEnd)
+
+
+    def readNoQC(self, readStart=None, readEnd=None):
+        """
+        Given no arguments, returns the entire polymerase read, *not
+        HQ-clipped*.  With readStart, readEnd arguments, returns the
+        specified extent of the polymerase read.
+
+        .. warning::
+
+            It is not recommended that production code use this method
+            as we make no guarantees about what happens outside of the
+            HQ region.
+        """
+        if not self.baxH5.hasRawBasecalls:
+            raise ValueError, "No raw reads in this file"
+        offsets = self.baxH5._offsetsByHole[self.holeNumber]
+        numEvent = offsets[1] - offsets[0]
+        polymeraseBegin = 0
+        polymeraseEnd = numEvent
+        readStart = polymeraseBegin if readStart is None else readStart
+        readEnd   = polymeraseEnd   if readEnd   is None else readEnd
+        return ZmwRead(self.baxH5, self.holeNumber, readStart, readEnd)
+
+    @property
+    def subreadsNoQC(self):
+        """
+        Get the subreads, including data beyond the bounds of the HQ region.
+
+        .. warning::
+
+            It is not recommended that production code use this method
+            as we make no guarantees about what happens outside of the
+            HQ region.
+        """
+        if not self.baxH5.hasRawBasecalls:
+            raise ValueError, "No raw reads in this file"
+        return [ self.readNoQC(readStart, readEnd)
+                 for (readStart, readEnd) in self.insertRegionsNoQC ]
+
+    @property
+    def subreads(self):
+        """
+        Get the subreads as a list of ZmwRead objects.  Restricts focus,
+        and clips to, the HQ region.  This method can be used by
+        production code.
+        """
+        if not self.baxH5.hasRawBasecalls:
+            raise ValueError, "No raw reads in this file"
+        return [ self.read(readStart, readEnd)
+                 for (readStart, readEnd) in self.insertRegions ]
+
+
+    @property
+    def adapters(self):
+        """
+        Get the adapter hits as a list of ZmwRead objects.  Restricts
+        focus, and clips to, the HQ region.  This method can be used
+        by production code.
+        """
+        if not self.baxH5.hasRawBasecalls:
+            raise ValueError, "No raw reads in this file"
+        return [ self.read(readStart, readEnd)
+                 for (readStart, readEnd) in self.adapterRegions ]
+
+    @property
+    def adaptersNoQC(self):
+        """
+        Get the adapters, including data beyond the bounds of the HQ
+        region.
+
+        .. warning::
+
+            It is not recommended that production code use this method
+            as we make no guarantees about what happens outside of the
+            HQ region.
+        """
+        if not self.baxH5.hasRawBasecalls:
+            raise ValueError, "No raw reads in this file"
+        return [ self.readNoQC(readStart, readEnd)
+                 for (readStart, readEnd) in self.adapterRegionsNoQC ]  # NOTE(review): confirm accessor name
+
+    @property
+    def ccsRead(self):
+        if not self.baxH5.hasConsensusBasecalls:
+            raise ValueError, "No CCS reads in this file"
+        baseOffset  = self.baxH5._ccsOffsetsByHole[self.holeNumber]
+        if (baseOffset[1] - baseOffset[0]) <= 0:
+            return None
+        else:
+            return CCSZmwRead(self.baxH5, self.holeNumber, 0,
+                              baseOffset[1] - baseOffset[0])
+
+    @property
+    def zmwName(self):
+        return "%s/%d" % (self.baxH5.movieName,
+                          self.holeNumber)
+
+    def __repr__(self):
+        return "<Zmw: %s>" % self.zmwName
+
+
+class ZmwRead(CommonEqualityMixin):
+    """
+    A ZmwRead represents the data features (basecalls as well as pulse
+    features) recorded from the ZMW, delimited by readStart and readEnd.
+    """
+    __slots__ = [ "baxH5", "holeNumber",
+                  "readStart", "readEnd",
+                  "offsetBegin", "offsetEnd" ]
+
+    def __init__(self, baxH5, holeNumber, readStart, readEnd):
+        self.baxH5        = baxH5
+        self.holeNumber   = holeNumber
+        self.readStart    = readStart
+        self.readEnd      = readEnd
+        zmwOffsetBegin, zmwOffsetEnd = self._getOffsets()[self.holeNumber]
+        self.offsetBegin = zmwOffsetBegin + self.readStart
+        self.offsetEnd   = zmwOffsetBegin + self.readEnd
+        if not (zmwOffsetBegin   <=
+                self.offsetBegin <=
+                self.offsetEnd   <=
+                zmwOffsetEnd):
+            raise IndexError, "Invalid slice of Zmw!"
+
+    def _getBasecallsGroup(self):
+        return self.baxH5._basecallsGroup
+
+    def _getOffsets(self):
+        return self.baxH5._offsetsByHole
+
+    @property
+    def zmw(self):
+        return self.baxH5[self.holeNumber]
+
+    @property
+    def readName(self):
+        return "%s/%d_%d" % (self.zmw.zmwName,
+                             self.readStart,
+                             self.readEnd)
+
+    def __repr__(self):
+        return "<%s: %s>" % (self.__class__.__name__,
+                             self.readName)
+
+    def __len__(self):
+        return self.readEnd - self.readStart
+
+    def basecalls(self):
+        return arrayFromDataset(self._getBasecallsGroup()["Basecall"],
+                                self.offsetBegin, self.offsetEnd).tostring()
+
+    def qv(self, qvName):
+        return arrayFromDataset(self._getBasecallsGroup()[qvName],
+                                self.offsetBegin, self.offsetEnd)
+
+    PreBaseFrames  = _makeQvAccessor("PreBaseFrames")
+    IPD            = _makeQvAccessor("PreBaseFrames")
+
+    WidthInFrames  = _makeQvAccessor("WidthInFrames")
+    PulseWidth     = _makeQvAccessor("WidthInFrames")
+
+    QualityValue   = _makeQvAccessor("QualityValue")
+    InsertionQV    = _makeQvAccessor("InsertionQV")
+    DeletionQV     = _makeQvAccessor("DeletionQV")
+    DeletionTag    = _makeQvAccessor("DeletionTag")
+    MergeQV        = _makeQvAccessor("MergeQV")
+    SubstitutionQV = _makeQvAccessor("SubstitutionQV")
+    SubstitutionTag = _makeQvAccessor("SubstitutionTag")
+
+
+class CCSZmwRead(ZmwRead):
+    """
+    Class providing access to the CCS (circular consensus sequencing)
+    data calculated for a ZMW.
+    """
+    def _getBasecallsGroup(self):
+        return self.baxH5._ccsBasecallsGroup
+
+    def _getOffsets(self):
+        return self.baxH5._ccsOffsetsByHole
+
+    @property
+    def readName(self):
+        return "%s/ccs" % self.zmw.zmwName
+
+def _makeOffsetsDataStructure(h5Group):
+    numEvent   = h5Group["ZMW/NumEvent"].value
+    holeNumber = h5Group["ZMW/HoleNumber"].value
+    endOffset = np.cumsum(numEvent)
+    beginOffset = np.hstack(([0], endOffset[0:-1]))
+    offsets = zip(beginOffset, endOffset)
+    return dict(zip(holeNumber, offsets))
+
+def _makeRegionTableIndex(regionTableHoleNumbers):
+    #  returns a dict: holeNumber -> (startRow, endRow)
+    diffs = np.ediff1d(regionTableHoleNumbers,
+                       to_begin=[1], to_end=[1])
+    changepoints = np.flatnonzero(diffs)
+    startsAndEnds = zip(changepoints[:-1],
+                        changepoints[1:])
+    return dict(zip(np.unique(regionTableHoleNumbers),
+                    startsAndEnds))
+
+class BaxH5Reader(object):
+    """
+    The `BaxH5Reader` class provides access to bax.h5 file and
+    single-part bas.h5 files.
+    """
+    def __init__(self, filename, regionH5Filename=None):
+        try:
+            self.filename = op.abspath(op.expanduser(filename))
+            self.file = h5py.File(self.filename, "r")
+        except IOError:
+            raise IOError, ("Invalid or nonexistent bax/bas file %s" % filename)
+
+        #
+        # Raw base calls?
+        #
+        if "/PulseData/BaseCalls/Basecall" in self.file:
+            self._basecallsGroup = self.file["/PulseData/BaseCalls"]
+            self._offsetsByHole  = _makeOffsetsDataStructure(self._basecallsGroup)
+            self.hasRawBasecalls = True
+        else:
+            self.hasRawBasecalls = False
+        #
+        # CCS base calls?
+        #
+        if "/PulseData/ConsensusBaseCalls" in self.file:
+            self._ccsBasecallsGroup = self.file["/PulseData/ConsensusBaseCalls"]
+            self._ccsOffsetsByHole  = _makeOffsetsDataStructure(self._ccsBasecallsGroup)
+            self._ccsNumPasses      = self._ccsBasecallsGroup["Passes/NumPasses"]
+            self.hasConsensusBasecalls = True
+        else:
+            self.hasConsensusBasecalls = False
+
+        self._mainBasecallsGroup = self._basecallsGroup if self.hasRawBasecalls \
+                                   else self._ccsBasecallsGroup
+
+        if regionH5Filename is None:
+            # load region information from the bas/bax file
+            self._loadRegions(self.file)
+        else:
+            # load region information from a separate region file
+            self.loadExternalRegions(regionH5Filename)
+
+        # Create a variable to store the chemistry information
+        self._sequencingChemistry = None
+        #
+        # ZMW metric cache -- probably want to move prod and readScore
+        # here.
+        #
+        self.__metricCache = {}
+
+    def _loadRegions(self, fh):
+        """
+        Loads region table information from the given file handle and applies
+        it to the ZMW data.
+        """
+        holeNumbers = self._mainBasecallsGroup["ZMW/HoleNumber"].value
+        self._holeNumberToIndex = dict(zip(holeNumbers, range(len(holeNumbers))))
+
+        #
+        # Region table
+        #
+        self.regionTable = toRecArray(REGION_TABLE_DTYPE,
+                                      fh["/PulseData/Regions"].value)
+
+        self._regionTableIndex = _makeRegionTableIndex(self.regionTable.holeNumber)
+        isHqRegion     = self.regionTable.regionType == HQ_REGION
+        hqRegions      = self.regionTable[isHqRegion]
+
+        if len(hqRegions) != len(holeNumbers):
+            # Bug 23585: pre-2.1 primary had a bug where a bas file
+            # could get a broken region table, lacking an HQ region
+            # entry for a ZMW.  This happened fairly rarely, mostly on
+            # very long traces.  Workaround here is to rebuild HQ
+            # regions table with empty HQ region entries for those
+            # ZMWs.
+            hqRegions_ = toRecArray(REGION_TABLE_DTYPE,
+                                    np.zeros(shape=len(holeNumbers),
+                                             dtype=REGION_TABLE_DTYPE))
+            hqRegions_.holeNumber = holeNumbers
+            for record in hqRegions:
+                hn = record.holeNumber
+                hqRegions_[self._holeNumberToIndex[hn]] = record
+            hqRegions = hqRegions_
+
+        hqRegionLength = hqRegions.regionEnd - hqRegions.regionStart
+        holeStatus     = self._mainBasecallsGroup["ZMW/HoleStatus"].value
+
+        #
+        # Sequencing ZMWs - Note: this differs from Primary's
+        # definition. To obtain those values, one would use the
+        # `allSequencingZmws` property.
+        #
+        self._sequencingZmws = \
+            holeNumbers[(holeStatus == SEQUENCING_ZMW)                       &
+                        (self._mainBasecallsGroup["ZMW/NumEvent"].value > 0) &
+                        (hqRegionLength >  0)]
+
+        self._allSequencingZmws = holeNumbers[holeStatus == SEQUENCING_ZMW]
+
+    def loadExternalRegions(self, regionH5Filename):
+        """
+        Loads regions defined in the given file, overriding those found in the
+        bas/bax file.
+        """
+        try:
+            fh = h5py.File(op.abspath(op.expanduser(regionH5Filename)), "r")
+        except IOError:
+            raise IOError, ("Invalid or nonexistent file %s" % regionH5Filename)
+        try:
+            self._loadRegions(fh)
+        finally:
+            fh.close()  # release the handle even if the region table cannot be read
+        # A sanity check that the given region table provides information for
+        # hole numbers contained in this base file.
+        baxHoleNumbers = self._mainBasecallsGroup["ZMW/HoleNumber"].value
+        rgnHoleNumbers = self.regionTable.holeNumber
+        if not np.in1d(rgnHoleNumbers, baxHoleNumbers).all():
+            msg = "Region file (%s) does not contain the same hole numbers as " \
+                  "bas/bax file (%s)"
+            raise IOError, (msg % (regionH5Filename, self.filename))
+
+    @property
+    def sequencingZmws(self):
+        """
+        A list of the hole numbers that produced useable sequence data.
+        Specifically, this means ZMWs that have an HQ region.
+        """
+        return self._sequencingZmws
+
+    @property
+    def allSequencingZmws(self):
+        """
+        A list of the hole numbers that are capable of producing
+        sequencing data. This differs from the `sequencingZmws` in
+        that zmws are not filtered according to their HQ status. This
+        number is fixed per chip, whereas the `sequencingZmws` depends
+        on things such as loading.
+        """
+        return self._allSequencingZmws
+
+    def __getitem__(self, holeNumber):
+        return Zmw(self, holeNumber)
+
+    #
+    # Iterators over Zmws, ZmwReads
+    #
+
+    def __iter__(self):
+        for holeNumber in self.sequencingZmws:
+            yield self[holeNumber]
+
+    def reads(self):
+        if self.hasRawBasecalls:
+            for zmw in self:
+                yield zmw.read()
+
+    def subreads(self):
+        if self.hasRawBasecalls:
+            for zmw in self:
+                for subread in zmw.subreads:
+                    yield subread
+
+    def ccsReads(self):
+        if self.hasConsensusBasecalls:
+            for zmw in self:
+                if zmw.ccsRead is not None:
+                    yield zmw.ccsRead
+
+    # ------------------------------
+
+    @property
+    def movieName(self):
+        movieNameAttr = self.file["/ScanData/RunInfo"].attrs["MovieName"]
+
+        # In old bas.h5 files, attributes of ScanData/RunInfo are stored as
+        # strings in arrays of length one.
+        if (isinstance(movieNameAttr, (np.ndarray, list)) and
+                len(movieNameAttr) == 1):
+            movieNameString = movieNameAttr[0]
+        else:
+            movieNameString = movieNameAttr
+
+        if not isinstance(movieNameString, basestring):
+            raise TypeError("Unsupported movieName {m} of type {t}."
+                             .format(m=movieNameString,
+                                     t=type(movieNameString)))
+        return movieNameString
+
+    @property
+    def _chemistryBarcodeTripleInFile(self):
+        """
+        The chemistry barcode triple consists of (BindingKit,
+        SequencingKit, SoftwareVersion) and is written on the
+        instrument to the bax file as of primary version 2.1.  Prior
+        to that, it was only written in the metadata.xml.
+        """
+        try:
+            bindingKit      = self.file["/ScanData/RunInfo"].attrs["BindingKit"]
+            sequencingKit   = self.file["/ScanData/RunInfo"].attrs["SequencingKit"]
+            # version string in bas file looks like "2.1.1.1.x", we have to extract
+            # the "2.1"
+            tmp = self.file["/PulseData/BaseCalls"].attrs["ChangeListID"]
+            swVersion= ".".join(tmp.split(".")[0:2])
+            return (bindingKit, sequencingKit, swVersion)
+        except Exception:  # best-effort lookup; let KeyboardInterrupt/SystemExit propagate
+            return None
+
+    @property
+    def _chemistryBarcodeTripleFromMetadataXML(self):
+        try:
+            movieName = self.movieName
+            _up = op.dirname(op.dirname(self.filename))
+            metadataLocation = op.join(_up, movieName + ".metadata.xml")
+            triple = tripleFromMetadataXML(metadataLocation)
+            return triple
+        except ChemistryLookupError:
+            return None
+
+    @property
+    def chemistryBarcodeTriple(self):
+        triple = self._chemistryBarcodeTripleInFile or self._chemistryBarcodeTripleFromMetadataXML
+        if triple:
+            return triple
+        else:
+            raise ChemistryLookupError, "Could not find chemistry barcodes in file or companion metadata.xml"
+
+    @property
+    def sequencingChemistry(self):
+        """
+        Find the name of the chemistry by consulting, in order of preference:
+          1) Barcode triple in file
+          2) "SequencingChemistry" attr in file (chemistry override)
+          3) metadata.xml companion file
+        """
+        if self._sequencingChemistry is None:
+            triple = self._chemistryBarcodeTripleInFile
+            if triple is not None:
+                self._sequencingChemistry = decodeTriple(*triple)
+            elif "SequencingChemistry" in self.file["/ScanData/RunInfo"].attrs:
+                self._sequencingChemistry = self.file["/ScanData/RunInfo"].attrs["SequencingChemistry"]
+            else:
+                tripleFromXML = self._chemistryBarcodeTripleFromMetadataXML
+                if tripleFromXML is not None:
+                    self._sequencingChemistry = decodeTriple(*tripleFromXML)
+                else:
+                    raise ChemistryLookupError, "Chemistry information could not be found for this file"
+        return self._sequencingChemistry
+
+    def __len__(self):
+        return len(self.sequencingZmws)
+
+    def close(self):
+        if hasattr(self, "file") and self.file is not None:
+            self.file.close()
+            self.file = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def listZmwMetrics(self):
+        return self._mainBasecallsGroup["ZMWMetrics"].keys()  # same group zmwMetric() reads; also set for CCS-only files
+
+    def zmwMetric(self, name, index):
+        # we are going to cache these lazily because it is very likely
+        # that if one ZMW asked for the metric others aren't far
+        # behind.
+        if name not in self.__metricCache:
+            k = "/".join(("ZMWMetrics", name))
+            self.__metricCache[name] = self._mainBasecallsGroup[k].value
+
+        v = self.__metricCache[name]
+        if len(v.shape) > 1:
+            return v[index,]
+        else:
+            return v[index]
+
+
+class BasH5Reader(object):
+    """
+    .. testsetup:: *
+
+       from pbcore.io import BasH5Reader
+       from pbcore import data
+       filename = data.getBasH5s()[0]
+       b = BasH5Reader(filename)
+       zmw8 = b[8]
+
+    The `BasH5Reader` provides access to the basecall and pulse metric
+    data encoded in PacBio bas.h5 files.  To access data using a
+    `BasH5Reader`, the standard idiom is:
+
+    1. Index into the `BasH5Reader` using the ZMW hole number to get a `Zmw` object::
+
+        >>> b
+        <BasH5Reader: m110818_075520_42141_c100129202555500000315043109121112_s1_p0>
+        >>> zmw8 = b[8]
+        >>> zmw8
+        <Zmw: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8>
+
+    2. Extract `ZmwRead` objects from the `Zmw` object by:
+
+       - Using the `.subreads` property to extract the subreads, which
+         are the subintervals of the raw read corresponding to the
+         SMRTbell insert::
+
+           >>> subreads = zmw8.subreads
+           >>> print subreads
+           [<ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/3381_3881>,
+           <ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/3924_4398>,
+           <ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/4445_4873>,
+           <ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/4920_5354>,
+           <ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/5413_5495>]
+
+       - For CCS bas files, using the `.ccsRead` property to extract
+         the CCS (consensus) read, which is a consensus sequence
+         precomputed from the subreads.  Older bas files, from when
+         CCS was computed on the instrument, may contain both CCS- and
+         sub- reads.
+
+           >>> zmw8.ccsRead
+           <CCSZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/ccs>
+
+       - Use the `.read()` method to get the full raw read, or
+         `.read(start, end)` to extract a custom subinterval.
+
+           >>> zmw8.read()
+           <ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/3381_5495>
+           >>> zmw8.read(3390, 3400)
+           <ZmwRead: m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/3390_3400>
+
+    3. With a `ZmwRead` object in hand, extract the desired
+       basecalls and pulse metrics::
+
+         >>> subreads[0].readName
+         "m110818_075520_42141_c100129202555500000315043109121112_s1_p0/8/3381_3881"
+         >>> subreads[0].basecalls()
+         "AGCCCCGTCGAGAACATACAGGTGGCCAATTTCACAGCCTCTTGCCTGGGCGATCCCGAACATCGCACCGGA..."
+         >>> subreads[0].InsertionQV()
+         array([12, 12, 10,  2,  7, 14, 13, 18, 15, 16, 16, 15, 10, 12,  3, 14, ...])
+
+    Note that not every ZMW on a chip produces usable sequencing
+    data.  The `BasH5Reader` property `sequencingZmws` is a list
+    of the hole numbers where usable sequence was recorded.
+    Iteration over the `BasH5Reader` object allows you to iterate over
+    the `Zmw` objects providing usable sequence.
+    """
+    def __init__(self, *args):
+        assert len(args) > 0
+
+        if len(args) == 1:
+            filename = args[0]
+            try:
+                self.filename = op.abspath(op.expanduser(filename))
+                self.file = h5py.File(self.filename, "r")
+            except IOError:
+                raise IOError, ("Invalid or nonexistent bas/bax file %s" % filename)
+
+
+            # Is this a multi-part or single-part?
+            if self.file.get("MultiPart"):
+                directory = op.dirname(self.filename)
+                self._parts = [ BaxH5Reader(op.join(directory, fn))
+                                for fn in self.file["/MultiPart/Parts"] ]
+                self._holeLookupVector = self.file["/MultiPart/HoleLookup"][:,1]
+                self._holeLookup = self._holeLookupVector.__getitem__
+            else:
+                self._parts = [ BaxH5Reader(self.filename) ]
+                self._holeLookup = (lambda holeNumber: 1)
+        else:
+            partFilenames    = args
+            self.filename    = None
+            self.file        = None
+            self._parts      = [ BaxH5Reader(fn) for fn in partFilenames ]
+            holeLookupDict   = { hn : (i + 1)
+                                 for i in xrange(len(self._parts))
+                                 for hn in self._parts[i]._holeNumberToIndex }
+            self._holeLookup = lambda hn: holeLookupDict[hn]
+        self._sequencingZmws = np.concatenate([ part.sequencingZmws
+                                                for part in self._parts ])
+
+    @property
+    def parts(self):
+        return self._parts
+
+    @property
+    def sequencingZmws(self):
+        return self._sequencingZmws
+
+    @property
+    def allSequencingZmws(self):
+        return np.concatenate([ part.allSequencingZmws
+                                for part in self._parts ])
+
+    @property
+    def hasConsensusBasecalls(self):
+        return all(part.hasConsensusBasecalls for part in self._parts)
+
+    @property
+    def hasRawBasecalls(self):
+        return all(part.hasRawBasecalls for part in self._parts)
+
+
+    #
+    # Iterators
+    #
+
+    def __iter__(self):
+        """
+        Iterate over ZMWs
+        """
+        for holeNumber in self.sequencingZmws:
+            yield self[holeNumber]
+
+    def reads(self):
+        for part in self._parts:
+            for read in part.reads():
+                yield read
+
+    def subreads(self):
+        for part in self._parts:
+            for subread in part.subreads():
+                yield subread
+
+    def ccsReads(self):
+        for part in self._parts:
+            for ccsRead in part.ccsReads():
+                yield ccsRead
+
+    # ----------
+
+    def __len__(self):
+        return len(self.sequencingZmws)
+
+    def _getitemScalar(self, holeNumber):
+        part = self.parts[self._holeLookup(holeNumber)-1]
+        return part[holeNumber]
+
+    def __getitem__(self, holeNumbers):
+        if (isinstance(holeNumbers, int) or
+            issubclass(type(holeNumbers), np.integer)):
+            return self._getitemScalar(holeNumbers)
+        elif isinstance(holeNumbers, slice):
+            return [ self._getitemScalar(r)
+                     for r in xrange(*holeNumbers.indices(len(self)))]
+        elif isinstance(holeNumbers, list) or isinstance(holeNumbers, np.ndarray):
+            if len(holeNumbers) == 0:
+                return []
+            else:
+                entryType = type(holeNumbers[0])
+                if entryType == int or issubclass(entryType, np.integer):
+                    return [ self._getitemScalar(r) for r in holeNumbers ]
+                elif entryType == bool or issubclass(entryType, np.bool_):
+                    return [ self._getitemScalar(r) for r in np.flatnonzero(holeNumbers) ]
+        raise TypeError, "Invalid type for BasH5Reader slicing"
+
+    @property
+    def movieName(self):
+        return self._parts[0].movieName
+
+    @property
+    def chemistryBarcodeTriple(self):
+        return self._parts[0].chemistryBarcodeTriple
+
+    @property
+    def sequencingChemistry(self):
+        return self._parts[0].sequencingChemistry
+
+    def __len__(self):
+        return len(self.sequencingZmws)
+
+    def close(self):
+        if hasattr(self, "file") and self.file is not None:
+            self.file.close()
+            self.file = None
+        for part in self.parts:
+            part.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def __iter__(self):
+        for holeNumber in self.sequencingZmws:
+            yield self[holeNumber]
+
+    def __repr__(self):
+        return "<BasH5Reader: %s>" % self.movieName
+
+    # Make cursor classes available
+    Zmw        = Zmw
+    ZmwRead    = ZmwRead
+    CCSZmwRead = CCSZmwRead
+
+def sniffMovieName(basFilename):
+    # The clean way to do this is to get the movie name attribute from
+    # the file, but unfortunately that approach is unusably slow.
+    # Here we assume that the filename follows the standard PacBio
+    # naming convention.
+    movieName = op.basename(basFilename).split(".")[0]
+    return movieName
+
+class BasH5Collection(object):
+    """
+    Class representing a collection of base call (bas/bax) files.
+
+    Can be initialized from a list of bas/bax files, or an input.fofn
+    file containing a list of bas/bax files
+    """
+
+    def __init__(self, *args):
+        #
+        # Implementation notes: find all the bas/bax files, and group
+        # them together by movieName
+        #
+        basFilenames = []
+        for arg in args:
+            if arg.endswith(".fofn"):
+                for fn in readFofn(arg):
+                    basFilenames.append(fn)
+            else:
+                basFilenames.append(arg)
+
+        movieNames = map(sniffMovieName, basFilenames)
+        movieNamesAndFiles = sorted(zip(movieNames, basFilenames))
+
+        self.readers = OrderedDict(
+            [ (k , BasH5Reader(*[val[1] for val in v]))
+              for k, v in groupby(movieNamesAndFiles, lambda t: t[0]) ])
+
+    @property
+    def movieNames(self):
+        return self.readers.keys()
+
+    def __getitem__(self, key):
+        """
+        Slice by movie name, zmw name, or zmw range name, using standard
+        PacBio naming conventions.  Examples:
+
+          - ["m110818_..._s1_p0"]             -> BasH5Reader
+          - ["m110818_..._s1_p0/24480"]       -> Zmw
+          - ["m110818_..._s1_p0/24480/20_67"] -> ZmwRead
+          - ["m110818_..._s1_p0/24480/ccs"]   -> CCSZmwRead
+        """
+        indices = key.rstrip("/").split("/")
+
+        if len(indices) < 1:
+            raise KeyError("Invalid slice of BasH5Collection")
+
+        if len(indices) >= 1:
+            result = self.readers[indices[0]]
+        if len(indices) >= 2:
+            result = result[int(indices[1])]
+        if len(indices) >= 3:
+            if indices[2] == "ccs":
+                result = result.ccsRead
+            else:
+                start, end = map(int, indices[2].split("_"))
+                result = result.read(start, end)
+        return result
+
+    #
+    # Iterators over Zmw, ZmwRead objects
+    #
+
+    def __iter__(self):
+        for reader in self.readers.values():
+            for zmw in reader: yield zmw
+
+    def reads(self):
+        for reader in self.readers.values():
+            for read in reader.reads():
+                yield read
+
+    def subreads(self):
+        for reader in self.readers.values():
+            for read in reader.subreads():
+                yield read
+
+    def ccsReads(self):
+        for reader in self.readers.values():
+            for read in reader.ccsReads():
+                yield read
diff --git a/pbcore/io/FastaIO.py b/pbcore/io/FastaIO.py
new file mode 100644
index 0000000..64f1af4
--- /dev/null
+++ b/pbcore/io/FastaIO.py
@@ -0,0 +1,459 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+"""
+Streaming I/O support for FASTA files.
+"""
+
+__all__ = [ "FastaRecord",
+            "FastaReader",
+            "FastaWriter",
+            "FastaTable",
+            "IndexedFastaReader",
+            "splitFastaHeader"]
+
+from .base import ReaderBase, WriterBase
+from ._utils import splitFileContents
+from pbcore import sequence
+from pbcore.util.decorators import deprecated
+
+import md5, mmap, numpy as np, re
+from collections import namedtuple, OrderedDict, Sequence
+from os.path import abspath, expanduser, isfile
+
+
+def splitFastaHeader( name ):
+    """
+    Split a FASTA/FASTQ header into its id and comment components.
+
+    Returns ``(id, comment)``: the id is the text before the first
+    whitespace character; the comment is the stripped remainder, or
+    None when the header contains no whitespace.
+    """
+    # Raw string: '\s' in a plain literal is an invalid escape sequence.
+    parts = re.split(r'\s', name, maxsplit=1)
+    return (parts[0], parts[1].strip() if len(parts) > 1 else None)
+
+class FastaRecord(object):
+    """
+    A FastaRecord object models a named sequence in a FASTA file.
+    """
+    DELIMITER = ">"
+    COLUMNS   = 60
+
+    def __init__(self, header, sequence):
+        """
+        Raises ValueError if `header` or `sequence` contains a newline,
+        or if `sequence` contains the '>' delimiter.
+        """
+        # Checked explicitly: assert statements vanish under "python -O".
+        if ("\n" in header or "\n" in sequence or
+                self.DELIMITER in sequence):
+            raise ValueError("Invalid FASTA record data")
+        self._header = header
+        self._sequence = sequence
+        self._md5 = md5.md5(self.sequence).hexdigest()
+        self._id, self._comment = splitFastaHeader(header)
+
+    @property
+    def header(self):
+        """
+        The header of the sequence in the FASTA file, equal to the entire
+        first line of the FASTA record following the '>' character.
+
+        .. warning::
+
+           You should almost certainly be using "id", not "header".
+        """
+        return self._header
+
+    @property
+    def name(self):
+        """
+        DEPRECATED: The name of the sequence in the FASTA file, equal to
+        the entire FASTA header following the '>' character
+        """
+        return self._header
+
+    @property
+    def id(self):
+        """
+        The id of the sequence in the FASTA file, equal to the FASTA header
+        up to the first whitespace.
+        """
+        return self._id
+
+    @property
+    def comment(self):
+        """
+        The comment associated with the sequence in the FASTA file, equal to
+        the contents of the FASTA header following the first whitespace
+        """
+        return self._comment
+
+    @property
+    def sequence(self):
+        """
+        The sequence for the record as present in the FASTA file.
+        (Newlines are removed but otherwise no sequence normalization
+        is performed).
+        """
+        return self._sequence
+
+    @property
+    @deprecated
+    def length(self):
+        """
+        Get the length of the FASTA sequence
+        """
+        return len(self._sequence)
+
+    @property
+    def md5(self):
+        """
+        The MD5 checksum (hex digest) of `sequence`
+        """
+        return self._md5
+
+    @classmethod
+    def fromString(cls, s):
+        """
+        Interprets a string as a FASTA record.  Does not make any
+        assumptions about wrapping of the sequence string.  Raises
+        ValueError if `s` is not a '>' header line plus sequence lines.
+        """
+        lines = s.splitlines()
+        # startswith() also copes with an empty first line, where
+        # lines[0][0] would raise IndexError instead of ValueError.
+        if len(lines) < 2 or not lines[0].startswith(cls.DELIMITER):
+            raise ValueError("String not recognized as a valid FASTA record")
+        return FastaRecord(lines[0][1:], "".join(lines[1:]))
+
+    def reverseComplement(self, preserveHeader=False):
+        """
+        Return a new FastaRecord with the reverse-complemented DNA sequence;
+        " [revcomp]" is appended to the header unless `preserveHeader`.
+        """
+        rcSequence = sequence.reverseComplement(self.sequence)
+        if preserveHeader:
+            return FastaRecord(self.header, rcSequence)
+        else:
+            rcName = '{0} [revcomp]'.format(self.header.strip())
+            return FastaRecord(rcName, rcSequence)
+
+    def __len__(self):
+        return len(self._sequence)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (self.header   == other.header and
+                    self.sequence == other.sequence)
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        return "<FastaRecord: %s>" % self.header
+
+    def __str__(self):
+        """
+        Output a string representation of this FASTA record, observing
+        standard conventions about sequence wrapping.
+        """
+        return (">%s\n" % self.header) + \
+            wrap(self.sequence, self.COLUMNS)
+
+
+class FastaReader(ReaderBase):
+    """
+    Streaming reader for FASTA files, useable as a one-shot iterator
+    over FastaRecord objects.  Agnostic about line wrapping.
+
+    Example:
+
+    .. doctest::
+
+        >>> from pbcore.io import FastaReader
+        >>> from pbcore import data
+        >>> filename = data.getTinyFasta()
+        >>> r = FastaReader(filename)
+        >>> for record in r:
+        ...     print record.header, len(record.sequence), record.md5
+        ref000001|EGFR_Exon_2 183 e3912e9ceacd6538ede8c1b2adda7423
+        ref000002|EGFR_Exon_3 203 4bf218da37175a91869033024ac8f9e9
+        ref000003|EGFR_Exon_4 215 245bc7a046aad0788c22b071ed210f4d
+        ref000004|EGFR_Exon_5 157 c368b8191164a9d6ab76fd328e2803ca
+        >>> r.close()
+
+    """
+    DELIMITER = ">"
+
+    def __iter__(self):
+        parts = splitFileContents(self.file, self.DELIMITER)
+        # The text before the first delimiter must be empty; checked
+        # explicitly because assert statements vanish under "python -O".
+        if next(parts) != "":
+            raise ValueError("Invalid FASTA file")
+        for part in parts:
+            yield FastaRecord.fromString(self.DELIMITER + part)
+
+
+class FastaWriter(WriterBase):
+    """
+    A FASTA file writer class
+
+    Example:
+
+    .. doctest::
+
+        >>> from pbcore.io import FastaWriter
+        >>> with FastaWriter("output.fasta.gz") as writer:
+        ...     writer.writeRecord("dog", "GATTACA")
+        ...     writer.writeRecord("cat", "CATTACA")
+
+    (Notice that underlying file will be automatically closed after
+    exit from the `with` block.)
+
+    .. testcleanup::
+
+        import os; os.unlink("output.fasta.gz")
+
+    """
+    def writeRecord(self, *args):
+        """
+        Write a FASTA record to the file.  If given one argument, it is
+        interpreted as a ``FastaRecord``.  Given two arguments, they are
+        interpreted as header and sequence.  Anything else: ValueError.
+        """
+        if len(args) == 2:
+            record = FastaRecord(*args)
+        elif len(args) == 1 and isinstance(args[0], FastaRecord):
+            record = args[0]
+        else:
+            # Explicit check: an assert would be skipped under "python -O".
+            raise ValueError("writeRecord expects a FastaRecord or "
+                             "(header, sequence)")
+        self.file.write(str(record))
+        self.file.write("\n")
+
+
+##
+## Utility functions for FastaReader
+##
+def wrap(s, columns):
+    # Break s into consecutive chunks of `columns` characters, one per line.
+    return "\n".join([s[i:i + columns] for i in xrange(0, len(s), columns)])
+
+
+
+# ------------------------------------------------------------------------------
+# IndexedFastaReader: random access Fasta class
+#
+
+FaiRecord = namedtuple("FaiRecord", ("id", "comment", "header", "length", "offset", "lineWidth", "stride"))
+
+def faiFilename(fastaFilename):
+    return fastaFilename + ".fai"  # index name is the FASTA path with ".fai" appended
+
+def loadFastaIndex(faidxFilename, fastaView):
+    """Parse a .fai index into FaiRecords, reading full headers from fastaView."""
+    if not isfile(faidxFilename): # os.path.isfile
+        raise IOError("Companion FASTA index (.fai) file not found or "
+                      "malformatted! Use 'samtools faidx' to generate FASTA "
+                      "index.")
+
+    tbl = []
+    # NB: We have to look back in the FASTA to find the full header;
+    # only "id" makes it into the fai.
+    offsetEnd = 0
+    with open(faidxFilename) as faiFile:  # close the index even on error
+        for line in faiFile:
+            length, offset, lineWidth, blen = map(int, line.split()[-4:])
+            newlineWidth = blen - lineWidth                            # 2 for DOS, 1 for UNIX
+            header_    = fastaView[offsetEnd:offset]
+            assert (header_[0] == ">" and header_[-1] == "\n")
+            header     = header_[1:-newlineWidth]
+            id_, comment = splitFastaHeader(header)   # id_: don't shadow id()
+            q, r = divmod(length, lineWidth)
+            offsetEnd = offset + length + (q + (r > 0))*newlineWidth  # q+(r>0) lines
+            tbl.append(FaiRecord(id_, comment, header, length, offset,
+                                 lineWidth, blen))
+    return tbl
+
+def fileOffset(faiRecord, pos):
+    """
+    Translate position `pos` within a contig into a byte offset in the
+    FASTA file: the contig's start offset, plus one `stride` per full
+    sequence line before `pos`, plus the column within the line.
+    """
+    fullLines, column = divmod(pos, faiRecord.lineWidth)
+    return faiRecord.offset + fullLines * faiRecord.stride + column
+
+class MmappedFastaSequence(Sequence):
+    """
+    A string-like view of a contig sequence that is backed by a file
+    using mmap.
+    """
+    def __init__(self, view, faiRecord):
+        self.view = view
+        self.faiRecord = faiRecord
+
+    def __getitem__(self, spec):
+        if isinstance(spec, slice):
+            start, stop, stride = spec.indices(len(self))
+            if stride != 1:
+                raise ValueError("Unsupported stride")
+            # An empty slice such as s[5:2] yields "", as it does for str.
+            stop = max(start, stop)
+        else:
+            # Integer index; negative values count back from the end.
+            start = spec + len(self) if spec < 0 else spec
+            stop = start + 1
+        if not (0 <= start <= stop <= self.faiRecord.length):
+            raise IndexError("Out of bounds")
+        startOffset = fileOffset(self.faiRecord, start)
+        endOffset   = fileOffset(self.faiRecord, stop)
+        return self.view[startOffset:endOffset].translate(None, "\r\n")
+
+    def __len__(self):
+        return self.faiRecord.length
+
+    def __eq__(self, other):
+        return (isinstance(other, MmappedFastaSequence) and
+                self[:] == other[:])
+
+    def __ne__(self, other):  # Python 2 does not derive != from __eq__
+        return not self.__eq__(other)
+
+class IndexedFastaRecord(object):  # record whose sequence is an MmappedFastaSequence over `view`
+    def __init__(self, view, faiRecord):
+        self.view, self.faiRecord = view, faiRecord
+
+    @property
+    def name(self):
+        return self.header
+
+    @property
+    def header(self):
+        return self.faiRecord.header
+
+    @property
+    def id(self):
+        return self.faiRecord.id
+
+    @property
+    def comment(self):
+        return self.faiRecord.comment
+
+    @property
+    def sequence(self):
+        return MmappedFastaSequence(self.view, self.faiRecord)
+
+    @property
+    @deprecated
+    def length(self):
+        return self.faiRecord.length
+
+    def __len__(self):
+        return self.faiRecord.length
+
+    def __repr__(self):
+        return "<IndexedFastaRecord: %s>" % self.header
+
+    def __eq__(self, other):
+        return (isinstance(other, IndexedFastaRecord) and
+                (self.header, self.sequence) == (other.header, other.sequence))
+
+    def __ne__(self, other): return not self.__eq__(other)  # Python 2 does not derive != from __eq__
+
+class IndexedFastaReader(ReaderBase, Sequence):
+    """
+    Random-access FASTA file reader.
+
+    Requires that the lines of the FASTA file be fixed-length and that
+    there is a FASTA index file (generated by `samtools faidx`) with
+    name `fastaFilename.fai` in the same directory.
+
+    .. doctest::
+
+        >>> from pbcore.io import IndexedFastaReader
+        >>> from pbcore import data
+        >>> filename = data.getFasta()
+        >>> t = IndexedFastaReader(filename)
+        >>> print t[:4] # doctest: +NORMALIZE_WHITESPACE
+        [<IndexedFastaRecord: ref000001|EGFR_Exon_2>,
+         <IndexedFastaRecord: ref000002|EGFR_Exon_3>,
+         <IndexedFastaRecord: ref000003|EGFR_Exon_4>,
+         <IndexedFastaRecord: ref000004|EGFR_Exon_5>]
+        >>> t.close()
+
+    """
+    def __init__(self, filename):
+        self.filename = abspath(expanduser(filename))
+        self.file = open(self.filename, "r")
+        self.view = mmap.mmap(self.file.fileno(), 0,
+                              prot=mmap.PROT_READ)
+        self.faiFilename = faiFilename(self.filename)
+        self.fai = loadFastaIndex(self.faiFilename, self.view)
+        self.contigLookup = self._loadContigLookup()
+
+    def _loadContigLookup(self):
+        # Map the position, id and full header of each contig to its FaiRecord.
+        contigLookup = dict()
+        for (pos, faiRecord) in enumerate(self.fai):
+            for k in (pos, faiRecord.id, faiRecord.header):
+                contigLookup[k] = faiRecord
+        return contigLookup
+
+    def __getitem__(self, key):
+        if isinstance(key, slice):
+            indices = xrange(*key.indices(len(self)))
+            return [ IndexedFastaRecord(self.view, self.contigLookup[i])
+                     for i in indices ]
+        # Names are looked up as-is; only numeric keys may be negative.
+        # (Don't rely on Python 2's arbitrary str/slice-vs-int ordering.)
+        if not isinstance(key, basestring) and key < 0:
+            key = len(self) + key
+        if key in self.contigLookup:
+            return IndexedFastaRecord(self.view, self.contigLookup[key])
+        raise IndexError("Contig not in FastaTable")
+
+    def __iter__(self):
+        return (self[i] for i in xrange(len(self)))
+
+    def __len__(self):
+        return len(self.fai)
+
+# Backward-compatible alias: IndexedFastaReader used to be called FastaTable.
+FastaTable = IndexedFastaReader
diff --git a/pbcore/io/FastqIO.py b/pbcore/io/FastqIO.py
new file mode 100644
index 0000000..3a8706b
--- /dev/null
+++ b/pbcore/io/FastqIO.py
@@ -0,0 +1,259 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+"""
+I/O support for FASTQ files
+"""
+
+__all__ = [ "FastqRecord",
+            "FastqReader",
+            "FastqWriter",
+            "qvsFromAscii",
+            "asciiFromQvs" ]
+import numpy as np
+from .base import ReaderBase, WriterBase
+from .FastaIO import splitFastaHeader
+from pbcore import sequence
+from pbcore.util.decorators import deprecated
+
+class FastqRecord(object):
+    """
+    A ``FastqRecord`` object models a named sequence and its quality
+    values in a FASTQ file.  For reference consult `Wikipedia's FASTQ
+    entry`_. We adopt the Sanger encoding convention, allowing the
+    encoding of QV values in [0, 93] using ASCII 33 to 126. We only
+    support FASTQ files in the four-line convention (unwrapped).
+    Wrapped FASTQ files are generally considered a bad idea as the @,
+    + delimiters can also appear in the quality string, thus parsing
+    cannot be done safely.
+
+    .. _Wikipedia's FASTQ entry: http://en.wikipedia.org/wiki/FASTQ_format
+    """
+    DELIMITER1 = "@"
+    DELIMITER2 = "+"
+
+    def __init__(self, header, sequence, quality=None, qualityString=None):
+        """
+        Exactly one of `quality` and `qualityString` must be given.
+        Raises ValueError on invalid or inconsistent record data.
+        """
+        # Checked explicitly: assert statements vanish under "python -O".
+        if ("\n" in header or "\n" in sequence or
+                (quality is None) == (qualityString is None)):
+            raise ValueError("Invalid FASTQ record data")
+        if quality is None:
+            quality = qvsFromAscii(qualityString)
+        if len(sequence) != len(quality):
+            raise ValueError("Invalid FASTQ record data")
+        self._header = header
+        self._sequence = sequence
+        self._quality = quality
+        self._id, self._comment = splitFastaHeader(header)
+
+    @property
+    def header(self):
+        """
+        The header of the sequence in the FASTQ file
+        """
+        return self._header
+
+    @property
+    def name(self):
+        """
+        DEPRECATED: The name of the sequence in the FASTQ file
+        """
+        return self._header
+
+    @property
+    def id(self):
+        """
+        The id of the sequence in the FASTQ file, equal to the FASTQ header
+        up to the first whitespace.
+        """
+        return self._id
+
+    @property
+    @deprecated
+    def length(self):
+        """
+        The length of the sequence
+        """
+        return len(self.sequence)
+
+    @property
+    def comment(self):
+        """
+        The comment associated with the sequence in the FASTQ file, equal to
+        the contents of the FASTQ header following the first whitespace
+        """
+        return self._comment
+
+    @property
+    def sequence(self):
+        """
+        The sequence for the record as present in the FASTQ file.
+        """
+        return self._sequence
+
+    @property
+    def quality(self):
+        """
+        The quality values, as an array of integers
+        """
+        return self._quality
+
+    @property
+    def qualityString(self):
+        """
+        The quality values as an ASCII-encoded string
+        """
+        return asciiFromQvs(self._quality)
+
+    @classmethod
+    def fromString(cls, s):
+        """
+        Interprets a string as a FASTQ record. Only supports four-line
+        format, as wrapped FASTQs can't easily be safely parsed.
+        Raises ValueError if `s` is not such a record.
+        """
+        lines = s.rstrip().splitlines()
+        # startswith() also copes with empty lines, where indexing [0]
+        # would raise IndexError instead of ValueError; explicit checks
+        # (unlike assert) also survive "python -O".
+        if not (len(lines) == 4 and
+                lines[0].startswith(cls.DELIMITER1) and
+                lines[2].startswith(cls.DELIMITER2) and
+                len(lines[1]) == len(lines[3])):
+            raise ValueError("String not recognized as a valid FASTQ record")
+        return FastqRecord(lines[0][1:], lines[1],
+                           qualityString=lines[3])
+
+    def reverseComplement(self, preserveHeader=False):
+        """
+        Return a new FastqRecord with the reverse-complemented sequence and
+        reversed qualities; appends " [revcomp]" unless `preserveHeader`.
+        """
+        rcSequence = sequence.reverseComplement(self.sequence)
+        rcQuality = sequence.reverse(self.quality)
+        if preserveHeader:
+            return FastqRecord(self.header, rcSequence, rcQuality)
+        else:
+            rcName = '{0} [revcomp]'.format(self.header.strip())
+            return FastqRecord(rcName, rcSequence, rcQuality)
+
+    def __len__(self):
+        return len(self._sequence)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return (self.header   == other.header and
+                    self.sequence == other.sequence and
+                    np.array_equiv(self.quality, other.quality))
+        else:
+            return False
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __str__(self):
+        """
+        Output a string representation of this FASTQ record, in
+        standard four-line format.
+        """
+        return "\n".join([self.DELIMITER1 + self.header,
+                          self.sequence,
+                          self.DELIMITER2,
+                          self.qualityString])
+
+class FastqReader(ReaderBase):
+    """
+    Reader for FASTQ files, useable as a one-shot iterator over
+    FastqRecord objects.  FASTQ files must follow the four-line
+    convention.
+    """
+    def __iter__(self):
+        # One-shot iteration; a trailing incomplete record is dropped.
+        while True:
+            try:
+                lines = [next(self.file).rstrip("\r\n") for i in xrange(4)]
+            except StopIteration:
+                return      # explicit: safe under PEP 479 as well
+            # rstrip, not [:-1]: the last line may lack a newline.
+            yield FastqRecord(lines[0][1:], lines[1], qualityString=lines[3])
+
+
+class FastqWriter(WriterBase):
+    """
+    A FASTQ file writer class
+
+    Example:
+
+    .. doctest::
+
+        >>> from pbcore.io import FastqWriter
+        >>> with FastqWriter("output.fq.gz") as writer:
+        ...     writer.writeRecord("dog", "GATTACA", [35]*7)
+        ...     writer.writeRecord("cat", "CATTACA", [35]*7)
+
+    .. testcleanup::
+
+        import os; os.unlink("output.fq.gz")
+
+    (Notice that underlying file will be automatically closed after
+    exit from the `with` block.)
+    """
+    def writeRecord(self, *args):
+        """
+        Write a FASTQ record to the file.  If given one argument, it is
+        interpreted as a ``FastqRecord``.  Given three arguments, they are
+        interpreted as header, sequence, quality.  Anything else: ValueError.
+        """
+        if len(args) == 3:
+            record = FastqRecord(*args)
+        elif len(args) == 1 and isinstance(args[0], FastqRecord):
+            record = args[0]
+        else:
+            # Explicit check: an assert would be skipped under "python -O".
+            raise ValueError("writeRecord expects a FastqRecord or "
+                             "(header, sequence, quality)")
+        self.file.write(str(record))
+        self.file.write("\n")
+
+
+##
+## Utility
+##
+def qvsFromAscii(s):
+    return np.fromstring(s, dtype=np.uint8) - 33   # byte values minus the ASCII offset 33
+
+def asciiFromQvs(a):
+    return (33 + np.clip(a, 0, 93).astype(np.uint8)).tostring()   # clamp to [0, 93], add offset 33
diff --git a/pbcore/io/FofnIO.py b/pbcore/io/FofnIO.py
new file mode 100644
index 0000000..10d6bdb
--- /dev/null
+++ b/pbcore/io/FofnIO.py
@@ -0,0 +1,96 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Authors: David Alexander
+
+from pbcore.io.base import getFileHandle
+from os.path import dirname, isabs, join, abspath, expanduser
+import xml.etree.ElementTree as ET
+
+
+__all__ = [ "readFofn",
+            "readInputXML",
+            "enumeratePulseFiles" ]
+
+def readFofn(f):
+    """
+    Iterate over the filenames listed in a FOFN ("file-of-filenames"),
+    given either as a path or as a file-like object.
+
+    When f is a path, relative entries are resolved against the
+    directory containing the FOFN.  Relative entries in a file-like
+    FOFN raise IOError.  Empty lines are skipped.
+    """
+    fofnRoot = None
+    if isinstance(f, basestring):
+        fofnRoot = dirname(abspath(expanduser(f)))
+
+    for rawLine in getFileHandle(f):
+        entry = rawLine.rstrip()
+        if not entry:
+            continue
+        if isabs(entry):
+            yield entry
+            continue
+        if fofnRoot is None:
+            # No base directory to resolve a relative entry against.
+            raise IOError("Cannot handle relative paths in StringIO FOFN")
+        yield join(fofnRoot, entry)
+
+def readInputXML(fname):
+    """Yield the text of every <location> element in the XML file `fname`."""
+    root = ET.parse(fname).getroot()
+    for node in root.iter():
+        if node.tag == "location":
+            yield node.text
+
+def enumeratePulseFiles(fname):
+    """
+    Yield the pulse files designated by fname.  A pulse file is a file
+    with suffix .bax.h5, .plx.h5, or .bas.h5 (not checked here).
+
+    fname may be a list of pulse file names, the name of a FOFN
+    (".fofn") or of an input.xml file (".xml") listing pulse files, or
+    the name of a single pulse file.
+
+    This generalizes readFofn to arguments of type fofn|pulse, as a
+    convenience for tools that accept such an argument.
+    """
+    if isinstance(fname, list):
+        source = fname
+    elif fname.endswith(".fofn"):
+        source = readFofn(fname)
+    elif fname.endswith(".xml"):
+        source = readInputXML(fname)
+    else:
+        # Anything else is taken to be a single pulse file name.
+        source = [fname]
+    for pls in source:
+        yield pls
diff --git a/pbcore/io/GffIO.py b/pbcore/io/GffIO.py
new file mode 100644
index 0000000..1a3a2ce
--- /dev/null
+++ b/pbcore/io/GffIO.py
@@ -0,0 +1,233 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+"""
+I/O support for GFF3 files.
+
+The specification for the GFF format is available at
+    http://www.sequenceontology.org/gff3.shtml
+"""
+
+__all__ = [ "Gff3Record",
+            "GffReader",
+            "GffWriter" ]
+
+from .base import ReaderBase, WriterBase
+from collections import OrderedDict
+from copy import copy as shallow_copy
+
+class Gff3Record(object):
+    """
+    Class for GFF record, providing uniform access to standard
+    GFF fields and attributes.
+
+    .. doctest::
+
+        >>> from pbcore.io import Gff3Record
+        >>> record = Gff3Record("chr1", 10, 11, "insertion",
+        ...                     attributes=[("foo", "1"), ("bar", "2")])
+        >>> record.start
+        10
+        >>> record.foo
+        '1'
+        >>> record.baz = 3
+        >>> del record.baz
+
+    Attribute access using record.fieldName notation raises ``AttributeError``
+    if an attribute named fieldName doesn't exist.  Use::
+
+        >>> record.get(fieldName)
+
+    to fetch a field or attribute with None default or::
+
+        >>> record.get(fieldName, defaultValue)
+
+    to fetch the field or attribute with a custom default.
+    """
+    _GFF_COLUMNS = [ "seqid", "source", "type",
+                     "start", "end", "score",
+                     "strand", "phase", "attributes" ]
+
+    def __init__(self, seqid, start, end, type,
+                 score=".", strand=".", phase=".",
+                 source=".", attributes=()):
+        self.seqid  = seqid      # note: parameter order differs from column order
+        self.source = source
+        self.type   = type
+        self.start  = start
+        self.end    = end
+        self.score  = score
+        self.strand = strand
+        self.phase  = phase
+        self.attributes = OrderedDict(attributes)  # keeps attribute order for __str__
+
+    def copy(self):
+        """
+        Return a shallow copy (the attributes dict is shared with the original)
+        """
+        return shallow_copy(self)
+
+    @classmethod
+    def fromString(cls, s):
+        """
+        Parse a tab-separated line as a GFF record; trailing whitespace
+        and a trailing ";" are ignored.  Raises ValueError if malformed.
+        """
+        columns = s.rstrip().rstrip(";").split("\t")
+        try:
+            assert len(columns) == len(cls._GFF_COLUMNS)
+            attributes = map(tupleFromGffAttribute, columns[-1].split(";"))
+            (_seqid, _source, _type, _start,
+             _end, _score, _strand, _phase)  = columns[:-1]
+            return Gff3Record(_seqid, int(_start), int(_end), _type,
+                              _score, _strand, _phase, _source, attributes)
+        except (AssertionError, ValueError):
+            raise ValueError("Could not interpret string as a Gff3Record: %s" % s)
+
+
+    @staticmethod
+    def _formatField(field):
+        if type(field) == float:   # exact float only: two decimal places
+            return "%.2f" % field
+        else:
+            return "%s" % field
+
+    def __str__(self):
+        formattedAttributes = ";".join(
+            ("%s=%s" % (k, self._formatField(v))
+             for (k, v) in self.attributes.iteritems()))
+        formattedFixedColumns = "\t".join(
+            self._formatField(getattr(self, k))
+            for k in self._GFF_COLUMNS[:-1])
+        return "%s\t%s" % (formattedFixedColumns,
+                           formattedAttributes)
+
+    #
+    # Access to the attributes list using
+    # dot notation, providing a uniform
+    # interface.  AttributeError if the
+    # attribute is not found.
+    #
+    def __getattr__(self, name):
+        attributes = self.__dict__.get("attributes", ())  # may be unset mid-copy
+        if name in attributes:
+            return attributes[name]
+        raise AttributeError(name)
+
+    def __setattr__(self, name, value):
+        if name in self._GFF_COLUMNS:
+            object.__setattr__(self, name, value)  # fixed columns are real attributes
+        else:
+            self.attributes[name] = value
+
+    def __delattr__(self, name):
+        del self.attributes[name]  # KeyError if `name` is not a stored attribute
+
+    #
+    # Access without exceptions.
+    #
+    def get(self, name, default=None):
+        return getattr(self, name, default)
+
+    def put(self, name, value):
+        setattr(self, name, value)
+
+class GffReader(ReaderBase):
+    """
+    A GFF file reader class; iterating yields one Gff3Record per data line
+    """
+    def _readHeaders(self):
+        headers = []
+        firstLine = None  # stays None when the file holds only "##" lines
+        for line in self.file:
+            if line.startswith("##"):
+                headers.append(line.rstrip())
+            else:
+                firstLine = line  # already consumed from the file; kept for __iter__
+                break
+        return headers, firstLine
+
+    def __init__(self, f):
+        super(GffReader, self).__init__(f)
+        self.headers, self.firstLine = self._readHeaders()
+
+    def __iter__(self):
+        if self.firstLine:
+            yield Gff3Record.fromString(self.firstLine)
+            self.firstLine = None  # NOTE(review): only cleared once the consumer resumes
+        for line in self.file:
+            yield Gff3Record.fromString(line)
+
+
+class GffWriter(WriterBase):
+    """
+    A GFF file writer class; writes the "##gff-version 3" header on creation
+    """
+    def __init__(self, f):
+        super(GffWriter, self).__init__(f)
+        self.writeHeader("##gff-version 3")  # first line written to the output
+
+    def writeHeader(self, headerLine):
+        if not headerLine.startswith("##"):
+            raise ValueError("GFF headers must start with ##")
+        self.file.write("{0}\n".format(headerLine.rstrip()))  # exactly one newline
+
+    def writeRecord(self, record):
+        assert isinstance(record, Gff3Record)  # NOTE: check vanishes under python -O
+        self.file.write("{0}\n".format(str(record)))
+
+#
+# Utility functions
+#
+
+def floatValue(s):
+    """Return float(s), or None when `s` does not convert to a float."""
+    try: return float(s)
+    except (TypeError, ValueError, OverflowError):  # not KeyboardInterrupt etc.
+        return None
+
+def integerValue(s):
+    """Return int(s), or None when `s` does not convert to an integer."""
+    try: return int(s)
+    except (TypeError, ValueError, OverflowError):  # not KeyboardInterrupt etc.
+        return None
+
+def grok(s):
+    """Return `s` as an int if it parses as one, else as a float, else unchanged."""
+    for convert in (integerValue, floatValue):
+        value = convert(s)
+        if value is not None: return value
+    return s
+
+def tupleFromGffAttribute(s):
+    k, v = s.split("=", 1)  # first "=" separates key from value; the value may hold more
+    return k, grok(v)
diff --git a/pbcore/io/__init__.py b/pbcore/io/__init__.py
new file mode 100644
index 0000000..89a588f
--- /dev/null
+++ b/pbcore/io/__init__.py
@@ -0,0 +1,40 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+from .BasH5IO import *
+from .FastaIO import *
+from .FastqIO import *
+from .FofnIO  import *
+from .GffIO   import *
+from .base    import *
+
+from .align   import *
+
+from .opener  import *
diff --git a/pbcore/io/_utils.py b/pbcore/io/_utils.py
new file mode 100644
index 0000000..a138cb3
--- /dev/null
+++ b/pbcore/io/_utils.py
@@ -0,0 +1,246 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+from __future__ import absolute_import
+import h5py, numpy as np
+from cStringIO import StringIO
+
+
+def arrayFromDataset(ds, offsetBegin, offsetEnd):
+    """
+    Extract elements [offsetBegin, offsetEnd) of an HDF5 dataset as a 1-D array.
+    """
+    shape = (offsetEnd - offsetBegin,)
+    a = np.ndarray(shape=shape, dtype=ds.dtype)  # uninitialized destination buffer
+    mspace = h5py.h5s.create_simple(shape)  # memory dataspace with the same shape as `a`
+    fspace = ds.id.get_space()
+    fspace.select_hyperslab((offsetBegin,), shape, (1,))  # start, count, stride
+    ds.id.read(mspace, fspace, a)  # low-level read of the selection into `a`
+    return a
+
+
+def splitFileContents(f, delimiter, BLOCKSIZE=8192):
+    """
+    Same semantics as f.read().split(delimiter), but reads BLOCKSIZE
+    characters at a time so memory usage follows the largest chunk.
+    """
+    keep = len(delimiter) - 1      # tail length that could begin a delimiter
+    pieces, tail = [], delimiter[:0]  # delimiter[:0]: "" of the input's string type
+    while True:
+        block = f.read(BLOCKSIZE)
+        if not block:
+            break
+        parts = (tail + block).split(delimiter)
+        for part in parts[:-1]:
+            yield delimiter[:0].join(pieces) + part
+            pieces = []
+        cut = max(len(parts[-1]) - keep, 0)
+        pieces.append(parts[-1][:cut])  # cannot belong to a delimiter any more
+        tail = parts[-1][cut:]          # re-examined together with the next block
+    yield delimiter[:0].join(pieces) + tail
+
+
+# For reasons that are obscure to me, the recarray outer join
+# functionality in numpy's lib.recfunctions is broken as of numpy
+# 1.6.1.  Here is the implementation I found in matplotlib (BSD
+# compatible license; need to add license note to LICENSE), which
+# seems to work.
+#  --DHA
+
+def is_string_like(obj):
+    'Return True if *obj* looks like a string'
+    if isinstance(obj, (str, unicode)): return True
+    # numpy strings are subclass of str, ma strings are not
+    if np.ma.isMaskedArray(obj):  # via np: this module never imports `ma` itself
+        if obj.ndim == 0 and obj.dtype.kind in 'SU':
+            return True
+        else:
+            return False
+    try: obj + ''
+    except Exception: return False  # anything that cannot be concatenated with a str
+    return True
+
+def rec_join(key, r1, r2, jointype='inner', defaults=None, r1postfix='1', r2postfix='2'):
+    """
+    Join record arrays *r1* and *r2* on *key*; *key* is a tuple of
+    field names -- if *key* is a string it is assumed to be a single
+    attribute name. If *r1* and *r2* have equal values on all the keys
+    in the *key* tuple, then their fields will be merged into a new
+    record array containing the union of the fields of *r1* and
+    *r2*.
+
+    *r1* (also *r2*) must not have any duplicate keys.
+
+    The *jointype* keyword can be 'inner', 'outer', 'leftouter'.  To
+    do a rightouter join just reverse *r1* and *r2*.
+
+    The *defaults* keyword is a dictionary filled with
+    ``{column_name:default_value}`` pairs.
+
+    The keywords *r1postfix* and *r2postfix* are postfixed to column names
+    (other than keys) that are both in *r1* and *r2*.
+    """
+    import warnings  # local import: this module has no top-level import of it
+
+    if is_string_like(key):
+        key = (key, )
+
+    for name in key:
+        if name not in r1.dtype.names:
+            raise ValueError('r1 does not have key field %s'%name)
+        if name not in r2.dtype.names:
+            raise ValueError('r2 does not have key field %s'%name)
+
+    def makekey(row):
+        return tuple([row[name] for name in key])
+
+    r1d = dict([(makekey(row),i) for i,row in enumerate(r1)])
+    r2d = dict([(makekey(row),i) for i,row in enumerate(r2)])
+
+    r1keys = set(r1d.keys())
+    r2keys = set(r2d.keys())
+
+    common_keys = r1keys & r2keys
+
+    r1ind = np.array([r1d[k] for k in common_keys])
+    r2ind = np.array([r2d[k] for k in common_keys])
+
+    common_len = len(common_keys)
+    left_len = right_len = 0
+    if jointype == "outer" or jointype == "leftouter":
+        left_keys = r1keys.difference(r2keys)
+        left_ind = np.array([r1d[k] for k in left_keys])
+        left_len = len(left_ind)
+    if jointype == "outer":
+        right_keys = r2keys.difference(r1keys)
+        right_ind = np.array([r2d[k] for k in right_keys])
+        right_len = len(right_ind)
+
+    def key_desc(name):
+        'if name is a string key, use the larger size of r1 or r2 before merging'
+        dt1 = r1.dtype[name]
+        if dt1.type != np.string_:
+            return (name, dt1.descr[0][1])
+
+        dt2 = r2.dtype[name]
+        # string widths may differ; keep the wider so no key value is truncated
+        if dt1.itemsize >= dt2.itemsize:
+            return (name, dt1.descr[0][1])
+        else:
+            return (name, dt2.descr[0][1])
+
+    keydesc = [key_desc(name) for name in key]
+
+    def mapped_r1field(name):
+        """
+        The column name in *newrec* that corresponds to the column in *r1*.
+        """
+        if name in key or name not in r2.dtype.names: return name
+        else: return name + r1postfix
+
+    def mapped_r2field(name):
+        """
+        The column name in *newrec* that corresponds to the column in *r2*.
+        """
+        if name in key or name not in r1.dtype.names: return name
+        else: return name + r2postfix
+
+    r1desc = [(mapped_r1field(desc[0]), desc[1]) for desc in r1.dtype.descr if desc[0] not in key]
+    r2desc = [(mapped_r2field(desc[0]), desc[1]) for desc in r2.dtype.descr if desc[0] not in key]
+    newdtype = np.dtype(keydesc + r1desc + r2desc)
+
+    newrec = np.recarray((common_len + left_len + right_len,), dtype=newdtype)
+
+    if defaults is not None:
+        for thiskey in defaults:
+            if thiskey not in newdtype.names:
+                warnings.warn('rec_join defaults key="%s" not in new dtype names "%s"'%(
+                    thiskey, newdtype.names))
+
+    for name in newdtype.names:
+        dt = newdtype[name]
+        if dt.kind in ('f', 'i'):
+            newrec[name] = 0
+
+    if jointype != 'inner' and defaults is not None: # fill in the defaults enmasse
+        newrec_fields = newrec.dtype.fields.keys()
+        for k, v in defaults.items():
+            if k in newrec_fields:
+                newrec[k] = v
+
+    for field in r1.dtype.names:
+        newfield = mapped_r1field(field)
+        if common_len:
+            newrec[newfield][:common_len] = r1[field][r1ind]
+        if (jointype == "outer" or jointype == "leftouter") and left_len:
+            newrec[newfield][common_len:(common_len+left_len)] = r1[field][left_ind]
+
+    for field in r2.dtype.names:
+        newfield = mapped_r2field(field)
+        if field not in key and common_len:
+            newrec[newfield][:common_len] = r2[field][r2ind]
+        if jointype == "outer" and right_len:
+            newrec[newfield][-right_len:] = r2[field][right_ind]  # rows found only in r2
+
+    newrec.sort(order=key)
+
+    return newrec
+
+
+def drop_fields(rec, names):
+    """
+    Return a new numpy record array with fields in *names* dropped.
+    """
+
+    names = set(names)
+    # entries of *names* that are not fields of rec have no effect
+
+    newdtype = np.dtype([(name, rec.dtype[name]) for name in rec.dtype.names
+                       if name not in names])
+
+    newrec = np.recarray(rec.shape, dtype=newdtype)
+    for field in newdtype.names:
+        newrec[field] = rec[field]
+
+    return newrec
+
+def print_rec_array(rec):
+    """
+    Pretty-print a recarray (stub: currently prints "foo" and ignores *rec*)
+    """
+    print "foo"
+
+
+class CommonEqualityMixin(object):
+    """Mixin giving == / != by comparing instance __dict__ contents."""
+    def __eq__(self, other):
+        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
diff --git a/pbcore/io/align/BamAlignment.py b/pbcore/io/align/BamAlignment.py
new file mode 100644
index 0000000..f192ceb
--- /dev/null
+++ b/pbcore/io/align/BamAlignment.py
@@ -0,0 +1,571 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+from functools import wraps
+from bisect import bisect_right, bisect_left
+
+from pbcore.sequence import reverseComplement
+from ._BamSupport import *
+from ._AlignmentMixin import AlignmentRecordMixin
+
+__all__ = [ "BamAlignment" ]
+
+
+def _unrollCigar(cigar, exciseSoftClips=False):
+    """
+    Run-length decode the cigar (input is BAM packed CIGAR, not a cigar string)
+
+    Drops a hard clip op found at either end; soft clips too if exciseSoftClips.
+    """
+    cigarArray = np.array(cigar, dtype=int)  # indexed below as [row, 0]=op, [row, 1]=count
+    hasHardClipAtLeft = cigarArray[0,0] == BAM_CHARD_CLIP
+    hasHardClipAtRight = cigarArray[-1,0] == BAM_CHARD_CLIP
+    ncigar = len(cigarArray)
+    x = np.s_[int(hasHardClipAtLeft) : ncigar - int(hasHardClipAtRight)]  # rows minus end hard clips
+    ops = np.repeat(cigarArray[x,0], cigarArray[x,1])  # each op code repeated `count` times
+    if exciseSoftClips:
+        return ops[ops != BAM_CSOFT_CLIP]
+    else:
+        return ops
+
+def _makePulseFeatureAccessor(featureName):
+    def f(self, aligned=True, orientation="native"):  # accessor fixed to `featureName`
+        return self.pulseFeature(featureName, aligned, orientation)
+    return f
+
+
+def requiresReference(method):  # decorator: guard on bamAln.bam.isReferenceLoaded
+    @wraps(method)
+    def f(bamAln, *args, **kwargs):
+        if not bamAln.bam.isReferenceLoaded:
+            raise UnavailableFeature("this feature requires loaded reference sequence")
+        else:
+            return method(bamAln, *args, **kwargs)
+    return f
+
+def requiresPbi(method):  # decorator: guard on bamAln.rowNumber being set
+    @wraps(method)
+    def f(bamAln, *args, **kwargs):
+        if bamAln.rowNumber is None:
+            raise UnavailableFeature("this feature requires a PacBio BAM index")
+        else:
+            return method(bamAln, *args, **kwargs)
+    return f
+
+def requiresMapping(method):  # decorator: guard on bamAln.isUnmapped being false
+    @wraps(method)
+    def f(bamAln, *args, **kwargs):
+        if bamAln.isUnmapped:
+            raise UnavailableFeature("this feature requires a *mapped* BAM record")
+        else:
+            return method(bamAln, *args, **kwargs)
+    return f
+
+
+class BamAlignment(AlignmentRecordMixin):
+    def __init__(self, bamReader, pysamAlignedRead, rowNumber=None):
+        #TODO: make these __slot__
+        self.peer        = pysamAlignedRead
+        self.bam         = bamReader
+        self.rowNumber   = rowNumber
+        self.tStart      = self.peer.pos
+        self.tEnd        = self.peer.aend
+        # Our terminology doesn't agree with pysam's terminology for
+        # "query", "read".  This makes this code confusing.
+        if self.peer.is_reverse:
+            clipLeft  = self.peer.rlen - self.peer.qend
+            clipRight = self.peer.qstart
+        else:
+            clipLeft  = self.peer.qstart
+            clipRight = self.peer.rlen - self.peer.qend
+        self.aStart = self.qStart + clipLeft
+        self.aEnd   = self.qEnd   - clipRight
+
+        # Cache of unrolled cigar, in genomic orientation
+        self._unrolledCigar = None
+
+
+    @property
+    def reader(self):
+        return self.bam
+
+    @property
+    def qId(self):
+        return self.readGroupInfo.ID
+
+    @property
+    def qName(self):
+        return self.peer.qname
+
+    @property
+    def qStart(self):
+        return self.peer.opt("qs")
+
+    @property
+    def qEnd(self):
+        return self.peer.opt("qe")
+
+    @property
+    def qLen(self):
+        return self.peer.query_length
+
+    @property
+    def tId(self):
+        return self.peer.tid
+
+    @property
+    def isMapped(self):
+        return not self.isUnmapped
+
+    @property
+    def isUnmapped(self):
+        return self.peer.is_unmapped
+
+    @property
+    def isReverseStrand(self):
+        return self.peer.is_reverse
+
+    @property
+    def isForwardStrand(self):
+        return not self.peer.is_reverse
+
+    @property
+    def HoleNumber(self):
+        return self.peer.opt("zm")
+
+    @property
+    def MapQV(self):
+        return self.peer.mapq
+
+    @requiresMapping
+    def clippedTo(self, refStart, refEnd):
+        """
+        Return a new `BamAlignment` that refers to a subalignment of
+        this alignment, as induced by clipping to reference
+        coordinates `refStart` to `refEnd`.  Raises IndexError if the
+        requested interval is empty or does not overlap the alignment.
+        .. warning::
+            This function takes time linear in the length of the alignment.
+        """
+        assert type(self) is BamAlignment
+        if (refStart >= refEnd or
+            refStart >= self.tEnd or
+            refEnd   <= self.tStart):
+            raise IndexError("Clipping query does not overlap alignment")
+
+        # The clipping region must intersect the alignment, though it
+        # does not have to be contained wholly within it.
+        refStart = max(self.referenceStart, refStart)
+        refEnd   = min(self.referenceEnd,   refEnd)
+        refPositions = self.referencePositions(orientation="genomic")
+        readPositions = self.readPositions(orientation="genomic")
+        uc = self.unrolledCigar(orientation="genomic")
+
+        # Clipping positions within the alignment array
+        clipStart = bisect_right(refPositions, refStart) - 1
+        clipEnd   = bisect_left(refPositions, refEnd)
+
+        tStart = refStart
+        tEnd   = refEnd
+        cUc = uc[clipStart:clipEnd]
+        readLength = sum(cUc != BAM_CDEL)  # alignment columns that are not deletions
+        if self.isForwardStrand:
+            aStart = readPositions[clipStart]
+            aEnd = aStart + readLength
+        else:
+            aEnd   = readPositions[clipStart] + 1
+            aStart = aEnd - readLength
+        return ClippedBamAlignment(self, tStart, tEnd, aStart, aEnd, cUc)
+
+    @property
+    @requiresMapping
+    def referenceInfo(self):
+        return self.bam.referenceInfo(self.referenceId)
+
+    @property
+    @requiresMapping
+    def referenceName(self):
+        return self.referenceInfo.FullName
+
+    @property
+    def movieName(self):
+        return self.readGroupInfo.MovieName
+
+    @property
+    def readGroupInfo(self):
+        return self.bam.readGroupInfo(rgAsInt(self.peer.opt("RG")))
+
+    @property
+    def readType(self):
+        return self.readGroupInfo.ReadType
+
+    @property
+    def sequencingChemistry(self):
+        return self.readGroupInfo.SequencingChemistry
+
+    @property
+    def referenceId(self):
+        return self.tId
+
+    @property
+    def queryStart(self):
+        return self.qStart
+
+    @property
+    def queryEnd(self):
+        return self.qEnd
+
+
+    #TODO: provide this in cmp.h5 but throw "unsupported"
+    @property
+    def queryName(self):
+        return self.peer.qname
+
+    @property
+    @requiresPbi
+    def identity(self):
+        if self.readLength == 0:
+            return 0.
+        else:
+            return 1. - float(self.nMM + self.nIns + self.nDel)/self.readLength
+
+    @property
+    def numPasses(self):
+        return self.peer.opt("np")
+
+    @property
+    def zScore(self):
+        raise UnavailableFeature("No ZScore in BAM")
+
+    @property
+    def barcode(self):
+        raise Unimplemented()
+
+    @property
+    def barcodeName(self):
+        raise Unimplemented()
+
+    @requiresReference
+    def transcript(self, orientation="native", style="gusfield"):
+        """
+        A text representation of the alignment moves (see Gusfield), handy
+        for pretty-printing; `style`: gusfield|cigar|exonerate|exonerate+.
+        """
+        uc = self.unrolledCigar(orientation)
+        ref = np.fromstring(self.reference(aligned=True, orientation=orientation), dtype=np.int8)
+        read = np.fromstring(self.read(aligned=True, orientation=orientation), dtype=np.int8)
+        isMatch = (ref == read)
+
+        # Disambiguate the "M" op on a copy: uc aliases the cached unrolled cigar
+        cigarPlus = uc.copy()
+        cigarPlus[(~isMatch) & (cigarPlus == BAM_CMATCH)] = BAM_CDIFF   # 'X'
+        cigarPlus[( isMatch) & (cigarPlus == BAM_CMATCH)] = BAM_CEQUAL  # '='
+
+        #                                    MIDNSHP=X
+        _exoneratePlusTrans = np.fromstring("Z  ZZZZ|*", dtype=np.int8)
+        _exonerateTrans     = np.fromstring("Z  ZZZZ| ", dtype=np.int8)
+        _cigarTrans         = np.fromstring("ZIDZZZZMM", dtype=np.int8)
+        _gusfieldTrans      = np.fromstring("ZIDZZZZMR", dtype=np.int8)
+
+        if   style == "exonerate+": return _exoneratePlusTrans [cigarPlus].tostring()
+        elif style == "exonerate":  return _exonerateTrans     [cigarPlus].tostring()
+        elif style == "cigar":      return _cigarTrans         [cigarPlus].tostring()
+        else:                       return _gusfieldTrans      [cigarPlus].tostring()
+
+
+    @requiresReference
+    def reference(self, aligned=True, orientation="native"):
+        if not (orientation == "native" or orientation == "genomic"):
+            raise ValueError("Bad `orientation` value")
+        tSeq = self.bam.referenceFasta[self.referenceName].sequence[self.tStart:self.tEnd]
+        shouldRC = orientation == "native" and self.isReverseStrand  # native view of a reverse hit
+        tSeqOriented = reverseComplement(tSeq) if shouldRC else tSeq
+        if aligned:
+            x = np.fromstring(tSeqOriented, dtype=np.int8)
+            y = self._gapifyRef(x, orientation)
+            return y.tostring()  # gapped reference, returned as a string
+        else:
+            return tSeqOriented  # ungapped reference slice tStart:tEnd
+
+    @requiresMapping
+    def unrolledCigar(self, orientation="native"):
+        """
+        Run-length decode the CIGAR encoding, and orient.  Clipping ops are removed.
+        """
+        if self.isUnmapped: return None  # NOTE(review): unreachable, @requiresMapping raises first
+
+        if self._unrolledCigar is None:
+            self._unrolledCigar = _unrollCigar(self.peer.cigar, exciseSoftClips=True)
+
+        if (orientation == "native" and self.isReverseStrand):
+            return self._unrolledCigar[::-1]  # reversed view of the cached array, not a copy
+        else:
+            return self._unrolledCigar  # the cached array itself; callers must not mutate it
+
+    @requiresMapping
+    def referencePositions(self, aligned=True, orientation="native"):
+        """
+        Returns an array of reference positions.
+
+        If aligned is True, the array has the same length as the
+        alignment and referencePositions[i] = reference position of
+        the i'th column in the oriented alignment.
+
+        If aligned is False, the array has the same length as the read
+        and referencePositions[i] = reference position of the i'th
+        base in the oriented read.
+        """
+        assert (aligned in (True, False) and
+                orientation in ("native", "genomic"))
+
+        ucOriented = self.unrolledCigar(orientation)
+        refNonGapMask = (ucOriented != BAM_CINS)
+
+        if self.isReverseStrand and orientation == "native":
+            pos = self.tEnd - 1 - np.hstack([0, np.cumsum(refNonGapMask[:-1])])
+        else:
+            pos = self.tStart + np.hstack([0, np.cumsum(refNonGapMask[:-1])])
+
+        if aligned:
+            return pos
+        else:
+            return pos[ucOriented != BAM_CDEL]
+
+    def readPositions(self, aligned=True, orientation="native"):
+        """
+        Returns an array of read positions.
+
+        If aligned is True, the array has the same length as the
+        alignment and readPositions[i] = read position of the i'th
+        column in the oriented alignment.
+
+        If aligned is False, the array has the same length as the
+        mapped reference segment and readPositions[i] = read position
+        of the i'th base in the oriented reference segment.
+        """
+        assert (aligned in (True, False) and
+                orientation in ("native", "genomic"))
+
+        ucOriented = self.unrolledCigar(orientation)
+        readNonGapMask = (ucOriented != BAM_CDEL)
+
+        if self.isReverseStrand and orientation == "genomic":
+            pos = self.aEnd - 1 - np.hstack([0, np.cumsum(readNonGapMask[:-1])])
+        else:
+            pos = self.aStart + np.hstack([0, np.cumsum(readNonGapMask[:-1])])
+
+        if aligned:
+            return pos
+        else:
+            return pos[ucOriented != BAM_CINS]
+
+
+    def pulseFeature(self, featureName, aligned=True, orientation="native"):
+        """
+        Retrieve the pulse feature `featureName` (a key of PULSE_FEATURE_TAGS).
+        - `aligned`    : whether gaps should be inserted to reflect the alignment
+        - `orientation`: "native" or "genomic"
+        Raises ValueError (bad `orientation`) or UnavailableFeature (unmapped record).
+        Note that this function assumes that the feature is stored in
+        native orientation in the file, so it is not appropriate to
+        use this method to fetch the read or the qual, which are
+        oriented genomically in the file.
+        """
+        if not (orientation == "native" or orientation == "genomic"):
+            raise ValueError, "Bad `orientation` value"
+        if self.isUnmapped and (orientation != "native" or aligned == True):
+            raise UnavailableFeature, \
+                "Cannot get genome oriented/aligned features from unmapped BAM record"
+        # 1. Extract in native orientation
+        tag, kind_, dtype_ = PULSE_FEATURE_TAGS[featureName]
+        data_ = self.peer.opt(tag)
+        if isinstance(data_, str):
+            data = np.fromstring(data_, dtype=dtype_)
+        else:
+            # This is about 300x slower than the fromstring above.
+            # Unless pysam exposes a buffer or numpy interface,
+            # it is going to be very slow.
+            data = np.fromiter(data_, dtype=dtype_)
+        del data_
+        assert len(data) == self.peer.rlen
+
+        # 2. Decode
+        if kind_ == "qv":
+            data -= 33  # presumably undoes a Phred+33 ASCII encoding -- confirm
+        elif kind_ == "time":
+            data = codeToFrames(data)
+
+        # 3. Clip
+        # [s, e) delimits the range, within the query, that is in the aligned read.
+        # This will be determined by the soft clips actually in the file as well as those
+        # imposed by the clipping API here.
+        s = self.aStart - self.qStart
+        e = self.aEnd   - self.qStart
+        assert s >= 0 and e <= len(data)
+        clipped = data[s:e]
+
+        # 4. Orient (data is native here, so only "genomic" on the reverse strand flips)
+        shouldReverse = self.isReverseStrand and orientation == "genomic"
+        if kind_ == "base":
+            ungapped = reverseComplementAscii(clipped) if shouldReverse else clipped
+        else:
+            ungapped = clipped[::-1] if shouldReverse else clipped
+
+        # 5. Gapify if requested
+        if aligned == False:
+            return ungapped
+        else:
+            return self._gapifyRead(ungapped, orientation)
+
+    def _gapifyRead(self, data, orientation):  # read data: gap at deletion (BAM_CDEL) columns
+        return self._gapify(data, orientation, BAM_CDEL)
+
+    def _gapifyRef(self, data, orientation):  # reference data: gap at insertion (BAM_CINS) columns
+        return self._gapify(data, orientation, BAM_CINS)
+
+    def _gapify(self, data, orientation, gapOp):
+        if self.isUnmapped: return data
+
+        # Precondition: data must already be *in* the specified orientation
+        if data.dtype == np.int8:
+            gapCode = ord("-")
+        else:
+            gapCode = data.dtype.type(-1)
+        uc = self.unrolledCigar(orientation=orientation)
+        alnData = np.repeat(np.array(gapCode, dtype=data.dtype), len(uc))
+        gapMask = (uc == gapOp)
+        alnData[~gapMask] = data
+        return alnData
+
+    IPD            = _makePulseFeatureAccessor("IPD")  # one class-level accessor per feature name
+    PulseWidth     = _makePulseFeatureAccessor("PulseWidth")
+    #QualityValue   = _makePulseFeatureAccessor("QualityValue")  # disabled; presumably because qual is stored genomically (see pulseFeature) -- confirm
+    InsertionQV    = _makePulseFeatureAccessor("InsertionQV")
+    DeletionQV     = _makePulseFeatureAccessor("DeletionQV")
+    DeletionTag    = _makePulseFeatureAccessor("DeletionTag")
+    MergeQV        = _makePulseFeatureAccessor("MergeQV")
+    SubstitutionQV = _makePulseFeatureAccessor("SubstitutionQV")
+
+    def read(self, aligned=True, orientation="native"):
+        if not (orientation == "native" or orientation == "genomic"):
+            raise ValueError, "Bad `orientation` value"
+        if self.isUnmapped and (orientation != "native" or aligned == True):
+            raise UnavailableFeature, \
+                "Cannot get genome oriented/aligned features from unmapped BAM record"
+        data = np.fromstring(self.peer.seq, dtype=np.int8)
+        s = self.aStart - self.qStart
+        e = self.aEnd   - self.qStart
+        l = self.qLen
+        # clip
+        assert l == len(data) and s >= 0 and e <= l
+        if self.isForwardStrand: clipped = data[s:e]
+        else:                    clipped = data[(l-e):(l-s)]
+        # orient
+        shouldReverse = self.isReverseStrand and orientation == "native"
+        ungapped = reverseComplementAscii(clipped) if shouldReverse else clipped
+        # gapify
+        if aligned: r = self._gapifyRead(ungapped, orientation)
+        else:       r = ungapped
+        return r.tostring()
+
+    def __repr__(self):
+        if self.isUnmapped:
+            return "Unmapped BAM record: " + self.queryName
+        else:
+            return "BAM alignment: %s @ %s  %3d  %9d  %9d" \
+            % (self.queryName, ("+" if self.isForwardStrand else "-"),
+               self.referenceId, self.tStart, self.tEnd)
+
+    def __str__(self):
+        if self.bam.isReferenceLoaded:
+            COLUMNS = 80
+            val = ""
+            val += repr(self) + "\n\n"
+            val += "Read:        " + self.readName           + "\n"
+            val += "Reference:   " + self.referenceName      + "\n\n"
+            val += "Read length: " + str(self.readLength)    + "\n"
+            #val += "Identity:    " + "%0.3f" % self.identity + "\n"
+
+            alignedRead = self.read()
+            alignedRef = self.reference()
+            transcript = self.transcript(style="exonerate+")
+            refPos = self.referencePositions()
+            refPosString = "".join([str(pos % 10) for pos in refPos])
+            for i in xrange(0, len(alignedRef), COLUMNS):
+                val += "\n"
+                val += "  " + refPosString[i:i+COLUMNS] + "\n"
+                val += "  " + alignedRef  [i:i+COLUMNS] + "\n"
+                val += "  " + transcript  [i:i+COLUMNS] + "\n"
+                val += "  " + alignedRead [i:i+COLUMNS] + "\n"
+                val += "\n"
+            return val
+        else:
+            return repr(self)
+
+    def __cmp__(self, other):
+        return cmp((self.referenceId, self.tStart, self.tEnd),
+                   (other.referenceId, other.tStart, other.tEnd))
+
+    @requiresPbi
+    def __getattr__(self, key):
+        if key in self.bam.pbi.columnNames:
+            return self.bam.pbi[self.rowNumber][key]
+        else:
+            raise AttributeError, "no such column in pbi index"
+
+    def __dir__(self):
+        if self.bam.pbi is not None:
+            return self.bam.pbi.columnNames
+
+class ClippedBamAlignment(BamAlignment):
+    def __init__(self, aln, tStart, tEnd, aStart, aEnd, unrolledCigar):
+        # Self-consistency checks (asserts, so skipped under python -O)
+        assert aln.isMapped
+        assert tStart <= tEnd
+        assert aStart <= aEnd
+        assert sum(unrolledCigar != BAM_CDEL) == (aEnd - aStart)  # non-deletion columns == read window
+
+        # Assignment: shares peer/bam/rowNumber with `aln`; the windows and CIGAR are the new ones
+        self.peer           = aln.peer
+        self.bam            = aln.bam
+        self.rowNumber      = aln.rowNumber
+        self.tStart         = tStart
+        self.tEnd           = tEnd
+        self.aStart         = aStart
+        self.aEnd           = aEnd
+        self._unrolledCigar = unrolledCigar  # genomic orientation
+
+    def unrolledCigar(self, orientation="native"):  # CIGAR is always preset here: no lazy decode
+        if orientation=="native" and self.isReverseStrand:
+            return self._unrolledCigar[::-1]
+        else:
+            return self._unrolledCigar
diff --git a/pbcore/io/align/BamIO.py b/pbcore/io/align/BamIO.py
new file mode 100644
index 0000000..01de9ce
--- /dev/null
+++ b/pbcore/io/align/BamIO.py
@@ -0,0 +1,394 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+__all__ = [ "BamReader", "IndexedBamReader" ]
+
+from pysam import Samfile
+from pbcore.io import FastaTable
+from pbcore.chemistry import decodeTriple, ChemistryLookupError
+
+import numpy as np
+from itertools import groupby
+from functools import wraps
+from os.path import abspath, expanduser, exists
+
+from ..base import ReaderBase
+from .PacBioBamIndex import PacBioBamIndex
+from .BamAlignment import *
+from ._BamSupport import *
+from ._AlignmentMixin import AlignmentReaderMixin, IndexedAlignmentReaderMixin
+
+
+def requiresBai(method):
+    @wraps(method)
+    def f(bamReader, *args, **kwargs):
+        if not bamReader.peer._hasIndex():
+            raise UnavailableFeature, "this feature requires an standard BAM index file (bam.bai)"
+        else:
+            return method(bamReader, *args, **kwargs)
+    return f
+
+
+class _BamReaderBase(ReaderBase):
+    """
+    The BamReader class provides a high-level interface to PacBio BAM
+    files.  If a PacBio BAM index (bam.pbi file) is present and the
+    user instantiates the BamReader using the reference FASTA as the
+    second argument, the BamReader will provide an interface
+    compatible with CmpH5Reader.
+    """
+    def _loadReferenceInfo(self):
+        refRecords = self.peer.header["SQ"]
+        refNames   = [r["SN"] for r in refRecords]
+        refLengths = [r["LN"] for r in refRecords]
+        refMD5s    = [r["M5"] for r in refRecords]
+        refIds = map(self.peer.gettid, refNames)
+        nRefs = len(refRecords)
+
+        if nRefs > 0:
+            self._referenceInfoTable = np.rec.fromrecords(zip(
+                refIds,
+                refIds,
+                refNames,
+                refNames,
+                refLengths,
+                refMD5s,
+                np.zeros(nRefs, dtype=np.uint32),
+                np.zeros(nRefs, dtype=np.uint32)),
+                dtype=[('ID', '<i8'), ('RefInfoID', '<i8'),
+                       ('Name', 'O'), ('FullName', 'O'),
+                       ('Length', '<i8'), ('MD5', 'O'),
+                       ('StartRow', '<u4'), ('EndRow', '<u4')])
+            self._referenceDict = {}
+            self._referenceDict.update(zip(refIds, self._referenceInfoTable))
+            self._referenceDict.update(zip(refNames, self._referenceInfoTable))
+        else:
+            self._referenceInfoTable = None
+            self._referenceDict = None
+
+    def _loadReadGroupInfo(self):
+        rgs = self.peer.header["RG"]
+        readGroupTable_ = []
+        pulseFeaturesInAll_ = frozenset(PULSE_FEATURE_TAGS.keys())
+        for rg in rgs:
+            rgID = rgAsInt(rg["ID"])
+            rgName = rg["PU"]
+            ds = dict([pair.split("=") for pair in rg["DS"].split(";") if pair != ""])
+            # spec: we only consider first two components of basecaller version
+            # in "chem" lookup
+            basecallerVersion = ".".join(ds["BASECALLERVERSION"].split(".")[0:2])
+            triple = ds["BINDINGKIT"], ds["SEQUENCINGKIT"], basecallerVersion
+            rgChem = decodeTriple(*triple)
+            rgReadType = ds["READTYPE"]
+            readGroupTable_.append((rgID, rgName, rgReadType, rgChem))
+            pulseFeaturesInAll_ = pulseFeaturesInAll_.intersection(ds.keys())
+
+        self._readGroupTable = np.rec.fromrecords(
+            readGroupTable_,
+            dtype=[("ID"                 , np.int32),
+                   ("MovieName"          , "O"),
+                   ("ReadType"           , "O"),
+                   ("SequencingChemistry", "O")])
+        assert len(set(self._readGroupTable.ID)) == len(self._readGroupTable), \
+            "First 8 chars of read group IDs must be unique!"
+
+        self._readGroupDict = { rg.ID : rg
+                                for rg in self._readGroupTable }
+
+        self._pulseFeaturesAvailable = pulseFeaturesInAll_
+
+
+    def _loadProgramInfo(self):
+        pgRecords = [ (pg["ID"], pg.get("VN", None), pg.get("CL", None))
+                      for pg in self.peer.header.get("PG", []) ]
+
+        if len(pgRecords) > 0:
+            self._programTable = np.rec.fromrecords(
+                pgRecords,
+                dtype=[("ID"     ,     "O"),
+                       ("Version",     "O"),
+                       ("CommandLine", "O")])
+        else:
+            self._programTable = None
+
+    def _loadReferenceFasta(self, referenceFastaFname):
+        ft = FastaTable(referenceFastaFname)
+        # Verify that this FASTA is in agreement with the BAM's
+        # reference table---BAM should be a subset.
+        fastaIdsAndLens = set((c.id, len(c)) for c in ft)
+        bamIdsAndLens   = set((c.Name, c.Length) for c in self.referenceInfoTable)
+        if not bamIdsAndLens.issubset(fastaIdsAndLens):
+            raise ReferenceMismatch, "FASTA file must contain superset of reference contigs in BAM"
+        self.referenceFasta = ft
+
+    def _checkFileCompatibility(self):
+        # Verify that this is a "pacbio" BAM file of version at least
+        # 3.0b3
+        try:
+            checkedVersion = self.version
+        except:
+            raise IncompatibleFile(
+                "This BAM file is incompatible with this API " +
+                "(only PacBio BAM files version >= 3.0b3 are supported)")
+
+    def __init__(self, fname, referenceFastaFname=None):
+        self.filename = fname = abspath(expanduser(fname))
+        self.peer = Samfile(fname, "rb", check_sq=False)
+        self._checkFileCompatibility()
+
+        self._loadReferenceInfo()
+        self._loadReadGroupInfo()
+        self._loadProgramInfo()
+
+        self.referenceFasta = None
+        if referenceFastaFname is not None:
+            if self.isUnmapped:
+                raise ValueError, "Unmapped BAM file--reference FASTA should not be given as argument to BamReader"
+            self._loadReferenceFasta(referenceFastaFname)
+
+    @property
+    def isIndexLoaded(self):
+        return self.index is not None
+
+    @property
+    def isReferenceLoaded(self):
+        return self.referenceFasta is not None
+
+    @property
+    def isUnmapped(self):
+        return not(self.isMapped)
+
+    @property
+    def isMapped(self):
+        return len(self.peer.header["SQ"]) > 0
+
+    @property
+    def alignmentIndex(self):
+        raise UnavailableFeature("BAM has no alignment index")
+
+    @property
+    def movieNames(self):
+        return set([mi.MovieName for mi in self.readGroupTable])
+
+    @property
+    def readGroupTable(self):
+        return self._readGroupTable
+
+    def readGroupInfo(self, readGroupId):
+        return self._readGroupDict[readGroupId]
+
+    @property
+    def sequencingChemistry(self):
+        """
+        List of the sequencing chemistries by movie.  Order is
+        unspecified.
+        """
+        return list(self.readGroupTable.SequencingChemistry)
+
+    @property
+    def referenceInfoTable(self):
+        return self._referenceInfoTable
+
+    #TODO: standard?  how about subread instead?  why capitalize ccs?
+    # can we standardize this?  is cDNA an additional possibility
+    @property
+    def readType(self):
+        """
+        Either "standard", "CCS", "mixed", or "unknown", to represent the
+        type of PacBio reads aligned in this BAM file.
+        """
+        readTypes = self.readGroupTable.ReadType
+        if all(readTypes == "SUBREAD"):
+            return "standard"
+        elif all(readTypes == "CCS"):
+            return "CCS"
+        elif all((readTypes == "CCS") | (readTypes == "SUBREAD")):
+            return "mixed"
+        else:
+            return "unknown"
+
+    @property
+    def version(self):
+        return self.peer.header["HD"]["pb"]
+
+    def versionAtLeast(self, minimalVersion):
+        raise Unimplemented()
+
+    def softwareVersion(self, programName):
+        raise Unimplemented()
+
+    @property
+    def isSorted(self):
+        return self.peer.header["HD"]["SO"] == "coordinate"
+
+    @property
+    def isBarcoded(self):
+        raise Unimplemented()
+
+    @property
+    def isEmpty(self):
+        return (len(self) == 0)
+
+    def referenceInfo(self, key):
+        return self._referenceDict[key]
+
+    def atOffset(self, offset):
+        self.peer.seek(offset)
+        return BamAlignment(self, next(self.peer))
+
+    def hasPulseFeature(self, featureName):
+        return featureName in self._pulseFeaturesAvailable
+
+    def pulseFeaturesAvailable(self):
+        return self._pulseFeaturesAvailable
+
+    @property
+    def barcode(self):
+        raise Unimplemented()
+
+    @property
+    def barcodeName(self):
+        raise Unimplemented()
+
+    @property
+    def barcodes(self):
+        raise Unimplemented()
+
+    @requiresBai
+    def __len__(self):
+        return self.peer.mapped + self.peer.unmapped
+
+    def close(self):
+        if hasattr(self, "file") and self.file is not None:
+            self.file.close()
+            self.file = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+
+class BamReader(_BamReaderBase, AlignmentReaderMixin):
+    """
+    Reader for a BAM with a bam.bai (SAMtools) index, but not a
+    bam.pbi (PacBio) index.  Supports basic BAM operations.
+    """
+    def __init__(self, fname, referenceFastaFname=None):
+        super(BamReader, self).__init__(fname, referenceFastaFname)
+
+    def __iter__(self):
+        self.peer.reset()
+        for a in self.peer:
+            yield BamAlignment(self, a)
+
+    # TODO: cmp.h5 readsInRange only accepts int key, not string.
+    # that's just lame, fix it.
+    def readsInRange(self, winId, winStart, winEnd, justIndices=False):
+        # PYSAM BUG: fetch doesn't work if arg 1 is tid and not rname
+        if not isinstance(winId, str):
+            winId = self.peer.getrname(winId)
+        if justIndices == True:
+            raise UnavailableFeature("BAM is not random-access")
+        else:
+            return ( BamAlignment(self, it)
+                     for it in self.peer.fetch(winId, winStart, winEnd, multiple_iterators=False) )
+
+    def __getitem__(self, rowNumbers):
+        raise UnavailableFeature("Use IndexedBamReader to get row-number based slicing.")
+
+
+
+class IndexedBamReader(_BamReaderBase, IndexedAlignmentReaderMixin):
+    """
+    A `IndexedBamReader` is a BAM reader class that uses the
+    ``bam.pbi`` (PacBio BAM index) file to enable random access by
+    "row number" and to provide access to precomputed semantic
+    information about the BAM records
+    """
+    def __init__(self, fname, referenceFastaFname=None):
+        super(IndexedBamReader, self).__init__(fname, referenceFastaFname)
+        self.pbi = None
+        pbiFname = self.filename + ".pbi"
+        if exists(pbiFname):
+            self.pbi = PacBioBamIndex(pbiFname)
+        else:
+            raise IOError, "IndexedBamReader requires bam.pbi index file"
+
+    def atRowNumber(self, rn):
+        offset = self.pbi.virtualFileOffset[rn]
+        self.peer.seek(offset)
+        return BamAlignment(self, next(self.peer), rn)
+
+    def readsInRange(self, winId, winStart, winEnd, justIndices=False):
+        if isinstance(winId, str):
+            winId = self.referenceInfo(winId).ID
+        ix = self.pbi.rangeQuery(winId, winStart, winEnd)
+        if justIndices:
+            return ix
+        else:
+            return self[ix]
+
+    def __iter__(self):
+        for rn in xrange(len(self.pbi)):
+            yield self.atRowNumber(rn)
+
+    def __len__(self):
+        return len(self.pbi)
+
+    def __getitem__(self, rowNumbers):
+        if (isinstance(rowNumbers, int) or
+            issubclass(type(rowNumbers), np.integer)):
+            return self.atRowNumber(rowNumbers)
+        elif isinstance(rowNumbers, slice):
+            return [ self.atRowNumber(r)
+                     for r in xrange(*rowNumbers.indices(len(self)))]
+        elif isinstance(rowNumbers, list) or isinstance(rowNumbers, np.ndarray):
+            if len(rowNumbers) == 0:
+                return []
+            else:
+                entryType = type(rowNumbers[0])
+                if entryType == int or issubclass(entryType, np.integer):
+                    return [ self.atRowNumber(r) for r in rowNumbers ]
+                elif entryType == bool or issubclass(entryType, np.bool_):
+                    return [ self.atRowNumber(r) for r in np.flatnonzero(rowNumbers) ]
+        raise TypeError, "Invalid type for IndexedBamReader slicing"
+
+    def __getattr__(self, key):
+        if key in self.pbi.columnNames:
+            return self.pbi[key]
+        else:
+            raise AttributeError, "no such column in pbi index"
+
+    def __dir__(self):
+        return self.pbi.columnNames
diff --git a/pbcore/io/align/BlasrIO.py b/pbcore/io/align/BlasrIO.py
new file mode 100644
index 0000000..0d80df0
--- /dev/null
+++ b/pbcore/io/align/BlasrIO.py
@@ -0,0 +1,116 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+from pbcore.io.base import ReaderBase
+
+__all__ = [ "M4Record",
+            "M4Reader",
+            "M5Record",
+            "M5Reader" ]
+
+class MalformattedRecord(Exception): pass  # raised by fromString when a line cannot be parsed
+
+class M4Record(object):
+    """
+    Record for alignment summary record output from BLASR -m 4 option
+    """
+    @classmethod
+    def fromString(cls, s):
+        obj = cls()
+        try:
+            columns = s.strip().split()
+            obj.qName             = columns[0]
+            obj.tName             = columns[1]
+            obj.score             = int(columns[2])
+            obj.percentSimilarity = float(columns[3])
+            obj.qStrand           = int(columns[4])
+            obj.qStart            = int(columns[5])
+            obj.qEnd              = int(columns[6])
+            obj.qLength           = int(columns[7])
+            obj.tStrand           = int(columns[8])
+            obj.tStart            = int(columns[9])
+            obj.tEnd              = int(columns[10])
+            obj.tLength           = int(columns[11])
+            obj.mapQV             = int(columns[12])
+            return obj
+        except:
+            raise MalformattedRecord(s)
+
+class M4Reader(ReaderBase):
+    """
+    Reader for -m 4 formatted alignment summary information from BLASR
+    """
+    def __iter__(self):
+        for line in self.file:
+            yield M4Record.fromString(line)
+
+
+
+class M5Record(object):
+    """
+    Record for alignment summary record output from BLASR -m 5 option
+    """
+    @classmethod
+    def fromString(cls, s):
+        obj = cls()
+        try:
+            columns = s.strip().split()
+            obj.qName        = columns[0]
+            obj.qLength      = int(columns[1])
+            obj.qStart       = int(columns[2])
+            obj.qEnd         = int(columns[3])
+            obj.qStrand      = columns[4]
+            obj.tName        = columns[5]
+            obj.tLength      = int(columns[6])
+            obj.tStart       = int(columns[7])
+            obj.tEnd         = int(columns[8])
+            obj.tStrand      = columns[9]
+            obj.score        = float(columns[10])
+            obj.numMatch     = int(columns[11])
+            obj.numMismatch  = int(columns[12])
+            obj.numIns       = int(columns[13])
+            obj.numDel       = int(columns[14])
+            obj.mapQV        = int(columns[15])
+            obj.qAlignedSeq  = columns[16]
+            obj.matchPattern = columns[17]
+            obj.tAlignedSeq  = columns[18]
+            return obj
+        except:
+            raise MalformattedRecord(s)
+
+class M5Reader(ReaderBase):
+    """
+    Reader for -m 5 formatted alignment summary information from BLASR
+    """
+    def __iter__(self):
+        for line in self.file:
+            yield M5Record.fromString(line)
diff --git a/pbcore/io/align/CmpH5IO.py b/pbcore/io/align/CmpH5IO.py
new file mode 100755
index 0000000..b94126d
--- /dev/null
+++ b/pbcore/io/align/CmpH5IO.py
@@ -0,0 +1,1277 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+__all__ = [ "CmpH5Reader",
+            "CmpH5Alignment",
+            "EmptyCmpH5Error" ]
+
+import h5py, numpy as np, warnings
+from bisect import bisect_left, bisect_right
+from collections import Counter, OrderedDict
+from itertools import groupby
+from os.path import abspath, expanduser
+from pbcore.io.rangeQueries import makeReadLocator
+from pbcore.io._utils import rec_join, arrayFromDataset
+from pbcore.io.FastaIO import splitFastaHeader
+from pbcore.io.base import ReaderBase
+from pbcore.chemistry import decodeTriple, ChemistryLookupError
+from pbcore.util.decorators import deprecated
+
+from ._AlignmentMixin import AlignmentRecordMixin, IndexedAlignmentReaderMixin
+
+# ========================================
+#  Data manipulation routines.
+#
+GAP = 0b0000
+
+_basemap =  { 0b0000 : ord("-"),
+              0b0001 : ord("A"),
+              0b0010 : ord("C"),
+              0b0100 : ord("G"),
+              0b1000 : ord("T"),
+              0b1111 : ord("N") }
+
+_cBasemap = { 0b0000 : ord("-"),
+              0b0001 : ord("T"),
+              0b0010 : ord("G"),
+              0b0100 : ord("C"),
+              0b1000 : ord("A"),
+              0b1111 : ord("N") }
+
+_basemapArray  = np.zeros(shape=(max(_basemap.keys()) + 1,), dtype=np.byte)
+_cBasemapArray = np.zeros(shape=(max(_cBasemap.keys()) + 1,), dtype=np.byte)
+# Codes absent from the maps decode to NUL rather than uninitialized memory.
+for (e, v) in _basemap.iteritems():
+    _basemapArray[e] = v
+for (e, v) in _cBasemap.iteritems():
+    _cBasemapArray[e] = v
+
+_baseEncodingToInt = np.array([-1]*16)
+_baseEncodingToInt[0b0000] = 0
+_baseEncodingToInt[0b0001] = 1
+_baseEncodingToInt[0b0010] = 2
+_baseEncodingToInt[0b0100] = 3
+_baseEncodingToInt[0b1000] = 4
+_baseEncodingToInt[0b1111] = 5  # 'N' base
+
+# 2D tables indexed by (readInt, refInt); 'N' is never considered a mismatch.
+# Invalid encodings map to -1, i.e. the last row/column, which is all 'Z'.
+_gusfieldTranscriptTable = \
+    np.fromstring("ZDDDDDZ"
+                  "IMRRRMZ"
+                  "IRMRRMZ"
+                  "IRRMRMZ"
+                  "IRRRMMZ"
+                  "IMMMMMZ"
+                  "ZZZZZZZ", dtype=np.uint8).reshape(7, 7)
+_cigarTranscriptTable = \
+    np.fromstring("ZDDDDDZ"
+                  "IMMMMMZ"
+                  "IMMMMMZ"
+                  "IMMMMMZ"
+                  "IMMMMMZ"
+                  "IMMMMMZ"
+                  "ZZZZZZZ", dtype=np.uint8).reshape(7, 7)
+_exonerateTranscriptTable = \
+    np.fromstring("Z     Z"
+                  " |   |Z"
+                  "  |  |Z"
+                  "   | |Z"
+                  "    ||Z"
+                  " |||||Z"
+                  "ZZZZZZZ", dtype=np.uint8).reshape(7, 7)
+_exoneratePlusTranscriptTable = \
+    np.fromstring("Z     Z"
+                  " |***|Z"
+                  " *|**|Z"
+                  " **|*|Z"
+                  " ***||Z"
+                  " |||||Z"
+                  "ZZZZZZZ", dtype=np.uint8).reshape(7, 7)
+
+class EmptyCmpH5Error(Exception):
+    """An exception raised when CmpH5Reader tries to read from a
+    cmp.h5 with no alignments (i.e. a zero-length /AlnInfo/AlnIndex).
+    """
+    pass
+
+def readFromAlignmentArray(a, gapped=True, complement=False):
+    """
+    Decode the read component (high nibble) of an alignment array.
+    """
+    if complement:
+        r = _cBasemapArray[a >> 4]
+    else:
+        r = _basemapArray[a >> 4]
+    if not gapped:
+        r = r[r != ord("-")]  # drop the gap columns
+    return  r.tostring()
+
+def referenceFromAlignmentArray(a, gapped=True, complement=False):
+    """
+    Decode the reference component (low nibble) of an alignment array.
+    """
+    if complement:
+        r = _cBasemapArray[a & 0b1111]
+    else:
+        r = _basemapArray[a & 0b1111]
+    if not gapped:
+        r = r[r != ord("-")]  # drop the gap columns
+    return  r.tostring()
+
+def ungappedPulseArray(a):
+    """
+    Return a pulse array with encoded gaps removed.
+
+    The gap sentinel depends on the dtype: NaN for float32, the all-ones
+    (maximum) value for uint8/uint16/uint32, and ord("-") for int8.
+    Raises Exception for any other dtype.
+    """
+    dtype = a.dtype
+    if dtype == np.float32:
+        return a[~np.isnan(a)]
+    elif dtype in (np.uint8, np.uint16, np.uint32):
+        return a[a != np.iinfo(dtype).max]
+    elif dtype == np.int8:
+        return a[a != ord("-")]
+    else:
+        raise Exception("Invalid pulse array type")
+
+
+
+# ========================================
+# Alignment record type
+#
+
+ALIGNMENT_INDEX_COLUMNS = ["AlnID", "AlnGroupID", "MovieID", "RefGroupID",
+                           "tStart", "tEnd", "RCRefStrand", "HoleNumber",
+                           "SetNumber", "StrobeNumber", "MoleculeID",
+                           "rStart", "rEnd", "MapQV", "nM", "nMM", "nIns",
+                           "nDel", "Offset_begin", "Offset_end", "nBackRead",
+                           "nReadOverlap"]
+
+ALIGNMENT_INDEX_DTYPE = [(COLUMN_NAME, np.uint32)
+                         for COLUMN_NAME in ALIGNMENT_INDEX_COLUMNS]
+
+
+OFFSET_TABLE_DTYPE = [ ("ID",       np.uint32),
+                       ("StartRow", np.uint32),
+                       ("EndRow",   np.uint32) ]
+
+
+def _makePulseFeatureAccessor(featureName):
+    def f(self, aligned=True, orientation="native"):  # forwards to pulseFeature
+        return self.pulseFeature(featureName, aligned, orientation)
+    return f
+
+class CmpH5Alignment(AlignmentRecordMixin):
+    """
+    A lightweight class representing a single alignment record in a
+    CmpH5 file, providing access to all columns of a single row of the
+    alignment index, and on-demand access to the corresponding
+    sequence and pulse features.
+
+    `CmpH5Alignment` objects are obtained by slicing a
+    `CmpH5Reader` object:
+
+    .. doctest::
+
+        >>> c[26]
+        CmpH5 alignment: +    1       7441       7699
+
+        >>> print c[26]
+        CmpH5 alignment: +    1       7441       7699
+        <BLANKLINE>
+        Read:        m110818_075520_42141_c100129202555500000315043109121112_s2_p0/1009/44_322
+        Reference:   lambda_NEB3011
+        <BLANKLINE>
+        Read length: 278
+        Concordance: 0.871
+        <BLANKLINE>
+          12345678901234567890123456789001223456789012345678890112345678990112344567890011
+          AACTGGTCACGGTCGTGGCACTGGTGAAG-CT-GCATACTGATGCACTT-CAC-GCCACGCG-GG-ATG-AACCTG-T-G
+          |||||||  ||||  ||||||||| |||| || ||||||||| |||||  ||| |||||||| || ||| |||||| | |
+          AACTGGT--CGGT--TGGCACTGG-GAAGCCTTGCATACTGA-GCACT-CCACGGCCACGCGGGGAATGAAACCTGGTGG
+        <BLANKLINE>
+        <BLANKLINE>
+          23456789012345678900123456678901234456789012345678901234566789011234556789012345
+          GCATTTGTGCTGCCGGGA-ACGGCG-TTTCGTGT-CTCTGCCGGTGTGGCAGCCGAA-ATGAC-AGAG-CGCGGCCTGGC
+          |||||||||||||||||| |||||| |||||| | |||||||||||||||||||||| ||||| |||| |||||||||||
+          GCATTTGTGCTGCCGGGAAACGGCGTTTTCGT-TCCTCTGCCGGTGTGGCAGCCGAAAATGACAAGAGCCGCGGCCTGGC
+        <BLANKLINE>
+        <BLANKLINE>
+          67899012345677890123456789901123456789901233456789012345678901234556778901223455
+          CAG-AATGCAAT-AACGGGAGGCGC-TG-TGGCTGAT-TTCG-ATAACCTGTTCGATGCTGCCAT-TG-CCCGC-GCC-G
+          ||| |||||||| |||||||||||| || |||||||| |||| |||||||||||||||||||||| || ||||| ||| |
+          CAGAAATGCAATAAACGGGAGGCGCTTGCTGGCTGATTTTCGAATAACCTGTTCGATGCTGCCATTTGACCCGCGGCCGG
+        <BLANKLINE>
+        <BLANKLINE>
+          6678901234567889012345667890123456789012345678
+          -ATGAAACGATAC-GCGGGTAC-ATGGGAACGTCAGCCACCATTAC
+           |||||||||||| |||||||| |||||||||||||||||||||||
+          AATGAAACGATACGGCGGGTACAATGGGAACGTCAGCCACCATTAC
+        <BLANKLINE>
+        <BLANKLINE>
+
+    The `orientation` argument to some data access methods determines
+    how reverse-strand alignments are returned to the user.  `.cmp.h5`
+    files natively encode reverse strand reads in read-order,
+    uncomplemented, with the *reference* reverse-complemented.  Most
+    analysis applications will want to use the data in this order,
+    which we term the *NATIVE* orientation.
+
+    Some applications that involve collating or displaying the reads
+    aligned to the reference genome want the reference to be presented
+    in its genomic order, and the *read* to be reverse-complemented.
+    We term this *GENOMIC* orientation.
+
+    Methods prefixed with *aligned* return data (bases or features)
+    that include gaps, which are encoded according to the data type.
+    Methods not prefixed with *aligned* excise the gaps.
+
+    Sequence and pulse features are not cached.
+    """
+    __slots__ = ["cmpH5", "rowNumber"]
+
+    def __init__(self, cmph5, rowNumber):
+        self.cmpH5 = cmph5
+        self.rowNumber = rowNumber
+
+    @property
+    def reader(self):
+        return self.cmpH5
+
+    def clippedTo(self, refStart, refEnd):
+        """
+        Return a new `CmpH5Alignment` for the subalignment induced by clipping
+        to reference coordinates `refStart` to `refEnd`; raises `IndexError`
+        if that interval is empty or does not overlap this alignment.
+
+        .. warning::
+            This function takes time linear in the length of the alignment.
+        """
+        if (refStart >= refEnd or
+            refStart >= self.tEnd or
+            refEnd   <= self.tStart):
+            raise IndexError("Clipping query does not overlap alignment")
+        else:
+            return ClippedCmpH5Alignment(self, refStart, refEnd)
+
+    @property
+    def _alignmentGroup(self):
+        return self.cmpH5._alignmentGroup(self.AlnGroupID)
+
+    @property
+    def referenceInfo(self):
+        return self.cmpH5.referenceInfo(self.RefGroupID)
+
+    @property
+    def referenceName(self):
+        return self.referenceInfo.FullName
+
+    @property
+    def ReadGroupID(self):
+        return np.int32(self.MovieID)
+
+    @property
+    def qId(self):
+        # Forward compatibility with BAM API
+        return self.ReadGroupID
+
+    @property
+    def aStart(self):
+        # Forward compatibility with BAM API
+        return self.rStart
+
+    @property
+    def aEnd(self):
+        return self.rEnd
+
+    @property
+    def holeNumber(self):
+        # Forward compatibility with BAM API
+        return self.HoleNumber
+
+    @property
+    def mapQV(self):
+        # Forward compatibility with BAM API
+        return self.MapQV
+
+    @property
+    def readGroupInfo(self):
+        """
+        Returns the corresponding record from the `readGroupTable`.
+        """
+        # TODO: add doctest
+        return self.cmpH5.readGroupInfo(self.ReadGroupID)
+
+    @property
+    def movieInfo(self):
+        """
+        .. deprecated:: 0.9.2
+           Use :attr:`readGroupInfo`, which is compatible with BAM usage
+
+        Returns a record (extracted from the cmph5's `movieInfoTable`)
+        containing information about the movie that the read was
+        extracted from.  This record should be accessed using
+        dot-notation, according to the column names documented in
+        `movieInfoTable`.
+        """
+        return self.cmpH5.movieInfo(self.MovieID)
+
+    @property
+    def movieName(self):
+        return self.cmpH5._movieInfo(self.MovieID).Name
+
+    @property
+    def isForwardStrand(self):
+        return self.RCRefStrand == 0
+
+    @property
+    def isReverseStrand(self):
+        return self.RCRefStrand == 1
+
+    @property
+    def referenceId(self):
+        return self.RefGroupID
+
+    @property
+    def identity(self):
+        """
+        Return the identity of this alignment, calculated as
+        1 - (#mismatches + #insertions + #deletions) / read length
+
+        .. doctest::
+
+            >>> c[26].identity
+            0.87050359712230219
+        """
+        if self.readLength == 0:
+            return 0.
+        else:
+            return 1. - float(self.nMM + self.nIns + self.nDel)/self.readLength
+
+    @property
+    def accuracy(self):
+        """
+        Return the identity of this alignment, calculated as
+        1 - (#mismatches + #insertions + #deletions) / read length
+
+        .. deprecated:: 0.9.5
+           Use :attr:`identity`
+        """
+        return self.identity
+
+    @property
+    def similarity(self):
+        """
+        Replicates the pctsimilarity field from blasr, calculated as
+        #matches/mean(aligned_length, read_length)
+        """
+        meanLength = (self.readLength + self.referenceSpan)/2.0
+
+        if meanLength == 0:
+            return 0.
+        else:
+            return float(self.nM/meanLength)
+
+    @property
+    def numPasses(self):
+        """
+        (CCS only) The number of subreads that were used to produce this CCS read.
+        """
+        return self.cmpH5.numPasses[self.rowNumber]
+
+    @property
+    def zScore(self):
+        """
+        (PacBio internal files only)
+
+        The z-score of the alignment, using a null model of a random
+        sequence alignment.
+        """
+        return self.cmpH5.zScore[self.rowNumber]
+
+    @property
+    def barcode(self):
+        """
+        The barcode ID (integer key) for this alignment's read
+        Behavior undefined if file is not barcoded.
+        """
+        return self.cmpH5.barcodes[self.rowNumber]
+
+    @property
+    def barcodeName(self):
+        """
+        The barcode name (string) for this alignment's read
+        Behavior undefined if file is not barcoded.
+        """
+        return self.cmpH5.barcodeName[self.barcode]
+
+    @property
+    def sequencingChemistry(self):
+        return self.cmpH5.sequencingChemistry[self.MovieID-1]
+
+    def alignmentArray(self, orientation="native"):
+        """
+        Direct access to the raw, encoded alignment array, which is a
+        packed representation of the aligned read and reference.
+        """
+        alnDs = self._alignmentGroup["AlnArray"]
+        alnArray = arrayFromDataset(alnDs, self.Offset_begin, self.Offset_end)
+        if self.RCRefStrand and (orientation == "genomic"):
+            return alnArray[::-1]  # reverse-strand data is stored in read order
+        else:
+            return alnArray
+
+    def transcript(self, orientation="native", style="gusfield"):
+        """
+        A text representation of the alignment moves (see Gusfield).
+        This can be useful in pretty-printing an alignment.
+        """
+        if style == "exonerate+":
+            tbl = _exoneratePlusTranscriptTable
+        elif style == "exonerate":
+            tbl = _exonerateTranscriptTable
+        elif style == "cigar":
+            tbl = _cigarTranscriptTable
+        else:  # any unrecognized style silently falls back to "gusfield"
+            tbl = _gusfieldTranscriptTable
+        alnArr = self.alignmentArray(orientation)
+        readBaseInts = _baseEncodingToInt[alnArr >> 4]  # high nibble: read
+        refBaseInts  = _baseEncodingToInt[alnArr  & 0b1111]  # low nibble: ref
+        return tbl[readBaseInts, refBaseInts].tostring()
+
+    def read(self, aligned=True, orientation="native"):
+        """
+        Return the read portion of the alignment as a string.
+
+        If `aligned` is true, the aligned representation is returned,
+        including gaps; otherwise the unaligned read basecalls are
+        returned.
+
+        If `orientation` is "native", the returned read bases are
+        presented in the order they were read by the sequencing
+        machine.  If `orientation` is "genomic", the returned read
+        bases are presented in such a way as to collate with the
+        forward strand of the reference---which requires reverse
+        complementation of reverse-strand reads.
+        """
+        return readFromAlignmentArray(self.alignmentArray(orientation),
+                                      gapped=aligned,
+                                      complement=(self.RCRefStrand and
+                                                  orientation == "genomic"))
+
+    @property
+    def readType(self):
+        return self.cmpH5.readType
+
+    def reference(self, aligned=True, orientation="native"):
+        """
+        Return the reference portion of the alignment as a string.
+
+        If `aligned` is true, the aligned representation of the
+        reference is returned, including gaps; otherwise the unaligned
+        reference bases are returned.
+
+        If `orientation` is "native", the reference is presented in
+        the order it is stored in the cmp.h5 file---for reverse-strand
+        reads, the reference is reverse-complemented.  If
+        `orientation` is "genomic", the forward strand reference is returned.
+        """
+        return referenceFromAlignmentArray(self.alignmentArray(orientation),
+                                           gapped=aligned,
+                                           complement=(self.RCRefStrand and
+                                                       orientation == "genomic"))
+
+    def referencePositions(self, orientation="native"):
+        """
+        Returns an array of reference positions such that
+        referencePositions[i] = reference position of the i'th column
+        in the alignment.  Insertions are grouped with the following
+        reference base, in the specified orientation.
+
+        Length of output array = length of alignment
+        """
+        referenceNonGapMask = (self.alignmentArray(orientation) & 0b1111) != GAP
+        if self.RCRefStrand and orientation == "native":
+            return self.tEnd - 1 - np.hstack([0, np.cumsum(referenceNonGapMask[:-1])])
+        else:
+            return self.tStart + np.hstack([0, np.cumsum(referenceNonGapMask[:-1])])
+
+    def readPositions(self, orientation="native"):
+        """
+        Returns an array of read positions such that
+        readPositions[i] = read position of the i'th column
+        in the alignment.  Insertions are grouped with the following
+        read base, in the specified orientation.
+
+        Length of output array = length of alignment
+        """
+        readNonGapMask = (self.alignmentArray(orientation) >> 4) != GAP
+        if self.RCRefStrand and orientation == "genomic":
+            return self.rEnd - 1 - np.hstack([0, np.cumsum(readNonGapMask[:-1])])
+        else:
+            return self.rStart + np.hstack([0, np.cumsum(readNonGapMask[:-1])])
+
+    def pulseFeature(self, featureName, aligned=True, orientation="native"):
+        """
+        Access a pulse feature by name.
+        """
+        pulseDataset = self._alignmentGroup[featureName]
+        pulseArray = arrayFromDataset(pulseDataset, self.Offset_begin, self.Offset_end)
+        if self.RCRefStrand and orientation == "genomic":
+            alignedPulseArray = pulseArray[::-1]
+        else:
+            alignedPulseArray = pulseArray
+        if aligned:
+            return alignedPulseArray
+        else:
+            return ungappedPulseArray(alignedPulseArray)
+
+    IPD            = _makePulseFeatureAccessor("IPD")
+    PulseWidth     = _makePulseFeatureAccessor("PulseWidth")
+    QualityValue   = _makePulseFeatureAccessor("QualityValue")
+    InsertionQV    = _makePulseFeatureAccessor("InsertionQV")
+    DeletionQV     = _makePulseFeatureAccessor("DeletionQV")
+    DeletionTag    = _makePulseFeatureAccessor("DeletionTag")
+    MergeQV        = _makePulseFeatureAccessor("MergeQV")
+    SubstitutionQV = _makePulseFeatureAccessor("SubstitutionQV")
+
+    def __getattr__(self, key):  # fallback: unknown names are index columns
+        return self.cmpH5.alignmentIndex[self.rowNumber][key]
+
+    def __repr__(self):
+        return "CmpH5 alignment: %s  %3d  %9d  %9d" \
+            % (("+" if self.isForwardStrand else "-"),
+               self.RefGroupID, self.tStart, self.tEnd)
+
+    def __str__(self):
+        COLUMNS = 80
+        val = ""
+        val += repr(self) + "\n\n"
+        val += "Read:        " + self.readName           + "\n"
+        val += "Reference:   " + self.referenceName      + "\n\n"
+        val += "Read length: " + str(self.readLength)    + "\n"
+        val += "Concordance: " + "%0.3f" % self.identity + "\n"
+
+        alignedRead = self.read()
+        alignedRef = self.reference()
+        transcript = self.transcript(style="exonerate+")
+        refPos = self.referencePositions()
+        refPosString = "".join([str(pos % 10) for pos in refPos])
+        for i in xrange(0, len(alignedRef), COLUMNS):
+            val += "\n"
+            val += "  " + refPosString[i:i+COLUMNS] + "\n"
+            val += "  " + alignedRef  [i:i+COLUMNS] + "\n"
+            val += "  " + transcript  [i:i+COLUMNS] + "\n"
+            val += "  " + alignedRead [i:i+COLUMNS] + "\n"
+            val += "\n"
+        return val
+
+    def __cmp__(self, other):
+        return cmp((self.RefGroupID, self.tStart, self.tEnd),
+                   (other.RefGroupID, other.tStart, other.tEnd))
+
+    def __dir__(self):
+        # Special magic improving IPython completion
+        return ALIGNMENT_INDEX_COLUMNS
+
+class ClippedCmpH5Alignment(CmpH5Alignment):
+    """
+    An alignment from a cmp.h5 file that has been clipped to specified
+    reference bounds using the `CmpH5Alignment.clippedTo` method.
+    """
+    # These slots shadow the same-named alignment index columns: being real
+    # attributes, they are found before `__getattr__` consults the index row.
+    __slots__ = [ "tStart",
+                  "tEnd",
+                  "rStart",
+                  "rEnd",
+                  "Offset_begin",
+                  "Offset_end",
+                  "nM",
+                  "nMM",
+                  "nIns",
+                  "nDel"  ]
+
+    def __init__(self, aln, refStart, refEnd):
+        # The clipping region must intersect the alignment, though it
+        # does not have to be contained wholly within it.
+        refStart = max(aln.referenceStart, refStart)
+        refEnd   = min(aln.referenceEnd,   refEnd)
+        assert refStart <= refEnd
+
+        super(ClippedCmpH5Alignment, self).__init__(aln.cmpH5, aln.rowNumber)
+        refPositions = aln.referencePositions(orientation="genomic")
+        readPositions = aln.readPositions(orientation="genomic")
+
+        # Clipping positions within the (genomic-orientation) alignment array
+        clipStart = bisect_right(refPositions, refStart) - 1
+        clipEnd   = bisect_left(refPositions, refEnd)
+
+        # Overlay the new bounds.
+        self.tStart = refStart
+        self.tEnd   = refEnd
+        if aln.isForwardStrand:
+            self.Offset_begin = aln.Offset_begin + clipStart
+            self.Offset_end   = aln.Offset_begin + clipEnd
+            self.rStart = readPositions[clipStart]
+        else:  # stored array is reversed relative to genomic order
+            self.Offset_begin = aln.Offset_end - clipEnd
+            self.Offset_end   = aln.Offset_end - clipStart
+            self.rEnd   = readPositions[clipStart] + 1
+        alnMoveCounts = Counter(self.transcript(style="gusfield"))  # clipped span
+        self.nM   = alnMoveCounts["M"]
+        self.nMM  = alnMoveCounts["R"]
+        self.nIns = alnMoveCounts["I"]
+        self.nDel = alnMoveCounts["D"]
+        readLength = self.nM + self.nMM + self.nIns  # moves that consume a read base
+        if aln.isForwardStrand:
+            self.rEnd = self.rStart + readLength
+        else:
+            self.rStart = self.rEnd - readLength
+        assert self.rStart <= self.rEnd
+
+
+# ========================================
+# CmpH5 reader class
+#
+class CmpH5Reader(ReaderBase, IndexedAlignmentReaderMixin):
+    """
+    The `CmpH5Reader` class is a lightweight and efficient API for
+    accessing PacBio ``cmp.h5`` alignment files.  Alignment records
+    can be obtained via random access (via Python indexing/slicing),
+    iteration, or range queries (via readsInRange).
+
+    .. testsetup:: *
+
+        from pbcore import data
+        from pbcore.io import CmpH5Reader
+        filename = data.getCmpH5()
+        c = CmpH5Reader(filename)
+        a0 = c[0]
+        a1 = c[1]
+
+    .. doctest::
+
+        >>> import pbcore.data                # For an example data file
+        >>> from pbcore.io import CmpH5Reader
+        >>> filename = pbcore.data.getCmpH5()
+        >>> c = CmpH5Reader(filename)
+        >>> c[0]
+        CmpH5 alignment: -    1          0        290
+        >>> c[0:2]  # doctest: +NORMALIZE_WHITESPACE
+        [CmpH5 alignment: -    1          0        290,
+         CmpH5 alignment: +    1          0        365]
+        >>> sum(aln.readLength for aln in c)
+        26103
+
+    """
+    def __init__(self, filenameOrH5File, sharedAlignmentIndex=None):
+        """Open a cmp.h5 given a filename or a read-only h5py.File object."""
+        # The sharedAlignmentIndex is a copy of the /AlnInfo/AlnIndex dataset
+        # for the file indicated by filenameOrH5File that's already opened and
+        # held in memory by another process. When it isn't None, this process
+        # doesn't have to keep its own copy of the dataset, which can save
+        # memory. This is useful for quiver and kineticsTools where there's a
+        # master process that opens the cmph5 file and schedules slaves that
+        # only need a read-only copy of the reader.
+
+        # It is an unchecked runtime error to supply a sharedAlignmentIndex
+        # that is not identical to the AlnIndex in the filenameOrH5File
+
+        if isinstance(filenameOrH5File, h5py.File):
+            if filenameOrH5File.mode != "r":
+                raise ValueError("HDF5 files used by CmpH5Reader must be opened read-only!")
+            self.filename = filenameOrH5File.filename
+            self.file = filenameOrH5File
+        else:
+            try:
+                self.filename = abspath(expanduser(filenameOrH5File))
+                self.file = h5py.File(self.filename, "r")
+            except IOError:
+                raise IOError("Invalid or nonexistent cmp.h5 file %s" % filenameOrH5File)
+
+        self._loadAlignmentInfo(sharedAlignmentIndex)
+        self._loadMovieInfo()
+        self._loadReferenceInfo()
+        self._loadMiscInfo()
+
+        # These are loaded on demand
+        self._readGroupTable = None
+        self._readGroupDict  = None
+
+    def _loadAlignmentInfo(self, sharedAlignmentIndex=None):
+        # If a sharedAlignmentIndex is not provided, read it from the file. If
+        # it is provided, don't read anything from the file or store anything
+        # else in memory
+        if sharedAlignmentIndex is None:
+            if len(self.file["/AlnInfo/AlnIndex"]) == 0:
+                raise EmptyCmpH5Error("Empty cmp.h5 file, cannot be read by CmpH5Reader")
+            rawAlignmentIndex = self.file["/AlnInfo/AlnIndex"].value
+            self._alignmentIndex = (rawAlignmentIndex.view(dtype = ALIGNMENT_INDEX_DTYPE)
+                                                     .view(np.recarray)
+                                                     .flatten())
+        else:
+            self._alignmentIndex = sharedAlignmentIndex
+            self._alignmentIndex.setflags(write=False)
+
+        # This is the only sneaky part of this whole class.  We do not
+        # store the raw h5py group object; rather we cache a dict of {
+        # dataset_name -> dataset }.  This way we avoid B-tree
+        # scanning in basic data access.
+        self._alignmentGroupById = {}
+        for (alnGroupId, alnGroupPath) in zip(self.file["/AlnGroup/ID"],
+                                              self.file["/AlnGroup/Path"]):
+            alnGroup = self.file[alnGroupPath]
+            self._alignmentGroupById[alnGroupId] = dict(alnGroup.items())
+
+
+    def _loadMovieInfo(self):
+        numMovies = len(self.file["/MovieInfo/ID"])
+
+        if "FrameRate" in self.file["/MovieInfo"]:
+            frameRate = self.file["/MovieInfo/FrameRate"].value
+            timeScale = 1.0/frameRate
+        else:
+            frameRate = [np.nan] * numMovies
+            timeScale = [1.0] * numMovies
+
+        self._movieInfoTable = np.rec.fromrecords(
+            zip(self.file["/MovieInfo/ID"],
+                self.file["/MovieInfo/Name"],
+                frameRate,
+                timeScale),
+            dtype=[("ID"                  , int),
+                   ("Name"                , object),
+                   ("FrameRate"           , float),
+                   ("TimeScale"           , float)])
+
+        self._movieDict = {}
+        for record in self._movieInfoTable:
+            assert record.ID not in self._movieDict
+            self._movieDict[record.ID] = record
+            self._movieDict[record.Name] = record
+
+    def _loadReadGroupInfo(self):
+        # This is invoked lazily to allow operation on cmp.h5s with
+        # missing chemistry info.
+        assert (self._readGroupTable is None) and (self._readGroupDict is None)
+        self._readGroupTable = np.rec.fromrecords(
+            zip(self._movieInfoTable.ID,
+                self._movieInfoTable.Name,
+                [self.readType] * len(self._movieInfoTable.ID),
+                self.sequencingChemistry),
+            dtype=[("ID"                 , np.int32),
+                   ("MovieName"          , "O"),
+                   ("ReadType"           , "O"),
+                   ("SequencingChemistry", "O")])
+        self._readGroupDict = { rg.ID : rg
+                                for rg in self._readGroupTable }
+
+    def _loadReferenceInfo(self):
+        _referenceGroupTbl = np.rec.fromrecords(
+            zip(self.file["/RefGroup/ID"],
+                self.file["/RefGroup/RefInfoID"],
+                [path[1:] for path in self.file["/RefGroup/Path"]]),
+            dtype=[("ID"       , int),
+                   ("RefInfoID", int),
+                   ("Name"     , object)])
+
+        _referenceInfoTbl = np.rec.fromrecords(
+            zip(self.file["/RefInfo/ID"],
+                self.file["/RefInfo/FullName"],
+                self.file["/RefInfo/Length"],
+                self.file["/RefInfo/MD5"]) ,
+            dtype=[("RefInfoID", int),
+                   ("FullName" , object),
+                   ("Length"   , int),
+                   ("MD5"      , object)])
+
+        self._referenceInfoTable = \
+            rec_join("RefInfoID", _referenceGroupTbl, _referenceInfoTbl, jointype="inner")
+
+        if self.isSorted:
+            _offsetTable = self.file["/RefGroup/OffsetTable"].value \
+                              .view(dtype=OFFSET_TABLE_DTYPE)       \
+                              .view(np.recarray)                    \
+                              .flatten()
+            self._referenceInfoTable = rec_join("ID",
+                                                self._referenceInfoTable,
+                                                _offsetTable,
+                                                jointype="inner")
+        self._referenceDict = {}
+        self._readLocatorByKey = {}
+        for record in self._referenceInfoTable:
+            if record.ID != -1:
+                assert record.ID != record.Name
+                shortName = splitFastaHeader(record.FullName)[0]
+                if (shortName       in self._referenceDict or
+                    record.ID       in self._referenceDict or
+                    record.Name     in self._referenceDict or
+                    record.FullName in self._referenceDict or
+                    record.MD5      in self._referenceDict):
+                    raise ValueError("Duplicate reference contig sequence or identifier")
+                else:  # one record is reachable under five different keys
+                    self._referenceDict[shortName]       = record
+                    self._referenceDict[record.ID]       = record
+                    self._referenceDict[record.Name]     = record
+                    self._referenceDict[record.FullName] = record
+                    self._referenceDict[record.MD5]      = record
+
+                if self.isSorted:
+                    readLocator = makeReadLocator(self, record.ID)
+                    self._readLocatorByKey[record.ID] = readLocator
+                    self._readLocatorByKey[shortName] = readLocator
+
+    def _loadMiscInfo(self):
+        """
+        Load the optional per-alignment datasets found under
+        ``/AlnInfo`` (``NumPasses``, ``Barcode``, ``ZScore``) and reset
+        the cached sequencing chemistry.
+
+        Attributes are only assigned for the datasets that are present.
+        """
+        h5 = self.file
+        alnInfo = h5["/AlnInfo"]
+
+        if "NumPasses" in alnInfo:
+            self.numPasses = h5["/AlnInfo/NumPasses"].value
+
+        if "Barcode" in alnInfo:
+            barcodeIds   = h5["/BarcodeInfo/ID"]
+            barcodeNames = h5["/BarcodeInfo/Name"]
+            # Lookup tables in both directions: id -> name and name -> id
+            self._barcodeName = OrderedDict(zip(barcodeIds, barcodeNames))
+            self._barcode     = OrderedDict(zip(barcodeNames, barcodeIds))
+            # Column 1 of the Barcode dataset holds the barcode ID per row
+            self._barcodes = h5["/AlnInfo/Barcode"].value[:,1]
+
+        if "ZScore" in alnInfo:
+            self.zScore = h5["/AlnInfo/ZScore"].value
+
+        # Filled in on first access of the sequencingChemistry property
+        self._sequencingChemistry = None
+
+
+    @property
+    def sequencingChemistry(self):
+        """
+        The sequencing chemistry entries read from ``/MovieInfo``,
+        computed on first access and cached afterwards.
+
+        If ``BindingKit``, ``SequencingKit`` and ``SoftwareVersion`` are
+        all present, each triple is decoded with `decodeTriple`;
+        otherwise the stored ``SequencingChemistry`` dataset is used.
+
+        Raises ``ChemistryLookupError`` if neither source is present.
+        """
+        if self._sequencingChemistry is None:
+            mi = dict(self.file["/MovieInfo"])
+            if (("BindingKit" in mi) and
+                ("SequencingKit" in mi) and
+                ("SoftwareVersion" in mi)):
+                # New way
+                self._sequencingChemistry = \
+                    [ decodeTriple(bk, sk, sv)
+                      for (bk, sk, sv) in zip(
+                              mi["BindingKit"],
+                              mi["SequencingKit"],
+                              mi["SoftwareVersion"]) ]
+            elif "SequencingChemistry" in mi:
+                # Old way
+                self._sequencingChemistry = mi["SequencingChemistry"].value
+            else:
+                raise ChemistryLookupError("Chemistry information could not be found in cmp.h5!")
+        return self._sequencingChemistry
+
+
+    @property
+    def alignmentIndex(self):
+        """
+        Return the alignment index data structure, which is the
+        central data structure in the cmp.h5 file, as a numpy
+        recarray.  The object stored in ``self._alignmentIndex`` is
+        returned as is; no copy is made.
+
+        The `dtype` of the recarray is::
+
+            dtype([('AlnID', int),
+                   ('AlnGroupID', int),
+                   ('MovieID', int),
+                   ('RefGroupID', int),
+                   ('tStart', int),
+                   ('tEnd', int),
+                   ('RCRefStrand', int),
+                   ('HoleNumber', int),
+                   ('SetNumber', int),
+                   ('StrobeNumber', int),
+                   ('MoleculeID', int),
+                   ('rStart', int),
+                   ('rEnd', int),
+                   ('MapQV', int),
+                   ('nM', int),
+                   ('nMM', int),
+                   ('nIns', int),
+                   ('nDel', int),
+                   ('Offset_begin', int),
+                   ('Offset_end', int),
+                   ('nBackRead', int),
+                   ('nReadOverlap', int)])
+
+        Access to the alignment index is provided to allow users to
+        perform vectorized computations over all alignments in the file.
+
+        .. doctest::
+
+            >>> c.alignmentIndex.MapQV[0:10]
+            array([254, 254,   0, 254, 254, 254, 254, 254, 254, 254], dtype=uint32)
+
+        Alignment index fields are also exposed as fields of the
+        `CmpH5Reader` object, allowing a convenient shorthand.
+
+        .. doctest::
+
+            >>> c.MapQV[0:10]
+            array([254, 254,   0, 254, 254, 254, 254, 254, 254, 254], dtype=uint32)
+
+        The alignment index row for a given alignment can also be
+        accessed directly as a field of a `CmpH5Alignment` object:
+
+        .. doctest::
+
+            >>> c[26].MapQV
+            254
+        """
+        return self._alignmentIndex
+
+    @property
+    def movieInfoTable(self):
+        """
+        .. deprecated:: 0.9.2
+           Use :attr:`readGroupTable`, which is compatible with BAM usage
+
+        Return a numpy recarray summarizing source movies for the
+        reads in this file (the table held in ``self._movieInfoTable``).
+
+        The `dtype` of this recarray is::
+
+            dtype([('ID', 'int'),
+                   ('Name', 'string'),
+                   ('FrameRate', 'float'),
+                   ('TimeScale', 'float')])
+
+        `TimeScale` is the factor to multiply time values (IPD,
+        PulseWidth) by in order to get times in seconds.  The
+        `FrameRate` field should *not* be used directly as it will be
+        NaN for pre-1.3 cmp.h5 files.
+        """
+        return self._movieInfoTable
+
+    @property
+    def referenceInfoTable(self):
+        """
+        .. _referenceInfoTable:
+
+        Return a numpy recarray summarizing the references that were
+        aligned against (the table held in
+        ``self._referenceInfoTable``).
+
+        The `dtype` of this recarray is::
+
+            dtype([('RefInfoID', int),
+                   ('ID', int),
+                   ('Name', string),
+                   ('FullName', string),
+                   ('Length', int),
+                   ('MD5', string),
+                   ('StartRow', int),
+                   ('EndRow', int) ])
+
+        (the last two columns are omitted for unsorted `cmp.h5` files).
+        """
+        return self._referenceInfoTable
+
+    @property
+    def readType(self):
+        """
+        Either "standard" or "CCS", indicating the type of reads that
+        were aligned to the reference.  The value is read from the
+        ``ReadType`` attribute of the file on every access.
+
+        .. doctest::
+
+            >>> c.readType
+            'standard'
+        """
+        return self.file.attrs["ReadType"]
+
+    @property
+    def version(self):
+        """
+        The CmpH5 format version string, read from the ``Version``
+        attribute of the file on every access.
+
+        .. doctest::
+
+            >>> c.version
+            '1.2.0.SF'
+        """
+        return self.file.attrs["Version"]
+
+    def versionAtLeast(self, minimalVersion):
+        """
+        Compare the file version to `minimalVersion`.
+
+        Only the first three dot-separated components of each version
+        string are compared, as integers, so a trailing component such
+        as the ``SF`` in ``1.2.0.SF`` is ignored.  Returns ``True`` if
+        the file version is greater than or equal to `minimalVersion`.
+
+        .. doctest::
+
+            >>> c.versionAtLeast("1.3.0")
+            False
+        """
+        # Build real tuples: the result of map() is a list only on
+        # Python 2 and is not orderable at all on Python 3.
+        myVersionTuple = tuple(int(part) for part in self.version.split(".")[:3])
+        minimalVersionTuple = tuple(int(part) for part in minimalVersion.split(".")[:3])
+        return myVersionTuple >= minimalVersionTuple
+
+    def softwareVersion(self, programName):
+        """
+        Look up `programName` in the ``/FileLog`` group and return the
+        version recorded for it, or ``None`` if the program is not
+        listed there.
+        """
+        programs = self.file["/FileLog/Program"]
+        versions = self.file["/FileLog/Version"]
+        versionByProgram = dict(zip(programs, versions))
+        if programName in versionByProgram:
+            return versionByProgram[programName]
+        return None
+
+    @property
+    def isSorted(self):
+        """
+        True if the ``/RefGroup`` group of the file contains an
+        ``OffsetTable`` entry.
+        """
+        return "OffsetTable" in self.file["/RefGroup"]
+
+    @property
+    def isBarcoded(self):
+        """
+        True if the ``/AlnInfo`` group of the file contains a
+        ``Barcode`` entry.
+        """
+        return "Barcode" in self.file["/AlnInfo"]
+
+    @property
+    def isEmpty(self):
+        """
+        True if the ``/AlnInfo/AlnIndex`` dataset has length zero.
+        """
+        return len(self.file["/AlnInfo/AlnIndex"]) == 0
+
+    def _alignmentGroup(self, alnGroupId):
+        # Return the entry of self._alignmentGroupById stored under alnGroupId
+        return self._alignmentGroupById[alnGroupId]
+
+    @property
+    def movieNames(self):
+        """
+        The set of ``Name`` values of all entries in ``self._movieDict``.
+        """
+        return {movie.Name for movie in self._movieDict.values()}
+
+    @property
+    def ReadGroupID(self):
+        """
+        Alias for the ``MovieID`` attribute of this reader.
+        """
+        return self.MovieID
+
+    @property
+    def readGroupTable(self):
+        """
+        Return the read group table, building it on first access by
+        calling ``_loadReadGroupInfo``.
+        """
+        # TODO: add doctest
+        if self._readGroupTable is None:
+            self._loadReadGroupInfo()
+        return self._readGroupTable
+
+    def readGroupInfo(self, rgId):
+        """
+        Access information about a movie whose reads are represented
+        in the file.
+
+        `rgId` is the key under which the read group is stored in
+        ``self._readGroupDict``; that dictionary is built on first use
+        by calling ``_loadReadGroupInfo``.
+
+        The returned value is a record from the :attr:`readGroupTable`
+        """
+        # TODO: add doctest
+        if self._readGroupDict is None:
+            self._loadReadGroupInfo()
+        return self._readGroupDict[rgId]
+
+
+    def _movieInfo(self, movieId):
+        # Return the entry of self._movieDict stored under movieId
+        return self._movieDict[movieId]
+
+    def movieInfo(self, movieId):
+        """
+        .. deprecated:: 0.9.2
+           Use :attr:`readGroupInfo`, which is compatible with BAM usage
+
+        Access information about a movie whose reads are represented
+        in the file.  This is a public wrapper that delegates to
+        ``_movieInfo``.
+
+        The returned value is a record from the :attr:`movieInfoTable`
+        """
+        return self._movieInfo(movieId)
+
+    def referenceInfo(self, key):
+        """
+        Access information about a reference that was aligned against.
+        Key can be reference ID (integer), name ("ref000001"), full
+        name (e.g. "lambda_NEB3011"), truncated full name (full name
+        up to the first whitespace, following the samtools convention)
+        or MD5 sum hex string (e.g. "a1319ff90e994c8190a4fe6569d0822a").
+
+        The returned value is a record from the
+        :ref:`referenceInfoTable`, looked up in ``self._referenceDict``.
+
+        .. doctest::
+
+            >>> ri = c.referenceInfo("ref000001")
+            >>> ri.FullName
+            'lambda_NEB3011'
+            >>> ri.MD5
+            'a1319ff90e994c8190a4fe6569d0822a'
+
+        """
+        return self._referenceDict[key]
+
+    def readsInRange(self, refKey, refStart, refEnd, justIndices=False):
+        """
+        Get a list of reads overlapping (i.e., intersecting---not
+        necessarily spanning) a given reference window.
+
+        If `justIndices` is ``False``, the list returned will contain
+        `CmpH5Alignment` objects.
+
+        If `justIndices` is ``True``, the list returned will contain
+        row numbers in the alignment index table.  Slicing the
+        `CmpH5Reader` object with these row numbers can be used to get
+        the corresponding `CmpH5Alignment` objects.
+
+        The contig key can be either the ``RefID``, or the short name
+        (FASTA header up to first space).
+
+        Raises ``Exception`` if the file is not sorted.
+
+        .. doctest::
+
+            >>> c.readsInRange(1, 0, 1000) # doctest: +NORMALIZE_WHITESPACE
+            [CmpH5 alignment: -    1          0        290,
+             CmpH5 alignment: +    1          0        365]
+
+            >>> rowNumbers = c.readsInRange(1, 0, 1000, justIndices=True)
+            >>> rowNumbers
+            array([0, 1], dtype=uint32)
+        """
+
+        if not self.isSorted:
+            raise Exception("CmpH5 is not sorted")
+        # The locator is always asked for row numbers; alignments are
+        # materialized here only when the caller wants them.
+        rowNumbers = self._readLocatorByKey[refKey](refStart, refEnd, justIndices=True)
+        if justIndices:
+            return rowNumbers
+        return self[rowNumbers]
+
+    def hasPulseFeature(self, featureName):
+        """
+        Is a dataset named `featureName` present in every alignment
+        group of this cmp.h5 file?
+
+        .. doctest::
+
+            >>> c.hasPulseFeature("InsertionQV")
+            True
+            >>> c.hasPulseFeature("MergeQV")
+            False
+
+        """
+        for group in self._alignmentGroupById.values():
+            # A single group lacking the feature settles the answer
+            if featureName not in group.keys():
+                return False
+        return True
+
+    def pulseFeaturesAvailable(self):
+        """
+        List the dataset names that are present in every alignment
+        group of this cmp.h5 file, leaving out ``AlnArray``.
+
+        .. doctest::
+
+            >>> c.pulseFeaturesAvailable()
+            [u'QualityValue', u'IPD', u'PulseWidth', u'InsertionQV', u'DeletionQV']
+
+        """
+        featureSets = [ set(group.keys())
+                        for group in self._alignmentGroupById.values() ]
+        common = set.intersection(*featureSets)
+        # AlnArray is present in the groups but is not reported as a feature
+        common.discard("AlnArray")
+        return list(common)
+
+    @property
+    def barcode(self):
+        """
+        Returns a dict mapping of barcode name to integer barcode.
+
+        The underlying ``_barcode`` attribute is only assigned by
+        ``_loadMiscInfo`` when ``/AlnInfo`` contains ``Barcode``, so
+        behavior is undefined if the file is not barcoded; check
+        :attr:`isBarcoded` first.
+        """
+        return self._barcode
+
+    @property
+    def barcodeName(self):
+        """
+        Returns a dict mapping of barcode integer id to name.
+
+        The underlying ``_barcodeName`` attribute is only assigned by
+        ``_loadMiscInfo`` when ``/AlnInfo`` contains ``Barcode``, so
+        behavior is undefined if the file is not barcoded; check
+        :attr:`isBarcoded` first.
+        """
+        return self._barcodeName
+
+    @property
+    def barcodes(self):
+        """
+        Returns an array of barcode integer ids, of the same length as the
+        alignment array.
+
+        The underlying ``_barcodes`` attribute is only assigned by
+        ``_loadMiscInfo`` when ``/AlnInfo`` contains ``Barcode``, so
+        behavior is undefined if the file is not barcoded; check
+        :attr:`isBarcoded` first.
+        """
+        return self._barcodes
+
+    @property
+    def qId(self):
+        """
+        Alias for :attr:`ReadGroupID`.
+        """
+        # Forward compatibility with BAM API
+        return self.ReadGroupID
+
+    @property
+    def holeNumber(self):
+        """
+        Alias for the ``HoleNumber`` attribute of this reader.
+        """
+        # Forward compatibility with BAM API
+        return self.HoleNumber
+
+    @property
+    def mapQV(self):
+        """
+        Alias for the ``MapQV`` attribute of this reader.
+        """
+        # Forward compatibility with BAM API
+        return self.MapQV
+
+    def __getitem__(self, rowNumbers):
+        """
+        Fetch alignments by row number.  `rowNumbers` may be:
+
+          - a single integer (Python or numpy): returns one
+            `CmpH5Alignment`
+          - a slice: returns a list of `CmpH5Alignment` objects
+          - a list or numpy array of integers: returns a list with one
+            `CmpH5Alignment` per row number
+          - a list or numpy array of booleans: treated as a mask; a
+            list is returned for the positions that are true
+
+        The element type of a list or array is judged from its first
+        entry only; an empty list or array yields ``[]``.
+
+        Raises ``TypeError`` for any other argument.
+        """
+        if isinstance(rowNumbers, (int, np.integer)):
+            return CmpH5Alignment(self, rowNumbers)
+        elif isinstance(rowNumbers, slice):
+            return [CmpH5Alignment(self, r)
+                    for r in xrange(*rowNumbers.indices(len(self)))]
+        elif isinstance(rowNumbers, (list, np.ndarray)):
+            if len(rowNumbers) == 0:
+                return []
+            else:
+                entryType = type(rowNumbers[0])
+                if entryType == int or issubclass(entryType, np.integer):
+                    return [CmpH5Alignment(self, r) for r in rowNumbers]
+                elif entryType == bool or issubclass(entryType, np.bool_):
+                    return [CmpH5Alignment(self, r) for r in np.flatnonzero(rowNumbers)]
+        # Reached for unsupported containers and unsupported entry types
+        raise TypeError("Invalid type for CmpH5Reader slicing")
+
+    def __iter__(self):
+        # Generator over all rows in order; each item is self[i]
+        return (self[i] for i in xrange(len(self)))
+
+    def __len__(self):
+        # The length of the reader is the length of the alignment index
+        return len(self.alignmentIndex)
+
+    def __getattr__(self, key):
+        """
+        Expose alignment index columns as attributes (e.g. ``c.MapQV``).
+
+        Python calls this only after normal attribute lookup has
+        failed.  Raises ``AttributeError`` when `key` is not a column of
+        the alignment index, so that ``hasattr`` and three-argument
+        ``getattr`` behave as they do for ordinary objects.
+        """
+        # None of the documented alignment index columns starts with an
+        # underscore.  Refusing such names up front also breaks the cycle
+        # alignmentIndex -> self._alignmentIndex -> __getattr__ ->
+        # alignmentIndex, which otherwise recurses without bound when
+        # _alignmentIndex has not been assigned yet.
+        if key.startswith("_"):
+            raise AttributeError(key)
+        alignmentIndex = self.__getattribute__("alignmentIndex")
+        try:
+            return alignmentIndex[key]
+        except (KeyError, ValueError):
+            # numpy reports an unknown field name with one of these
+            raise AttributeError("'%s' object has no attribute or alignment "
+                                 "index column '%s'" % (type(self).__name__, key))
+
+    def close(self):
+        """
+        Close the underlying file, if there is one, and forget it.
+        Calling this again afterwards does nothing.
+        """
+        if not hasattr(self, "file") or self.file is None:
+            return
+        self.file.close()
+        self.file = None
+
+    def __enter__(self):
+        # Context manager entry: the reader itself is the managed object
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Context manager exit: close the file.  Nothing is returned, so
+        # an exception raised inside the with-block is not suppressed.
+        self.close()
+
+    def __dir__(self):
+        # Special magic improving IPython completion: offer the names in
+        # ALIGNMENT_INDEX_COLUMNS as attributes.
+        # NOTE(review): only those names are returned, so dir() does not
+        # list the regular methods and properties -- confirm this is intended.
+        return ALIGNMENT_INDEX_COLUMNS
diff --git a/pbcore/io/align/PacBioBamIndex.py b/pbcore/io/align/PacBioBamIndex.py
new file mode 100644
index 0000000..12e4750
--- /dev/null
+++ b/pbcore/io/align/PacBioBamIndex.py
@@ -0,0 +1,121 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+import h5py
+import numpy as np
+from os.path import abspath, expanduser
+from functools import wraps
+from collections import namedtuple
+
+class PacBioBamIndex(object):
+    """
+    The PacBio BAM index is a companion file allowing modest
+    *semantic* queries on PacBio BAM files without iterating over the
+    entire file.  By convention, the PacBio BAM index has extension
+    "bam.pbi".
+
+    The bam.pbi index is an HDF5 file containing two data frames
+    (groups containing arrays (frame columns) of common length):
+
+      - A table with a row per BAM record, columns reflecting
+        precomputed statistics per record
+
+      - A table with a row per reference contig (tid) in the BAM,
+        indicating the range of rows pertaining to that contig.  This
+        table is not loaded yet: `_loadOffsets` is a stub.
+
+    Columns of the per-record table are available as attributes of
+    the index object (e.g. ``pbi.tStart``).
+    """
+    def _loadColumns(self, f):
+        # Read every dataset under PacBioBamIndex/Columns into memory and
+        # assemble them, ordered by column name, into a single recarray.
+        g = f["PacBioBamIndex/Columns"]
+        columnNamesAndColumns = sorted((k, v[:]) for (k, v) in g.items())
+        columnNames, columns = zip(*columnNamesAndColumns)
+        return np.rec.fromarrays(columns, names=columnNames)
+
+    def _loadVersion(self, f):
+        return f["PacBioBamIndex"].attrs["Version"]
+
+    def _loadOffsets(self, f):
+        # Not implemented; self._offsets is therefore always None
+        pass
+
+    def __init__(self, pbiFilename):
+        """
+        Load the index stored in the file `pbiFilename`.
+
+        Raises ``IOError`` if the expected contents cannot be read.
+        """
+        pbiFilename = abspath(expanduser(pbiFilename))
+        with h5py.File(pbiFilename, "r") as f:
+            try:
+                self._version = self._loadVersion(f)
+                self._columns = self._loadColumns(f)
+                self._offsets = self._loadOffsets(f)
+            except Exception as e:
+                raise IOError("Malformed bam.pbi file: " + str(e))
+
+
+    @property
+    def version(self):
+        return self._version
+
+    @property
+    def columnNames(self):
+        return list(self._columns.dtype.names)
+
+    def __getattr__(self, columnName):
+        # Reached only when normal lookup fails.  Fetch _columns without
+        # going through __getattr__ again: on an object whose _columns was
+        # never assigned, asking for self.columnNames here would re-enter
+        # this method and recurse without bound.
+        try:
+            columns = object.__getattribute__(self, "_columns")
+        except AttributeError:
+            raise AttributeError(columnName)
+        if columnName in columns.dtype.names:
+            return columns[columnName]
+        raise AttributeError("pbi has no column named '%s'" % columnName)
+
+    def __getitem__(self, rowNumber):
+        return self._columns[rowNumber]
+
+    def __dir__(self):
+        # Special magic for IPython tab completion
+        return self.columnNames
+
+    def __len__(self):
+        return len(self._columns)
+
+    def __iter__(self):
+        # Yield the rows of the per-record table in order
+        for row in self._columns:
+            yield row
+
+    def rangeQuery(self, winId, winStart, winEnd):
+        """
+        Return the row numbers of the records on reference `winId`
+        that overlap the window [`winStart`, `winEnd`).
+        """
+        #
+        # A read overlaps the window if winId == tid and
+        #
+        #  (tStart < winEnd) && (tEnd > winStart)     (1)
+        #
+        # We are presently doing this naively, just computing the
+        # predicate over all rows. If/when we determine this is too
+        # slow, we can accelerate using the nBackread approach we use
+        # in the cmp.h5, doing binary search to identify a candidate
+        # range and then culling the range.
+        #
+        ix = np.flatnonzero((self.tId    == winId)  &
+                            (self.tStart  < winEnd) &
+                            (self.tEnd    > winStart))
+        return ix
diff --git a/pbcore/io/align/_AlignmentMixin.py b/pbcore/io/align/_AlignmentMixin.py
new file mode 100644
index 0000000..d5e3500
--- /dev/null
+++ b/pbcore/io/align/_AlignmentMixin.py
@@ -0,0 +1,210 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+__all__ = [ "AlignmentReaderMixin",
+            "AlignmentRecordMixin",
+            "IndexedAlignmentReaderMixin" ]
+
+from pbcore.io import BasH5Collection
+import numpy as np
+
+class AlignmentReaderMixin(object):
+    """
+    Mixin adding movie-attachment support to alignment file readers.
+    """
+    def attach(self, fofnFilename):
+        """
+        Attach the actual movie data files that were used to create
+        this alignment file, by building a `BasH5Collection` from
+        `fofnFilename` and keeping it as ``self.basH5Collection``.
+        """
+        collection = BasH5Collection(fofnFilename)
+        self.basH5Collection = collection
+
+    @property
+    def moviesAttached(self):
+        """
+        True once a collection of movie files has been attached.
+        """
+        if not hasattr(self, "basH5Collection"):
+            return False
+        return self.basH5Collection is not None
+
+
+class IndexedAlignmentReaderMixin(AlignmentReaderMixin):
+    """
+    Mixin for alignment readers that can be queried through an
+    alignment index.
+    """
+    def readsByName(self, query):
+        """
+        Identifies reads by name query.  The name query is interpreted as follows:
+
+         - "movieName/holeNumber[/[*]]"       => gets all records from a chosen movie, ZMW
+         - "movieName/holeNumber/rStart_rEnd" => gets all records *overlapping* read range query in movie, ZMW
+         - "movieName/holeNumber/ccs"         => gets CCS records from chosen movie, ZMW (zero or one)
+
+        Records are returned in a list in ascending order of rStart
+        """
+        def readGroupIdsFor(movie):
+            table = self.readGroupTable
+            return table.ID[table.MovieName == movie]
+
+        def windowsOverlap(first, second):
+            firstStart, firstEnd = first
+            secondStart, secondEnd = second
+            return (firstEnd > secondStart) and (secondEnd > firstStart)
+
+        def matchesReadQuery(readName, rQuery):
+            # "*" and the empty query accept every read
+            if rQuery in ("*", ""):
+                return True
+            isCcs = readName.endswith("ccs")
+            if rQuery == "ccs":
+                return isCcs
+            if isCcs:
+                return False
+            # Both the query and the last field of the read name are
+            # "start_end" pairs
+            queryWindow = map(int, rQuery.split("_"))
+            readWindow = map(int, readName.split("/")[-1].split("_"))
+            return windowsOverlap(queryWindow, readWindow)
+
+        parts = query.split("/")
+        movieName = parts[0]
+        holeNumber = int(parts[1])
+        rQuery = parts[2] if len(parts) > 2 else "*"
+
+        readGroupIds = readGroupIdsFor(movieName)
+        rowNumbers = np.flatnonzero(np.in1d(self.qId, readGroupIds) &
+                                    (self.holeNumber == holeNumber))
+        matches = [ aln for aln in self[rowNumbers]
+                    if matchesReadQuery(aln.readName, rQuery) ]
+        matches.sort(key=lambda aln: aln.readStart)
+        return matches
+
+
+class AlignmentRecordMixin(object):
+    """
+    Mixin adding convenience accessors and interval predicates to
+    alignment records.
+    """
+    @property
+    def zmw(self):
+        """
+        The entry of the attached movie collection for this record's
+        ZMW.  Raises ``ValueError`` if no movies are attached.
+        """
+        reader = self.reader
+        if reader.moviesAttached:
+            return reader.basH5Collection[self.zmwName]
+        raise ValueError("Movies not attached!")
+
+    @property
+    def zmwRead(self):
+        """
+        The entry of the attached movie collection for this record's
+        read.  Raises ``ValueError`` if no movies are attached.
+        """
+        reader = self.reader
+        if reader.moviesAttached:
+            return reader.basH5Collection[self.readName]
+        raise ValueError("Movies not attached!")
+
+    @property
+    def referenceStart(self):
+        """
+        Left bound of the alignment in reference coordinates (``tStart``).
+        """
+        return self.tStart
+
+    @property
+    def referenceEnd(self):
+        """
+        Right bound of the alignment in reference coordinates (``tEnd``).
+        """
+        return self.tEnd
+
+    @property
+    def readStart(self):
+        """
+        Left bound of the alignment in read coordinates (``aStart``).
+        """
+        return self.aStart
+
+    @property
+    def readEnd(self):
+        """
+        Right bound of the alignment in read coordinates (``aEnd``).
+        """
+        return self.aEnd
+
+    @property
+    def referenceSpan(self):
+        """
+        Length along the reference covered by this alignment.
+        """
+        end = self.tEnd
+        return end - self.tStart
+
+    @property
+    def readLength(self):
+        """
+        Length along the read covered by this alignment.
+        """
+        end = self.aEnd
+        return end - self.aStart
+
+    def __len__(self):
+        return self.readLength
+
+    @property
+    def readName(self):
+        """
+        Name of the aligned read: the ZMW name followed by ``/ccs``
+        for CCS reads, or by ``/<aStart>_<aEnd>`` otherwise.
+        """
+        prefix = self.zmwName
+        if self.readType == "CCS":
+            suffix = "ccs"
+        else:
+            suffix = "%d_%d" % (self.aStart, self.aEnd)
+        return "%s/%s" % (prefix, suffix)
+
+    @property
+    def zmwName(self):
+        """
+        ``<movieName>/<HoleNumber>``
+        """
+        return "%s/%d" % (self.movieName, self.HoleNumber)
+
+    def spansReferencePosition(self, pos):
+        """
+        Is `pos` within [tStart, tEnd)?
+        """
+        return (self.tStart <= pos) and (pos < self.tEnd)
+
+    def spansReferenceRange(self, start, end):
+        """
+        Does the alignment cover the whole of [start, end]?
+        """
+        assert start <= end
+        return (self.tStart <= start) and (start <= end) and (end <= self.tEnd)
+
+    def overlapsReferenceRange(self, start, end):
+        """
+        Does the alignment share any position with [start, end)?
+        """
+        assert start <= end
+        startsBeforeEnd = self.tStart < end
+        endsAfterStart = self.tEnd > start
+        return startsBeforeEnd and endsAfterStart
+
+    def containedInReferenceRange(self, start, end):
+        """
+        Does [start, end] cover the whole alignment?
+        """
+        assert start <= end
+        tStart = self.tStart
+        tEnd = self.tEnd
+        return (start <= tStart) and (tStart <= tEnd) and (tEnd <= end)
diff --git a/pbcore/io/align/_BamSupport.py b/pbcore/io/align/_BamSupport.py
new file mode 100644
index 0000000..3175e49
--- /dev/null
+++ b/pbcore/io/align/_BamSupport.py
@@ -0,0 +1,127 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Author: David Alexander
+
+import numpy as np
+
+# Exception types defined for the BAM support code.
+# NOTE(review): the conditions under which each one is raised are not
+# visible in this module -- confirm against the callers.
+class UnavailableFeature(Exception): pass
+class Unimplemented(Exception):      pass
+class ReferenceMismatch(Exception):  pass
+class IncompatibleFile(Exception):   pass
+
+
+# Pulse feature name -> (two-letter code, category string, numpy dtype).
+# NOTE(review): the two-letter codes look like the BAM tag names under
+# which each feature is stored, and the category ("qv", "base", "time")
+# like the decoding to apply -- confirm against the BAM reader.
+PULSE_FEATURE_TAGS = { "InsertionQV"    : ("iq", "qv",   np.uint8),
+                       "DeletionQV"     : ("dq", "qv",   np.uint8),
+                       "DeletionTag"    : ("dt", "base", np.int8 ),
+                       "SubstitutionQV" : ("sq", "qv",   np.uint8),
+                       "MergeQV"        : ("mq", "qv",   np.uint8),
+                       "IPD"            : ("ip", "time", np.uint8),
+                       "PulseWidth"     : ("pw", "time", np.uint8) }
+
+# Base -> complementary base; "N" and the gap character "-" map to themselves
+COMPLEMENT_MAP = { "A" : "T",
+                   "T" : "A",
+                   "C" : "G",
+                   "G" : "C",
+                   "N" : "N",
+                   "-" : "-" }
+
+def complementAscii(a):
+    """
+    Complement a sequence given as character codes, returning the
+    complemented codes as an int8 array.  Raises ``KeyError`` for a
+    character that is not a key of `COMPLEMENT_MAP`.
+    """
+    complemented = []
+    for code in a:
+        base = chr(code)
+        complemented.append(ord(COMPLEMENT_MAP[base]))
+    return np.array(complemented, dtype=np.int8)
+
+def reverseComplementAscii(a):
+    """
+    Like `complementAscii`, with the result in reverse order.
+    """
+    forward = complementAscii(a)
+    return forward[::-1]
+
+
+# Numeric CIGAR operation codes; the values follow the BAM encoding in
+# the SAM/BAM specification (operation letter given alongside).
+BAM_CMATCH     = 0   # M
+BAM_CINS       = 1   # I
+BAM_CDEL       = 2   # D
+BAM_CREF_SKIP  = 3   # N
+BAM_CSOFT_CLIP = 4   # S
+BAM_CHARD_CLIP = 5   # H
+BAM_CPAD       = 6   # P
+BAM_CEQUAL     = 7   # =
+BAM_CDIFF      = 8   # X
+
+
+
+#
+# qId calculation from RG ID string
+#
+def rgAsInt(rgIdString):
+    """
+    Convert a hexadecimal read group ID string to a numpy ``int32``.
+
+    The low 32 bits of the parsed value are reinterpreted as a signed
+    integer, so IDs of ``80000000`` and above come out negative
+    (e.g. ``"ffffffff"`` gives ``-1``).
+    """
+    # Do the wrap-around explicitly: handing a Python int >= 2**31 straight
+    # to np.int32() only wraps on older NumPy releases and raises
+    # OverflowError on NumPy 2.
+    unsigned = int(rgIdString, 16) & 0xFFFFFFFF
+    return np.uint32(unsigned).astype(np.int32)
+
+#
+# Kinetics: decode the scheme we are using to encode approximate frame
+# counts in 8-bits.
+#
+def _makeFramepoints():
+    """
+    Build the table of frame counts that the 8-bit code can represent.
+
+    The 256 codes are split into groups of ``T = 2**t`` codes.  Within
+    group ``i`` consecutive frame counts are ``B**i`` apart, and each
+    group starts where the previous one left off.  Returns the frame
+    counts as an integer array indexed by code.
+    """
+    B = 2
+    t = 6
+    T = 2**t
+
+    framepoints = []
+    nextFrame = 0
+    # Floor division keeps the group count an integer under true division
+    for i in range(256 // T):
+        grain = B**i
+        nextOnes = nextFrame + grain * np.arange(0, T)
+        nextFrame = nextOnes[-1] + grain
+        framepoints.extend(nextOnes)
+    return np.array(framepoints, dtype=int)
+
+def _makeLookup(framepoints):
+    """
+    Build the frame count -> code lookup table for `framepoints`.
+
+    Every frame count from 0 up to the largest framepoint is assigned
+    the code of the nearest framepoint; a frame count exactly halfway
+    between two framepoints gets the higher code.
+
+    Returns ``(frameToCode, maxFramepoint)``: the lookup array and the
+    last framepoint, which is the largest frame count it covers.
+    """
+    # (frame -> code) involves some kind of rounding
+    # basic round-to-nearest
+    frameToCode = np.empty(shape=max(framepoints)+1, dtype=int)
+    for i, (fl, fu) in enumerate(zip(framepoints, framepoints[1:])):
+        if (fu > fl + 1):
+            # Floor division keeps the midpoint usable as an index
+            m = (fl + fu) // 2
+            frameToCode[fl:m] = i
+            frameToCode[m:fu] = i + 1
+        else:
+            frameToCode[fl] = i
+    # Extra entry for last:
+    frameToCode[fu] = i + 1
+    return frameToCode, fu
+
+# Built once at import time: _framepoints maps code -> frame count,
+# _frameToCode maps frame count -> code, and _maxFramepoint is the
+# largest frame count that _frameToCode covers.
+_framepoints = _makeFramepoints()
+_frameToCode, _maxFramepoint = _makeLookup(_framepoints)
+
+def framesToCode(nframes):
+    # Encode frame counts as codes.  Counts above _maxFramepoint are
+    # clamped to it before the table lookup.
+    # NOTE(review): negative counts are not clamped and would index the
+    # table from its end -- confirm callers never pass them.
+    nframes = np.minimum(_maxFramepoint, nframes)
+    return _frameToCode[nframes]
+
+def codeToFrames(code):
+    # Decode codes to the frame counts they stand for (lookup in _framepoints)
+    return _framepoints[code]
+
+def downsampleFrames(nframes):
+    # Encode and decode again, giving the frame counts as they would
+    # read back after a round trip through the code
+    return codeToFrames(framesToCode(nframes))
diff --git a/pbcore/io/align/__init__.py b/pbcore/io/align/__init__.py
new file mode 100644
index 0000000..8cf3971
--- /dev/null
+++ b/pbcore/io/align/__init__.py
@@ -0,0 +1,34 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+from CmpH5IO      import *
+from BamIO        import *
+from BamAlignment import *
+from BlasrIO      import *
diff --git a/pbcore/io/base.py b/pbcore/io/base.py
new file mode 100644
index 0000000..292812c
--- /dev/null
+++ b/pbcore/io/base.py
@@ -0,0 +1,109 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# Base classes for readers and writers.
+# Author: David Alexander
+
+from __future__ import absolute_import
+import gzip
+from os.path import abspath, expanduser
+
+__all__ = [ "ReaderBase", "WriterBase" ]
+
+def isFileLikeObject(o):  # true only when o has both a "read" and a "write" attribute
+    return all(hasattr(o, attr) for attr in ("read", "write"))
+
+def getFileHandle(filenameOrFile, mode="r"):
+    """
+    Given a filename not ending in ".gz", open the file with the
+    appropriate mode.
+
+    Given a filename ending in ".gz", return a filehandle to the
+    unzipped stream.
+
+    Given an object with both "read" and "write" attributes, return
+    it as is; its own mode is not checked against `mode`.
+    """
+    assert mode in ("r", "w")
+
+    if isinstance(filenameOrFile, basestring):
+        filename = abspath(expanduser(filenameOrFile))
+        if filename.endswith(".gz"):
+            return gzip.open(filename, mode)
+        else:
+            return open(filename, mode)
+    elif isFileLikeObject(filenameOrFile):
+        return filenameOrFile
+    else:
+        raise Exception("Invalid type to getFileHandle")
+
+class ReaderBase(object):
+    def __init__(self, f):
+        """Prepare for iteration through the records in the file"""
+        self.file = getFileHandle(f, "r")
+
+    def close(self):
+        """Close the underlying file"""
+        self.file.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def __repr__(self):
+        # self.filename is not set by this class; fall back rather than raise.
+        filename = getattr(self, "filename", None)
+        if filename is None:
+            filename = getattr(getattr(self, "file", None), "name", "(anonymous)")
+        return "<%s for %s>" % (type(self).__name__, filename)
+
+class WriterBase(object):
+    def __init__(self, f):
+        """Prepare for output to the file"""
+        self.file = getFileHandle(f, "w")
+
+    def close(self):
+        """Close the underlying file"""
+        self.file.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.close()
+
+    def __repr__(self):
+        # self.filename is not set by this class; fall back rather than raise.
+        filename = getattr(self, "filename", None)
+        if filename is None:
+            filename = getattr(getattr(self, "file", None), "name", "(anonymous)")
+        return "<%s for %s>" % (type(self).__name__, filename)
diff --git a/pbcore/io/opener.py b/pbcore/io/opener.py
new file mode 100644
index 0000000..094b173
--- /dev/null
+++ b/pbcore/io/opener.py
@@ -0,0 +1,134 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+
+__all__ = [ "openAlignmentFile",
+            "openIndexedAlignmentFile",
+            "openFasta",
+            "openIndexedFasta",
+            "entryPoint" ]
+
+from pbcore.io import (FastaTable, FastaReader,
+                       BaxH5Reader, BasH5Reader, BasH5Collection,
+                       CmpH5Reader, BamReader, IndexedBamReader,
+                       GffReader, FastqReader)
+
+def openIndexedAlignmentFile(fname, referenceFasta=None):
+    """
+    Factory function returning a reader for an alignment file (cmp.h5
+    or BAM) with index capability (built in for cmp.h5; a bam.pbi
+    index is required for BAM).
+
+    The reference FASTA, if provided, must have a FASTA index
+    (fasta.fai).  Any other file suffix raises ValueError.
+    """
+    # Dispatch on the file name suffix.
+    if fname.endswith("cmp.h5"):
+        return CmpH5Reader(fname)
+    if fname.endswith("bam"):
+        return IndexedBamReader(fname, referenceFasta)
+    raise ValueError("Invalid alignment file suffix")
+
+def openAlignmentFile(fname, referenceFasta=None):
+    """Open a cmp.h5 or BAM alignment file; no index is required (a BAM
+    falls back to BamReader when IndexedBamReader raises IOError)."""
+    if fname.endswith("cmp.h5"):
+        return CmpH5Reader(fname)
+    if not fname.endswith("bam"):
+        # Used to fall through and return None; now matches the indexed opener.
+        raise ValueError("Invalid alignment file suffix")
+    try:
+        return IndexedBamReader(fname, referenceFasta)
+    except IOError:
+        return BamReader(fname, referenceFasta)
+
+def openIndexedFasta(fname):
+    """
+    Factory function returning a FastaTable for fname, i.e. a FASTA
+    reader with random access capability via the fasta.fai index.
+    """
+    return FastaTable(fname)
+
+def openFasta(fname):
+    """
+    Factory function to get a FASTA reader for iterating over the
+    contigs: a FastaTable, or a FastaReader if that raises IOError.
+    """
+    try:
+        return FastaTable(fname)
+    except IOError:
+        # TODO: would be better to have a more specific error type
+        return FastaReader(fname)
+
+def _openerFor(ext):
+    # Map a file extension (as produced by _extension) to its opener.
+    openers = { "gff"    : GffReader,
+                "fq"     : FastqReader,     "fastq"  : FastqReader,
+                "fa"     : openFasta,       "fasta"  : openFasta,
+                "cmp.h5" : CmpH5Reader,
+                "bas.h5" : BasH5Reader,     "bax.h5" : BaxH5Reader,
+                "fofn"   : BasH5Collection, "bam"    : openAlignmentFile }
+    if ext in openers:
+        return openers[ext]
+    raise ValueError("No known opener class for extension %s" % ext)
+
+def _extension(fname):
+    # Last dot-separated component of fname, or the last two joined by
+    # "." when the final one is "h5" (e.g. "cmp.h5", "bas.h5").
+    pieces = fname.split(".")
+    keep = 2 if pieces[-1] == "h5" else 1
+    return ".".join(pieces[-keep:])
+
+def _openAny(fname, *extraArgs):
+    # Pick the opener from fname's extension, then call it with fname
+    # followed by any extra positional arguments.
+    return _openerFor(_extension(fname))(fname, *extraArgs)
+
+def entryPoint():
+    """
+    This entry point (callable from the command line as ".open")
+    provides a convenient way to load up a data file for inspection.
+    """
+    import sys, code
+
+    if len(sys.argv) < 2:
+        print "Requires at least one argument!"
+        return 1
+
+    fname = sys.argv[1]
+    extraArgs = sys.argv[2:]  # forwarded to the opener after the file name
+
+    f = _openAny(fname, *extraArgs)  # keep the name 'f': the banner announces it
+    banner = "Your file has been opened as object 'f'"
+    try:
+        from IPython import embed
+        embed(banner1=banner)
+    except ImportError:  # no IPython: use the stdlib console with these locals
+        code.InteractiveConsole(locals=locals()).interact(banner=banner)
diff --git a/pbcore/io/rangeQueries.py b/pbcore/io/rangeQueries.py
new file mode 100644
index 0000000..0832696
--- /dev/null
+++ b/pbcore/io/rangeQueries.py
@@ -0,0 +1,182 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+import h5py as h
+import numpy as n
+import bisect
+
+def rightmostBinSearch(vec, val):
+    """
+    Return the rightmost position in the vector vec of val. If val is
+    absent then we return the leftmost position of the value:
+    min(vec[vec > val]). If val is greater than all elements in vec we
+    return len(vec).
+    """
+    assert len(vec) > 0
+
+    pos = bisect.bisect_left(vec, val)
+    last = len(vec) - 1
+
+    # bisect_left lands on the first occurrence (or the insertion
+    # point); step right across any run of entries equal to val.
+    while pos < last and vec[pos + 1] == val:
+        pos += 1
+
+    return pos
+
+def leftmostBinSearch(vec, val):
+    """
+    Return the leftmost position in the vector vec of val. If val is
+    absent then we return the leftmost position for the value:
+    max(vec[vec < val]). The time complexity here is potentially worse
+    than log(n) because of the extra step of walking backwards.
+    """
+    assert len(vec) > 0
+    pos = bisect.bisect_left(vec, val)
+
+    if pos == 0:
+        return pos
+    # Past the end: start from the last element instead.
+    if pos == len(vec):
+        pos -= 1
+
+    # Sitting on a larger element means val is absent: step back once.
+    if vec[pos] > val:
+        pos -= 1
+
+    # Walk left to the first entry of a run of equal values.
+    while pos > 0 and vec[pos - 1] == vec[pos]:
+        pos -= 1
+
+    return pos
+
+
+def getOverlappingRanges(tStart, tEnd, nBack, nOverlap, rangeStart, rangeEnd):
+    """
+    Return the indices of the rows overlapping [rangeStart, rangeEnd).
+    tStart, tEnd, nBack and nOverlap are equal-length vectors sorted
+    according to tStart and tEnd; nBack and nOverlap are typically
+    produced by computeIndices[DP].
+    """
+    assert rangeEnd > rangeStart
+    assert len(tStart) == len(tEnd) == len(nBack) == len(nOverlap)
+
+    first = leftmostBinSearch(tStart, rangeStart)
+    first -= nBack[first]
+    stop = rightmostBinSearch(tStart, rangeEnd - .5)
+
+    assert stop >= first and stop >= 0 and first >= 0
+
+    if first == stop:
+        return n.array([], dtype = "uint32")
+
+    # Candidates are rows first .. stop-1.  Keep only those that
+    # actually overlap the range, which is decided by
+    # tEnd > rangeStart.
+    candidates = n.arange(first, stop, dtype = "uint32")
+    overlapping = tEnd[candidates] > rangeStart
+    return candidates[overlapping]
+
+def projectIntoRange(tStart, tEnd, winStart, winEnd):
+    """
+    Return the per-position coverage of the window [winStart, winEnd)
+    implied by the tStart, tEnd vectors.  This is most efficient
+    when the reads were first narrowed down with the
+    getOverlappingRanges function and are then projected into the same
+    or a smaller range.
+    """
+    assert len(tStart) == len(tEnd)
+    coverage = n.zeros(shape=winEnd-winStart, dtype=n.uint)
+    # Clip to the window first and only then translate, so that
+    # subtracting winStart cannot underflow.
+    starts = n.clip(tStart, winStart, winEnd) - winStart
+    ends   = n.clip(tEnd,   winStart, winEnd) - winStart
+    for lo, hi in zip(starts, ends):
+        coverage[lo:hi] += 1
+    return coverage
+
+def makeReadLocator(cmpH5, refSeq):
+    """
+    Return a function f(rangeStart, rangeEnd, justIndices=False) that
+    finds the reads of refSeq overlapping a range. cmpH5 must be sorted.
+    """
+    if not cmpH5.isSorted: raise Exception, "CmpH5 is not sorted"
+    offsets = cmpH5.file["/RefGroup/OffsetTable"].value
+    offStart, offEnd = offsets[offsets[:,0] == refSeq, 1:3].ravel()  # columns 1 and 2 of the refSeq row
+
+    if (offEnd - offStart > 0):
+        refAlignIdx = cmpH5.alignmentIndex[offStart:offEnd, ]
+        returnEmpty = False
+    else:
+        refAlignIdx = cmpH5.alignmentIndex[1:2, ]  # placeholder rows; f() only ever selects none of them
+        returnEmpty = True
+
+    def f(rangeStart, rangeEnd, justIndices = False):
+        if returnEmpty:
+            ## This looks strange, but the idea is that a rowless matrix
+            ## still has columns and these are what I want to preserve --
+            ## h5py objects cannot be subset by a vector of length 0,
+            ## however, numpy allows this.
+            idxs = n.array([], dtype = 'uint32')
+        else:
+            idxs = getOverlappingRanges(refAlignIdx.tStart, refAlignIdx.tEnd,
+                                        refAlignIdx.nBackRead, refAlignIdx.nReadOverlap,
+                                        rangeStart, rangeEnd)
+        if justIndices:
+            return(idxs + offStart)  # idxs are relative to refAlignIdx; shift back by its start
+        else:
+            return(refAlignIdx[idxs,])
+    return f
+
+def getReadsInRange(cmpH5, coords, justIndices = False):
+    """
+    Return an ndarray representing the portion of the reads which
+    overlap the range specified by coords, where coords is a
+    three-tuple composed of (refSeqID, rangeStart, rangeEnd).  Here,
+    cmpH5 is an hdf5 object representing a pointer to a sorted cmp.h5
+    file.
+    """
+    if not cmpH5.isSorted: raise Exception, "CmpH5 is not sorted"
+    return makeReadLocator(cmpH5, coords[0])(coords[1], coords[2], justIndices)
+
+def getCoverageInRange(cmpH5, coords, rowNumbers=None):
+    """
+    Return a vector of length coords[2] - coords[1] where each element
+    is the number of reads overlapping that position in the cmp.h5 file.
+    rowNumbers, if given, replaces the getReadsInRange lookup.
+    """
+    if not cmpH5.isSorted: raise Exception("CmpH5 is not sorted")
+    if rowNumbers is None:  # identity test: "== None" on an ndarray is element-wise
+        rowNumbers = getReadsInRange(cmpH5, coords, justIndices=True)
+    if len(rowNumbers) == 0:
+        return n.array([0]*(coords[2] - coords[1]))
+    # Project the selected rows' start/end positions onto the window.
+    return projectIntoRange(cmpH5.tStart[rowNumbers], cmpH5.tEnd[rowNumbers], coords[1], coords[2])
+
diff --git a/pbcore/model/__init__.py b/pbcore/model/__init__.py
new file mode 100644
index 0000000..6861c47
--- /dev/null
+++ b/pbcore/model/__init__.py
@@ -0,0 +1,29 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
diff --git a/pbcore/sequence.py b/pbcore/sequence.py
new file mode 100644
index 0000000..b3aa1b1
--- /dev/null
+++ b/pbcore/sequence.py
@@ -0,0 +1,62 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+# sequence.py: module of basic sequence methods
+# Authors: Brett Bowman, David Alexander
+
+__all__ = [ "complement",
+            "reverseComplement"]
+
+from string import maketrans
+import re
+
+DNA_COMPLEMENT = maketrans('agctAGCT-N', 'tcgaTCGA-N')
+
+def reverse( sequence ):
+    """Return the reverse of any sequence (taken with a [::-1] slice)
+    """
+    return sequence[::-1]
+
+def complement( sequence ):
+    """
+    Return the DNA complement of sequence; invalid characters raise ValueError
+    """
+    invalid = re.search('[^AGCTNagctn-]', sequence)
+    if invalid is not None:
+        raise ValueError("Sequence contains invalid DNA characters - "
+                         "only [AGCTN-] allowed")
+    return sequence.translate( DNA_COMPLEMENT )
+
+def reverseComplement( sequence ):
+    """
+    Return the reverse-complement of a DNA sequence: complement()
+    is applied first, then the result is reversed
+    """
+    return reverse(complement(sequence))
diff --git a/pbcore/util/Process.py b/pbcore/util/Process.py
new file mode 100644
index 0000000..bd57878
--- /dev/null
+++ b/pbcore/util/Process.py
@@ -0,0 +1,68 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+__doc__="""Useful functions for interacting with processes."""
+import sys
+import os
+import subprocess
+
+def backticks( cmd, merge_stderr=True ):
+    """
+    Simulates the perl backticks (``) command with error-handling support
+    Returns ( command output as sequence of strings, error code, error message )
+    cmd is run through the shell; on a positive exit code the captured
+    lines are returned as the error message and the output is empty.
+    """
+    _stderr = subprocess.STDOUT if merge_stderr else subprocess.PIPE
+
+    p = subprocess.Popen( cmd, shell=True, stdin=subprocess.PIPE,
+                          stdout=subprocess.PIPE, stderr=_stderr,
+                          close_fds=True )
+
+    # communicate() closes stdin, drains stdout and stderr together and
+    # waits for exit; reading stdout alone can deadlock once the child
+    # fills an unread stderr pipe.
+    text = p.communicate()[0]
+
+    # A final line without a trailing newline must keep its last character.
+    out = text.split("\n")
+    if out[-1] == "":
+        out.pop()
+
+    errCode = p.returncode or 0
+    if p.returncode>0:
+        errorMessage = os.linesep.join(out)
+        output = []
+    else:
+        errorMessage = ''
+        output = out
+
+    return output, errCode, errorMessage
+
diff --git a/pbcore/util/ToolRunner.py b/pbcore/util/ToolRunner.py
new file mode 100644
index 0000000..37ba71e
--- /dev/null
+++ b/pbcore/util/ToolRunner.py
@@ -0,0 +1,115 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+import argparse, cProfile, logging, pstats
+
+
+LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+
+
+class PBToolRunner(object):
+    """Skeleton for command-line tools: argument parsing, logging setup, then run()."""
+    #
+    # Interface to be overridden in subclasses (client code)
+    #
+    def getVersion(self):
+        raise NotImplementedError()
+
+    def run(self):
+        raise NotImplementedError()
+
+    def validateArgs(self):
+        '''
+        Hook called by start() after parsing and logging setup; no-op here
+        '''
+        pass
+
+    #
+    # Methods below should not be overridden
+    #
+    def __init__(self, description):
+        self._setupParsers(description)
+        self.parser.add_argument(
+            "--verbose", "-v",
+            dest="verbosity", action="count",
+            help="Set the verbosity level")
+        self.parser.add_argument(
+            '--version',
+            action='version', version= '%(prog)s ' + self.getVersion())
+        self.parser.add_argument(
+            "--profile", action="store_true",
+            help="Print runtime profile at exit")
+        self.parser.add_argument(
+            "--debug", action="store_true",
+            help="Catch exceptions in debugger (requires ipdb)")
+
+    def _setupParsers(self, description):
+        self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                              description=description)
+
+    def _parseArgs(self):
+        self.args = self.parser.parse_args()
+
+    def _setupLogging(self):
+        if self.args.verbosity >= 2:
+            logLevel = logging.DEBUG
+        elif self.args.verbosity == 1:
+            logLevel = logging.INFO
+        else:
+            logLevel = logging.WARN
+        logging.basicConfig(level=logLevel, format=LOG_FORMAT)
+
+    def start(self):
+        self._parseArgs()
+        self._setupLogging()
+        self.validateArgs()
+        # Run under the debugger, under the profiler, or plainly.
+        if self.args.debug:
+            try:
+                import ipdb
+            except ImportError:
+                print "--debug requires module 'ipdb'"
+                return -1
+            with ipdb.launch_ipdb_on_exception():
+                return self.run()  # was discarded, so --debug always returned None
+
+        elif self.args.profile:
+            l = locals()
+            cProfile.runctx("_rv=self.run()", globals(), l, "profile.out")
+            pstats.Stats("profile.out").sort_stats("time").print_stats(20)
+            return l["_rv"]
+        else:
+            return self.run()
+
+class PBMultiToolRunner(PBToolRunner):
+    def _setupParsers(self, description):  # replaces the base version to add sub-commands
+        self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                              description=description)
+        self.subParsers = self.parser.add_subparsers(dest="subCommand")  # chosen name lands in args.subCommand
diff --git a/pbcore/util/__init__.py b/pbcore/util/__init__.py
new file mode 100644
index 0000000..6861c47
--- /dev/null
+++ b/pbcore/util/__init__.py
@@ -0,0 +1,29 @@
+#################################################################################
+# Copyright (c) 2011-2015, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
diff --git a/pbcore/util/decorators.py b/pbcore/util/decorators.py
new file mode 100644
index 0000000..660ed9f
--- /dev/null
+++ b/pbcore/util/decorators.py
@@ -0,0 +1,17 @@
+import warnings
+
+def deprecated(func):
+    '''This is a decorator which can be used to mark functions
+    as deprecated. It will result in a warning being emitted
+    the first time the decorated function is called.'''
+    def new_func(*args, **kwargs):
+        if not new_func.__called:  # warn only once per decorated function
+            warnings.warn('Call to deprecated function "{0}".'.format(func.__name__),
+                          stacklevel=2)  # stacklevel=2 attributes the warning to the caller
+            new_func.__called = True
+        return func(*args, **kwargs)
+    new_func.__name__ = func.__name__
+    new_func.__doc__ = func.__doc__
+    new_func.__dict__.update(func.__dict__)
+    new_func.__called = False
+    return new_func
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..4f5c568
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,32 @@
+from setuptools import setup, Extension, find_packages
+import sys
+
+if ("install" in sys.argv) and sys.version_info < (2, 7, 0):
+    print "pbcore requires Python 2.7"
+    sys.exit(-1)
+
+pkgGlobals = {}  # renamed from `globals`, which shadowed the builtin of the same name
+execfile("pbcore/__init__.py", pkgGlobals)  # read __VERSION__ without importing the package
+__VERSION__ = pkgGlobals["__VERSION__"]
+
+setup(
+    name = 'pbcore',
+    version=__VERSION__,
+    author='Pacific Biosciences',
+    author_email='devnet@pacificbiosciences.com',  # restored '@' (archive had rewritten it as ' at ')
+    license=open('LICENSES.txt').read(),
+    packages = find_packages('.'),
+    package_dir = {'':'.'},
+    package_data = {'pbcore': ['data/*.h5', 'data/*.gff', 'data/*.fasta',
+                               'data/*.fasta.fai', 'data/*.fofn', 'data/*.m4',
+                               'data/*.fa', 'data/*.fa.fai',
+                               'data/*.m5', 'data/*.bam', 'data/*.bam.bai', "data/*.bam.pbi",
+                               'chemistry/resources/*.xml']
+                               },
+    zip_safe = False,
+    entry_points = { "console_scripts" : [ ".open = pbcore.io.opener:entryPoint" ] },
+    install_requires=[
+        'h5py >= 2.0.1',
+        'numpy >= 1.6.0',
+        'pysam == 0.8.1'
+    ])
diff --git a/tests/test_pbcore_data.py b/tests/test_pbcore_data.py
new file mode 100644
index 0000000..329ff36
--- /dev/null
+++ b/tests/test_pbcore_data.py
@@ -0,0 +1,12 @@
+from nose.tools import assert_equal
+from pbcore import data
+
+class TestGetCmpH5s:
+    def test_get_cmp_h5s(self):
+        for item in data.getCmpH5s():
+            assert 'cmph5' in item
+            assert 'bash5s' in item
+
+class TestGetCmpH5:
+    def test_get_cmp_h5(self):
+        assert data.getCmpH5().endswith(".cmp.h5")
diff --git a/tests/test_pbcore_io_AlnFileReaders.py b/tests/test_pbcore_io_AlnFileReaders.py
new file mode 100644
index 0000000..2ac62c5
--- /dev/null
+++ b/tests/test_pbcore_io_AlnFileReaders.py
@@ -0,0 +1,375 @@
+from numpy.testing import (assert_array_almost_equal as ASIM,
+                           assert_array_equal        as AEQ)
+from nose.tools import (nottest,
+                        assert_raises,
+                        assert_equal as EQ)
+from nose import SkipTest
+
+import numpy as np
+import bisect
+import h5py
+from collections import Counter
+
+from pbcore import data
+from pbcore.io import CmpH5Reader, BamReader, IndexedBamReader
+from pbcore.sequence import reverseComplement as RC
+from pbcore.chemistry import ChemistryLookupError
+
+
+class _BasicAlnFileReaderTests(object):
+    """
+    Abstract base class for tests of the basic reader
+    functionality---functionality not requiring the bam.pbi index.
+
+    The tests are pretty tailored to the BAM/cmp.h5 files in
+    pbcore.data.
+    """
+    READER_CONSTRUCTOR = None
+    CONSTRUCTOR_ARGS   = None
+    BAX_FILE           = data.getBaxForBam()
+
+    def __init__(self):
+        self.f = self.READER_CONSTRUCTOR(*self.CONSTRUCTOR_ARGS)
+        self.alns = list(self.f)
+        self.fwdAln = self.alns[70]
+        self.revAln = self.alns[71]
+
+    def testBasicOperations(self):
+        EQ(False, self.f.isEmpty)
+        EQ(True,  self.f.isSorted)
+        EQ(115,   len(self.f))
+
+    def testStrandOrientation(self):
+        EQ(True,  self.fwdAln.isForwardStrand)
+        EQ(False, self.fwdAln.isReverseStrand)
+        EQ(False, self.revAln.isForwardStrand)
+        EQ(True,  self.revAln.isReverseStrand)
+
+    def testReadName(self):
+        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9681_9727",
+           self.fwdAln.readName)
+        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9561_9619",
+           self.revAln.readName)
+
+    def testAlignedRead(self):
+        expectedFwdNative = "TACGGTCATCATCTGACACTACAGACTCTGGCATCGCTGTGAAGAC"
+        EQ(expectedFwdNative, self.fwdAln.read(aligned=True))
+        EQ(expectedFwdNative, self.fwdAln.read())
+        EQ(expectedFwdNative, self.fwdAln.read(orientation="genomic"))
+        expectedRevNative = "CTTGTGAAAATGCTGAATTCT-GCGTCG-CTTCACCAGCGATGCCA-AGTCTGTAGTGTCA"
+        EQ(expectedRevNative, self.revAln.read(aligned=True))
+        EQ(expectedRevNative, self.revAln.read())
+        EQ(RC(expectedRevNative), self.revAln.read(orientation="genomic"))
+
+    def testUnalignedRead(self):
+        expectedFwdNative = "TACGGTCATCATCTGACACTACAGACTCTGGCATCGCTGTGAAGAC"
+        EQ(expectedFwdNative, self.fwdAln.read(aligned=False))
+        EQ(expectedFwdNative, self.fwdAln.read(aligned=False, orientation="genomic"))
+        expectedRevNative = "CTTGTGAAAATGCTGAATTCTGCGTCGCTTCACCAGCGATGCCAAGTCTGTAGTGTCA"
+        EQ(expectedRevNative, self.revAln.read(aligned=False))
+        EQ(RC(expectedRevNative), self.revAln.read(aligned=False, orientation="genomic"))
+
+    def testAlignedReference(self):
+        expectedFwdNative = "TACGGTCATCATCTGACACTACAGACTCTGGCATCGCTGTGAAGAC"
+        EQ(expectedFwdNative, self.fwdAln.reference(aligned=True))
+        EQ(expectedFwdNative, self.fwdAln.reference())
+        EQ(expectedFwdNative, self.fwdAln.reference(orientation="genomic"))
+        expectedRevNative = "CTTGTGAAAATGCTGAATT-TCGCGTCGTCTTCA-CAGCGATGCCAGAGTCTGTAGTGTCA"
+        EQ(expectedRevNative, self.revAln.reference(aligned=True))
+        EQ(expectedRevNative, self.revAln.reference())
+        EQ(RC(expectedRevNative), self.revAln.reference(orientation="genomic"))
+
+    def testUnalignedReference(self):
+        expectedFwdNative = "TACGGTCATCATCTGACACTACAGACTCTGGCATCGCTGTGAAGAC"
+        EQ(expectedFwdNative, self.fwdAln.reference(aligned=False))
+        EQ(expectedFwdNative, self.fwdAln.reference(aligned=False, orientation="genomic"))
+        expectedRevNative = "CTTGTGAAAATGCTGAATTTCGCGTCGTCTTCACAGCGATGCCAGAGTCTGTAGTGTCA"
+        EQ(expectedRevNative, self.revAln.reference(aligned=False))
+        EQ(RC(expectedRevNative), self.revAln.reference(aligned=False, orientation="genomic"))
+
+    def testDeletionQV(self):
+        expectedFwdNative = [ 17,  17,   7,  17,  17,   6,  17,  17,  17,  17,  17,  17,  17,
+                              17,  17,  17,  17,  17,  17,  17,  17,  17,  17,  17,  17,  17,
+                              17,  17,  17,  17,  17,  17,   7,  17,  17,  17,  17,  17,  17,
+                              17,  17,  17,  17,  17,  17,  17 ]
+        AEQ(expectedFwdNative, self.fwdAln.DeletionQV(aligned=True))
+        AEQ(expectedFwdNative, self.fwdAln.DeletionQV())
+        AEQ(expectedFwdNative, self.fwdAln.DeletionQV(orientation="genomic"))
+
+        expectedRevNative = [ 17,  17,  17,  17,  17,  17,  17,  17,  17,  17,  17,  17,  17,
+                              17,  17,  17,  17,  17,  17,  17,  17, 255,   7,  17,  17,  17,
+                              17,  17, 255,   6,  17,  17,  17,  17,  17,  17,  17,  17,  17,
+                              17,  17,  17,  17,  17,  17,  17, 255,  17,  17,  17,  17,  17,
+                              17,  17,  17,  17,  17,  17,  17,  17,  17 ]
+        AEQ(expectedRevNative, self.revAln.DeletionQV(aligned=True))
+        AEQ(expectedRevNative, self.revAln.DeletionQV())
+        AEQ(expectedRevNative[::-1], self.revAln.DeletionQV(orientation="genomic"))
+
+
+    # def testInsertionQV(self):
+    #     pass
+
+    # def testSubstitutionQV(self):
+    #     pass
+
+    # def testIPD(self):
+    #     pass
+
+    def testDeletionTag(self):
+        expectedFwdNative = [78, 78, 84, 78, 78, 67, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+                             78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 65, 78,
+                             78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78]
+        AEQ(expectedFwdNative, self.fwdAln.DeletionTag(aligned=True))
+        AEQ(expectedFwdNative, self.fwdAln.DeletionTag())
+        AEQ(expectedFwdNative, self.fwdAln.DeletionTag(orientation="genomic"))
+
+        expectedRevNative = [78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+                             78, 78, 78, 78, 45, 67, 78, 78, 78, 78, 78, 45, 84, 78, 78, 78, 78,
+                             78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 45, 78, 78, 78, 78,
+                             78, 78, 78, 78, 78, 78, 78, 78, 78, 78]
+        AEQ(expectedRevNative, self.revAln.DeletionTag(aligned=True))
+        AEQ(expectedRevNative, self.revAln.DeletionTag())
+
+        # TODO: what is the correct behavior here?
+        #AEQ(expectedRevNative[::-1], self.revAln.DeletionTag(orientation="genomic"))
+
+    def testClippedAlignments(self):
+        # Get a more interesting (more gappy) fwd strand aln
+        a = self.alns[2]
+        EQ([(980, 'C', 'C'),
+            (981, 'C', 'C'),
+            (982, 'T', 'T'),
+            (983, 'A', '-'),
+            (984, 'C', 'C'),
+            (985, '-', 'G'),
+            (985, 'T', 'T'),
+            (986, 'T', 'T') ],
+           zip(a.referencePositions(), a.reference(), a.read())[308:316])
+
+        ac1 = a.clippedTo(983, 985)
+        EQ(983, ac1.referenceStart)
+        EQ(985, ac1.referenceEnd)
+        EQ([(983, 'A', '-'),
+            (984, 'C', 'C')],
+           zip(ac1.referencePositions(), ac1.reference(), ac1.read()))
+
+        ac2 = a.clippedTo(982, 986)
+        EQ(982, ac2.referenceStart)
+        EQ(986, ac2.referenceEnd)
+        EQ([(982, 'T', 'T'),
+            (983, 'A', '-'),
+            (984, 'C', 'C'),
+            (985, '-', 'G'),
+            (985, 'T', 'T')],
+           zip(ac2.referencePositions(), ac2.reference(), ac2.read()))
+
+        ac3 = a.clippedTo(984, 985)
+        EQ(984, ac3.referenceStart)
+        EQ(985, ac3.referenceEnd)
+        EQ([(984, 'C', 'C')],
+           zip(ac3.referencePositions(), ac3.reference(), ac3.read()))
+
+        # Get a more interesting (more gappy) rev strand aln
+        b = self.alns[3]
+        EQ([(2216, 'G', 'G'),
+            (2215, 'G', 'G'),
+            (2214, '-', 'C'),
+            (2214, 'C', 'C'),
+            (2213, 'A', 'A'),
+            (2212, 'T', 'T'),
+            (2211, 'G', 'G'),
+            (2210, 'C', 'C'),
+            (2209, 'T', 'T'),
+            (2208, 'G', '-'),
+            (2207, 'G', 'G'),
+            (2206, 'C', 'C')],
+           zip(b.referencePositions(), b.reference(), b.read())[188:200])
+
+        bc1 = b.clippedTo(2208, 2214)
+        EQ([(2213, 'A', 'A'),
+            (2212, 'T', 'T'),
+            (2211, 'G', 'G'),
+            (2210, 'C', 'C'),
+            (2209, 'T', 'T'),
+            (2208, 'G', '-')],
+           zip(bc1.referencePositions(), bc1.reference(), bc1.read()))
+
+        bc2 = b.clippedTo(2207, 2215)
+        EQ([(2214, 'C', 'C'),
+            (2213, 'A', 'A'),
+            (2212, 'T', 'T'),
+            (2211, 'G', 'G'),
+            (2210, 'C', 'C'),
+            (2209, 'T', 'T'),
+            (2208, 'G', '-'),
+            (2207, 'G', 'G')],
+           zip(bc2.referencePositions(), bc2.reference(), bc2.read()))
+
+        bc3 = b.clippedTo(2209, 2214)
+        EQ([(2213, 'A', 'A'),
+            (2212, 'T', 'T'),
+            (2211, 'G', 'G'),
+            (2210, 'C', 'C'),
+            (2209, 'T', 'T')],
+           zip(bc3.referencePositions(), bc3.reference(), bc3.read()))
+
+
+        # Test clipping in a large deletion
+        d = self.alns[52]
+        EQ([(16191, 'C', 'C'),
+            (16192, 'A', 'A'),
+            (16193, 'G', 'G'),
+            (16194, 'C', 'C'),
+            (16195, 'A', 'A'),
+            (16196, 'G', '-'),
+            (16197, 'G', '-'),
+            (16198, 'T', '-'),
+            (16199, 'G', 'G'),
+            (16200, 'A', 'A'),
+            (16201, 'G', 'G')],
+           zip(d.referencePositions(), d.reference(), d.read())[129:140])
+        dc1 = d.clippedTo(16196, 16198)
+
+        # where's the test code?
+
+    def testBaxAttaching(self):
+        # Before attaching, should get sane exceptions
+        with assert_raises(ValueError):
+           self.fwdAln.zmw
+
+        with assert_raises(ValueError):
+           self.fwdAln.zmwRead
+
+        # Now attach
+        self.f.attach(self.BAX_FILE)
+        EQ('m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9681_9727',
+           self.fwdAln.readName)
+        EQ('m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957'
+           , self.fwdAln.zmwName)
+        EQ('<Zmw: m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957>',
+           repr(self.fwdAln.zmw))
+        EQ('<ZmwRead: m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/7957/9681_9727>',
+           repr(self.fwdAln.zmwRead))
+
+        # Check read contents, for every aln.
+        for aln in self.alns:
+            EQ(aln.read(aligned=False, orientation="native"), aln.zmwRead.basecalls())
+
+
+    def testClippingsVsBaxData(self):
+        self.f.attach(self.BAX_FILE)
+        for aln in [self.alns[52], self.alns[8]]:
+            for cS in xrange(aln.tStart, aln.tEnd + 1):
+                for cE in xrange(cS + 1, min(aln.tEnd, cS + 10)):
+                    ca = aln.clippedTo(cS, cE)
+                    EQ(ca.zmwRead.basecalls(),
+                       ca.read(aligned=False, orientation="native"))
+
+    def testReadsInRange(self):
+        wLen = 1000
+        for wStart in xrange(0, 50000, wLen):
+            wEnd = wStart + wLen
+            expectedNames = set([ a.readName for a in self.alns
+                                  if (a.referenceName == "lambda_NEB3011" and
+                                      a.overlapsReferenceRange(wStart, wEnd)) ])
+            EQ(expectedNames,
+               set([ a.readName for a in self.f.readsInRange("lambda_NEB3011", wStart, wEnd) ]))
+
+    def testReadGroupTable(self):
+        rgFwd = self.fwdAln.readGroupInfo
+        EQ([('ID', '<i4'), ('MovieName', 'O'), ('ReadType', 'O'), ('SequencingChemistry', 'O')], rgFwd.dtype)
+        EQ("P6-C4", rgFwd.SequencingChemistry)
+        EQ("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0", rgFwd.MovieName)
+        #EQ("bar", rgFwd.ReadType)
+
+    def testSequencingChemistry(self):
+        EQ(["P6-C4"], self.f.sequencingChemistry)
+        EQ("P6-C4", self.fwdAln.sequencingChemistry)
+        EQ("P6-C4", self.revAln.sequencingChemistry)
+
+
+
+class _IndexedAlnFileReaderTests(_BasicAlnFileReaderTests):
+    """
+    Abstract base class for tests of the reader functionality
+    requiring an alignment index (or bam.pbi index)
+    """
+
+    def testMapQV(self):
+        c = Counter(self.f.mapQV)
+        EQ(Counter({254: 115}), c)
+
+    def testHoleNumbers(self):
+        c  = Counter([a.holeNumber for a in self.f])   # from records
+        c2 = Counter(self.f.holeNumber)                # from index
+        expected = Counter({37134: 14, 6251: 10, 32861: 8, 14743: 4, 35858: 3,
+                            39571: 3, 13473: 3, 32560: 3, 46835: 3, 47698: 3, 16996: 3,
+                            30983: 2, 38025: 2, 36363: 2, 7957: 2, 49050: 2, 23454: 2,
+                            49194: 2, 24494: 2, 20211: 2, 50621: 2, 12736: 2, 19915: 2,
+                            6469: 2, 31174: 2, 32328: 2, 42827: 2, 7247: 2, 50257: 2,
+                            2771: 2, 1650: 2, 45203: 2, 24962: 1, 32901: 1, 36628: 1,
+                            26262: 1, 15641: 1, 19360: 1, 42165: 1, 44356: 1, 51534: 1,
+                            29843: 1, 38754: 1, 52206: 1, 49521: 1, 7670: 1, 54396: 1,
+                            19837: 1})
+        EQ(expected, c)
+        EQ(expected, c2)
+
+    def testAlignedIdentity(self):
+        pass
+
+    def testReadsByName(self):
+        reads2771_1 = self.f.readsByName("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/*")
+        reads2771_2 = self.f.readsByName("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771")
+        reads2771_3 = self.f.readsByName("m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/")
+
+        expectedReadNames = ["m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/8741_8874",
+                             "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0/2771/8942_9480"]
+
+        EQ(expectedReadNames, [r.readName for r in reads2771_1])
+        EQ(expectedReadNames, [r.readName for r in reads2771_2])
+        EQ(expectedReadNames, [r.readName for r in reads2771_3])
+
+
+class TestCmpH5(_IndexedAlnFileReaderTests):
+    READER_CONSTRUCTOR = CmpH5Reader
+    CONSTRUCTOR_ARGS   = (data.getBamAndCmpH5()[1],)
+
+    #
+    # Test behaviors specific to CmpH5Reader, which should be few.
+    #
+    def testLazyChemistryResolution(self):
+        """
+        The CmpH5Reader allows reading of files that have missing
+        chemistry information---an exception will be thrown only upon
+        attempts to access the information.  We need to retain this
+        behavior for compatibility.  """
+        oldCmpH5 = data.getCmpH5()
+
+        C = CmpH5Reader(oldCmpH5) # no exception here
+
+        with assert_raises(ChemistryLookupError):
+            C.sequencingChemistry
+
+        with assert_raises(ChemistryLookupError):
+            C[0].sequencingChemistry
+
+
+class TestBasicBam(_BasicAlnFileReaderTests):
+    READER_CONSTRUCTOR = BamReader
+    CONSTRUCTOR_ARGS   = (data.getBamAndCmpH5()[0], data.getLambdaFasta())
+
+    def testSpecVersion(self):
+        EQ("3.0b3",     self.f.version)
+
+    # def testNoLegacyBamTags(self):
+    #     # junk from older PacBio BAM spec versions doesn't belong
+    #     tagNames = [x[0] for x in self.fwdAln.peer.tags]
+    #     EQ(set(["RG",
+    #             "qs", "qe", "zm", "np", "rq",
+    #             "dq", "dt", "iq", "mq", "sq"]),
+    #        set(tagNames))
+
+
+class TestIndexedBam(_IndexedAlnFileReaderTests):
+    READER_CONSTRUCTOR = IndexedBamReader
+    CONSTRUCTOR_ARGS   = (data.getBamAndCmpH5()[0], data.getLambdaFasta())
diff --git a/tests/test_pbcore_io_BarcodeH5Reader.py b/tests/test_pbcore_io_BarcodeH5Reader.py
new file mode 100644
index 0000000..eb26f89
--- /dev/null
+++ b/tests/test_pbcore_io_BarcodeH5Reader.py
@@ -0,0 +1,141 @@
+import nose.tools
+import numpy
+import numpy.testing
+
+import pbcore.data
+
+from pbcore.data import MOVIE_NAME_BC
+from pbcore.io.BarcodeH5Reader import BarcodeH5Reader, BarcodeH5Fofn, MPBarcodeH5Reader, LabeledZmw
+
+class TestBarcodeH5Reader(object):
+    """Tests of BarcodeH5Reader against three generic BarcodeH5 files
+    """
+
+    def __init__(self):
+        bcFiles = pbcore.data.getBcH5s()
+        print bcFiles
+        self.bc1, self.bc2, self.bc3 = map(BarcodeH5Reader, bcFiles)
+
+    def test_BarcodeH5Reader_basicTest(self):
+        """Test that BcH5Reader correctly sets movie name, barcode labels, and hole numbers
+        """
+
+        nose.tools.assert_equal(MOVIE_NAME_BC, self.bc1.movieName)
+        numpy.testing.assert_array_equal(["F3--R3", "F4--R4", "F6--R6", "F7--R7"],
+                                         self.bc1.barcodeLabels)
+        numpy.testing.assert_array_equal([ 922, 1416, 1436, 1466, 1480, 1551,
+                                          1561, 1564, 1765, 1902, 1925, 1982,
+                                          2111, 2133, 2136, 2139, 2210, 2306],
+                                          self.bc1.holeNumbers)
+
+        nose.tools.assert_equal(MOVIE_NAME_BC, self.bc2.movieName)
+        numpy.testing.assert_array_equal(["F3--R3", "F4--R4", "F6--R6", "F7--R7"],
+                                         self.bc2.barcodeLabels)
+        numpy.testing.assert_array_equal([54505, 54506, 54507, 54516, 54535, 54542,
+                                          54543, 54547, 54562, 54588, 54618, 54622,
+                                          54632, 54633, 54645, 54650, 54653, 54658],
+                                          self.bc2.holeNumbers)
+
+        nose.tools.assert_equal(MOVIE_NAME_BC, self.bc3.movieName)
+        numpy.testing.assert_array_equal(["F3--R3", "F4--R4", "F6--R6", "F7--R7"],
+                                         self.bc3.barcodeLabels)
+        numpy.testing.assert_array_equal([108990, 109015, 109016, 109017, 109021, 109023,
+                                          109029, 109031, 109032, 109033, 109036, 109040,
+                                          109042, 109045, 109047, 109071, 109075, 109081],
+                                          self.bc3.holeNumbers)
+
+    def test_BarcodeH5Reader_iterator(self):
+        """Test that BcH5Reader correctly iterates over its labeled ZMWs
+        """
+
+        labeledZmws1 = [ lZmw for lZmw in self.bc1.labeledZmws.values() ]
+        sortedZmws1 = sorted(labeledZmws1, key=lambda z: z.holeNumber)
+        nose.tools.assert_equal(sortedZmws1, list(self.bc1))
+
+        labeledZmws2 = [ lZmw for lZmw in self.bc2.labeledZmws.values() ]
+        sortedZmws2 = sorted(labeledZmws2, key=lambda z: z.holeNumber)
+        nose.tools.assert_equal(sortedZmws2, list(self.bc2))
+
+        labeledZmws3 = [ lZmw for lZmw in self.bc3.labeledZmws.values() ]
+        sortedZmws3 = sorted(labeledZmws3, key=lambda z: z.holeNumber)
+        nose.tools.assert_equal(sortedZmws3, list(self.bc3))
+
+class TestBarcodeH5Fofn(object):
+    """Tests of BarcodeH5Fofn against 3 generic BarcodeH5 files
+    """
+
+    def __init__(self):
+        bcFofn = pbcore.data.getBcFofn()
+        print bcFofn
+        self.bcFofn = BarcodeH5Fofn(bcFofn)
+        print self.bcFofn
+
+    def test_BasH5Fofn_basicTest(self):
+        """Test that BcH5Fofn correctly sets movie name, barcode labels, and hole numbers
+        """
+
+        nose.tools.assert_equal(1, len(self.bcFofn.movieNames))
+        numpy.testing.assert_array_equal(MOVIE_NAME_BC, self.bcFofn.movieNames[0])
+        numpy.testing.assert_array_equal(["F3--R3", "F4--R4", "F6--R6", "F7--R7"],
+                                         self.bcFofn.barcodeLabels)
+        nose.tools.assert_equal("paired", self.bcFofn.scoreMode)
+
+        numpy.testing.assert_array_equal([ 922, 1416, 1436, 1466, 1480, 1551,
+                                          1561, 1564, 1765, 1902, 1925, 1982,
+                                          2111, 2133, 2136, 2139, 2210, 2306,
+                                          54505, 54506, 54507, 54516, 54535, 54542,
+                                          54543, 54547, 54562, 54588, 54618, 54622,
+                                          54632, 54633, 54645, 54650, 54653, 54658,
+                                          108990, 109015, 109016, 109017, 109021, 109023,
+                                          109029, 109031, 109032, 109033, 109036, 109040,
+                                          109042, 109045, 109047, 109071, 109075, 109081],
+                                          self.bcFofn.holeNumbers)
+
+    def test_BcH5Fofn_iterator(self):
+        """Test that BcH5Fofn correctly iterates over its labeled ZMWs
+        """
+
+        labeledZmws = [ lZmw for reader in self.bcFofn._bcH5s
+                             for lZmw in reader ]
+        nose.tools.assert_equal(labeledZmws, list(self.bcFofn))
+
+    def test_BcH5Fofn_indexing(self):
+        """Test that BcH5Fofn's indexing correctly slices and returns its contents
+        """
+
+        holeNumTest = self.bcFofn[922]
+        nose.tools.assert_true(isinstance(holeNumTest, LabeledZmw))
+        nose.tools.assert_equal(holeNumTest.holeNumber, 922)
+
+        barcodeTest = self.bcFofn["F3--R3"]
+        nose.tools.assert_true(isinstance(barcodeTest, list))
+        barcodeTestHoleNums = [lzmw.holeNumber for lzmw in barcodeTest]
+        numpy.testing.assert_array_equal([ 1416,  1551,  1561,   1765,   1902,   1925,   2133,
+                                          54506, 54588, 54618, 109033, 109036, 109071, 109081],
+                                         barcodeTestHoleNums)
+
+        movieTest = self.bcFofn[MOVIE_NAME_BC]
+        nose.tools.assert_true(isinstance(movieTest, MPBarcodeH5Reader))
+        movieTestHoleNums = [lzmw.holeNumber for lzmw in movieTest]
+        numpy.testing.assert_array_equal([ 922, 1416, 1436, 1466, 1480, 1551,
+                                          1561, 1564, 1765, 1902, 1925, 1982,
+                                          2111, 2133, 2136, 2139, 2210, 2306,
+                                          54505, 54506, 54507, 54516, 54535, 54542,
+                                          54543, 54547, 54562, 54588, 54618, 54622,
+                                          54632, 54633, 54645, 54650, 54653, 54658,
+                                          108990, 109015, 109016, 109017, 109021, 109023,
+                                          109029, 109031, 109032, 109033, 109036, 109040,
+                                          109042, 109045, 109047, 109071, 109075, 109081],
+                                         movieTestHoleNums)
+
+        movieBarcodeTest = self.bcFofn[MOVIE_NAME_BC + "/F3--R3"]
+        movieBarcodeTestHoleNums = [lzmw.holeNumber for lzmw in movieBarcodeTest]
+        numpy.testing.assert_array_equal([ 1416,  1551,  1561,   1765,   1902,   1925,   2133,
+                                          54506, 54588, 54618, 109033, 109036, 109071, 109081],
+                                         movieBarcodeTestHoleNums)
+
+        zmwTest = self.bcFofn[MOVIE_NAME_BC + "/922"]
+        nose.tools.assert_equal(zmwTest.holeNumber, 922)
+
+        subreadTest = self.bcFofn[MOVIE_NAME_BC + "/922/0_1000"]
+        nose.tools.assert_equal(subreadTest.holeNumber, 922)
\ No newline at end of file
diff --git a/tests/test_pbcore_io_BasH5Collection.py b/tests/test_pbcore_io_BasH5Collection.py
new file mode 100644
index 0000000..ef6554e
--- /dev/null
+++ b/tests/test_pbcore_io_BasH5Collection.py
@@ -0,0 +1,28 @@
+from nose.tools import assert_equal, assert_true, assert_false
+from numpy.testing import assert_array_equal
+from StringIO import StringIO
+
+from pbcore.io import BasH5Collection
+from pbcore import data
+
+def lookupSomeReadsByName(bc):
+    pass
+
+def test():
+    for fofn in data.getFofns():
+        bc = BasH5Collection(fofn)
+
+        for zmw in bc:
+            zmwAgain = bc[zmw.zmwName]
+            assert_equal(zmw.zmwName, zmwAgain.zmwName)
+
+
+
+def test_read_iterators():
+    for fofn in data.getFofns():
+        bc = BasH5Collection(fofn)
+
+        # TODO Add some meaningful tests here
+        list(bc.subreads())
+        list(bc.reads())
+        list(bc.ccsReads())
diff --git a/tests/test_pbcore_io_BasH5Reader.py b/tests/test_pbcore_io_BasH5Reader.py
new file mode 100644
index 0000000..a1d5e10
--- /dev/null
+++ b/tests/test_pbcore_io_BasH5Reader.py
@@ -0,0 +1,494 @@
+import inspect
+import os
+
+import h5py
+import nose.tools
+import numpy
+import numpy.testing
+
+import pbcore.data
+
+from pbcore.io.BasH5IO import BasH5Reader, Zmw, ZmwRead, CCSZmwRead
+from pbcore.chemistry import ChemistryLookupError
+
+class TestBasH5Reader_14:
+    """Tests of BasH5Reader against a 1.4 bas.h5 file, no multipart with
+    CCS.
+    """
+
+    def __init__(self):
+        self.cmpH5 = pbcore.io.CmpH5Reader(pbcore.data.getCmpH5())
+        basFiles = pbcore.data.getBasH5s()
+        self.bas1, self.bas2 = map(pbcore.io.BasH5Reader, basFiles)
+
+    def test_BasH5Reader_basicTest(self):
+        """Test that BasH5Reader correctly sets moviename, identifies the
+        sequencingZmws, and finds the subreads for each Zmw.
+        """
+
+        nose.tools.assert_equal(pbcore.data.MOVIE_NAME_14, self.bas1.movieName)
+        numpy.testing.assert_array_equal([   7,    8,    9, 1000, 1006, 1007,
+                                          2001, 2003, 2007, 2008, 3004, 3006,
+                                          3008, 4004, 4005, 4006, 4007, 4009],
+                                          self.bas1.sequencingZmws)
+        numpy.testing.assert_array_equal([   7,    8,    9, 1000, 1001, 1002,
+                                          1003, 1004, 1005, 1006, 1007, 1008,
+                                          1009, 2000, 2001, 2002, 2003, 2004,
+                                          2005, 2006, 2007, 2008, 2009, 3000,
+                                          3001, 3002, 3003, 3004, 3005, 3006,
+                                          3007, 3008, 3009, 4000, 4001, 4002,
+                                          4003, 4004, 4005, 4006, 4007, 4008,
+                                          4009],
+                                          self.bas1.allSequencingZmws)
+
+        for zmw in self.bas1:
+            nose.tools.assert_greater(len(zmw.subreads), 0)
+
+    def test_BasH5Reader_basecallsVsCmpH5(self):
+        """Compare datasets in the bas.h5 file against those in a corresponding
+        cmp.h5 file.
+        """
+
+        aln = self.cmpH5[2]
+        nose.tools.assert_equal(os.path.join(pbcore.data.MOVIE_NAME_14, "2001", "3580_3922"),
+                                aln.readName)
+
+        zmwRead = self.bas1[2001].read(3580, 3922)
+        nose.tools.assert_equal(os.path.join(pbcore.data.MOVIE_NAME_14, "2001", "3580_3922"),
+                                zmwRead.readName)
+
+        # Verify that the bases and a couple of quality values are the same
+        nose.tools.assert_equal(aln.read(aligned=False), zmwRead.basecalls())
+        numpy.testing.assert_array_equal(aln.InsertionQV(aligned=False),
+                                         zmwRead.InsertionQV())
+        numpy.testing.assert_array_equal(aln.DeletionQV(aligned=False),
+                                         zmwRead.DeletionQV())
+        numpy.testing.assert_array_equal(aln.QualityValue(aligned=False),
+                                         zmwRead.QualityValue())
+
+    def test_BasH5Reader_regionTableAccessors(self):
+        """Test that BasH5Reader can read the region table and find
+        HQ, insert, and adapter regions.
+        """
+
+        zmw = self.bas1[7]
+        numpy.testing.assert_array_equal(
+            numpy.array([[   7,    1,    0,  299,   -1],
+                         [   7,    1,  343,  991,   -1],
+                         [   7,    1, 1032, 1840,   -1],
+                         [   7,    0,  299,  343,  681],
+                         [   7,    0,  991, 1032,  804],
+                         [   7,    2,    0, 1578,    0]], dtype=numpy.int32),
+            zmw.regionTable.view(dtype=(numpy.int32, 5)))
+
+        nose.tools.assert_equal((0, 1578), zmw.hqRegion)
+        nose.tools.assert_equal([(299, 343), (991, 1032)], zmw.adapterRegions)
+        nose.tools.assert_equal([(0, 299), (343, 991), (1032, 1578)],
+                                zmw.insertRegions)
+
+    def test_BasH5Reader_ccs(self):
+        """Test that BasH5Reader can read the CCS bases."""
+
+        nose.tools.assert_equal(self.bas1[4006].ccsRead.basecalls(),
+           ''.join(['GGCGCACGGAGGAGCAAGCGTGACAGTCCCACGTCATGCCCGCCGACG',
+                    'ATATCGAGCTCGCGCTCACCGCCAGGGTGTGAAGTGAATTCACGGTGC',
+                    'CGCCGAAAGCTGGGCCGGCTTTCGTTCCTTCGCCGGTCAGGAGAAGGC',
+                    'GGACCCCGTCGTGGGCCATTCCGAGCCTGGAGACAGCGGTCGAAAAAG',
+                    'CCTTCGCCAAGCCGGTGGCCAAATGGTCGGCCAGCGAGAATCCGTGC']))
+
+    def test_BasH5Reader_productivity(self):
+        nose.tools.assert_equal(1, self.bas1[4006].productivity)
+
+    def test_BasH5Reader_readScore(self):
+        nose.tools.assert_almost_equal(0.7822426, self.bas1[4006].readScore)
+
+    @nose.tools.raises(ChemistryLookupError)
+    def test_14_missing_chemistry(self):
+        """Tests that we raise an exception when we can't find chemistry information"""
+        self.bas1.sequencingChemistry
+
+    def test_ZmwRead_len(self):
+        """Test that ZmwRead objects have the correct len."""
+        nose.tools.assert_equal(1126, len(self.bas1[4006].read().basecalls()))
+        nose.tools.assert_equal(1126, len(self.bas1[4006].read()))
+        nose.tools.assert_equal(464,
+                                len(self.bas1[4006].subreads[0].basecalls()))
+        nose.tools.assert_equal(464, len(self.bas1[4006].subreads[0]))
+        nose.tools.assert_equal(239, len(self.bas1[4006].ccsRead.basecalls()))
+        nose.tools.assert_equal(239, len(self.bas1[4006].ccsRead))
+
+class CommonTests(object):
+
+    ZMW_ATTRIBUTES = ['QualityValue', 'InsertionQV', 'DeletionQV',
+                      'DeletionTag', 'SubstitutionQV', 'SubstitutionTag',
+                      'MergeQV', 'IPD', 'PreBaseFrames', 'PulseWidth',
+                      'WidthInFrames']
+
+    def test_all_fields_accessible(self):
+        # Test that zmws have correct pulse/quality attributes
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+
+        for zmw in reader.sequencingZmws:
+            read = reader[zmw].read()
+            for attribute in self.ZMW_ATTRIBUTES:
+                nose.tools.assert_is_instance(getattr(read, attribute)(),
+                                              numpy.ndarray)
+            numpy.testing.assert_array_equal(read.IPD(), read.PreBaseFrames())
+            numpy.testing.assert_array_equal(read.PulseWidth(),
+                                             read.WidthInFrames())
+
+    def test_zmw_region_table(self):
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+
+        sequencing_zmws = set(reader.sequencingZmws)
+
+        for zmw in reader.allSequencingZmws:
+            region_table = reader[zmw].regionTable.tolist()
+            hq_entry = [k for k in region_table if k[1] == 2][0]
+
+            hq_size = hq_entry[3] - hq_entry[2]
+            # Sequencing Zmws should have an HQ region
+            if zmw not in sequencing_zmws:
+                nose.tools.assert_equal(hq_size, 0)
+            else:
+                nose.tools.assert_greater(hq_size, 0)
+
+            for entry in region_table:
+                nose.tools.assert_equal(entry[0], zmw)
+                nose.tools.assert_less_equal(entry[2], entry[3])
+        reader.close()
+
+class ReadIteratorTests(object):
+
+    def test_read_iterators(self):
+        # The reader-level iterators must agree with per-ZMW traversal for
+        # the bas.h5 file and for every bax.h5 part, so open each in turn.
+        for fname in [self.bash5_filename] + self.baxh5_filenames:
+            reader = pbcore.io.BasH5Reader(fname)
+
+            if reader.hasConsensusBasecalls:
+                ccsReads = [ zmw.ccsRead
+                             for zmw in reader
+                             if zmw.ccsRead is not None ]
+                nose.tools.assert_equal(ccsReads, list(reader.ccsReads()))
+            else:
+                nose.tools.assert_equal([], list(reader.ccsReads()))
+
+            if reader.hasRawBasecalls:
+                subreads = [ subread
+                             for zmw in reader
+                             for subread in zmw.subreads ]
+                nose.tools.assert_equal(subreads, list(reader.subreads()))
+                reads = [ zmw.read() for zmw in reader ]
+                nose.tools.assert_equal(reads, list(reader.reads()))
+            else:
+                nose.tools.assert_equal([], list(reader.reads()))
+                nose.tools.assert_equal([], list(reader.subreads()))
+
+class CommonMultiPartTests(object):
+
+    def test_multipart_constructor_bash5(self):
+        # Test the constructor of a multipart bas.h5 file
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        nose.tools.assert_is_instance(reader.file, h5py.File)
+
+        # Should have three parts for v2.0 and v2.1
+        nose.tools.assert_equal(len(reader.parts), 3)
+        nose.tools.assert_list_equal(self.baxh5_filenames,
+                [k.filename for k in reader.parts])
+
+        # All bas.h5 files should have raw base calls. 2.1 bas.h5 files don't
+        # have consensus base calls
+        nose.tools.assert_true(reader.hasRawBasecalls)
+
+
+        for zmw in reader.sequencingZmws:
+            nose.tools.assert_in(zmw, reader.allSequencingZmws)
+            nose.tools.assert_is_instance(reader[zmw], Zmw)
+
+        nose.tools.assert_less_equal(len(reader.sequencingZmws),
+                                        len(reader.allSequencingZmws))
+
+        reader.close()
+
+    def test_multippart_constructor_baxh5(self):
+        # Test constructor of baxh5 files
+        for filename in self.baxh5_filenames:
+            reader = pbcore.io.BasH5Reader(filename)
+            nose.tools.assert_is_instance(reader.file, h5py.File)
+
+            nose.tools.assert_equal(len(reader.parts), 1)
+            nose.tools.assert_true(reader.hasRawBasecalls)
+
+            for zmw in reader.sequencingZmws:
+                nose.tools.assert_in(zmw, reader.allSequencingZmws)
+                nose.tools.assert_is_instance(reader[zmw], Zmw)
+
+            nose.tools.assert_less_equal(len(reader.sequencingZmws),
+                                            len(reader.allSequencingZmws))
+
+            reader.close()
+
+    def test_multipart_hole_lookup(self):
+        # Test that multipart files look up files and hole numbers correctly
+        hole_number_to_filename = {}
+        for filename in self.baxh5_filenames:
+            f = h5py.File(filename, 'r')
+            for hole_number in f['PulseData/BaseCalls/ZMW/HoleNumber']:
+                hole_number_to_filename[hole_number] = filename
+            f.close()
+
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+
+        for hole_number in hole_number_to_filename:
+            zmw = reader[hole_number]
+            nose.tools.assert_equal(zmw.baxH5.filename,
+                                    hole_number_to_filename[hole_number])
+            nose.tools.assert_is_instance(zmw, Zmw)
+
+        reader.close()
+
+    def _clip_region(self, region, hq_region):
+        end = min(region[1], hq_region[1])
+        start = max(region[0], hq_region[0])
+        if start >= end:
+            return None
+        else:
+            return (start, end)
+
+    def test_zmw_multipart_regions(self):
+
+        regions = []
+
+        # First read in the regions from the h5 files directly
+        for filename in self.baxh5_filenames:
+            with h5py.File(filename, 'r') as f:
+                region_table = f['PulseData/Regions']
+                regions.extend(region_table.value.tolist())
+
+        # Now see what BasH5Reader reports for regions
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        for zmw in reader.allSequencingZmws:
+            region_table = reader[zmw].regionTable.tolist()
+
+            true_regions = [k for k in regions if k[0] == zmw]
+            true_hq_region = [k for k in true_regions if k[1] == 2][0]
+
+            reported_hq_region = reader[zmw].hqRegion
+            nose.tools.assert_equal(reported_hq_region[0], true_hq_region[2])
+            nose.tools.assert_equal(reported_hq_region[1], true_hq_region[3])
+
+            # Check the reported adapter regions
+            reported_adapter_regions = reader[zmw].adapterRegions
+            true_adapter_regions = [k for k in true_regions if k[1] == 0]
+            region_count = 0
+            for region in true_adapter_regions:
+                bound = (region[2], region[3])
+                clipped_region = self._clip_region(bound, reported_hq_region)
+                if clipped_region:
+                    nose.tools.assert_in(clipped_region,
+                                         reported_adapter_regions)
+                    region_count += 1
+            nose.tools.assert_equal(region_count, len(reported_adapter_regions))
+
+            # And the reported insert regions
+            reported_insert_regions = reader[zmw].insertRegions
+            true_insert_regions = [k for k in true_regions if k[1] == 1]
+            region_count = 0
+            for region in true_insert_regions:
+                bound = (region[2], region[3])
+                clipped_region = self._clip_region(bound, reported_hq_region)
+                if clipped_region:
+                    nose.tools.assert_in(clipped_region,
+                                         reported_insert_regions)
+                    region_count += 1
+            nose.tools.assert_equal(region_count, len(reported_insert_regions))
+
+
+class TestBasH5Reader_20(CommonTests, CommonMultiPartTests, ReadIteratorTests):
+    """Tests of BasH5Reader against 2.0 ba[sx].h5 files, consisting of a
+    bas.h5 file and three bax.h5 files. The bax.h5 files also contain CCS.
+    """
+
+    def __init__(self):
+        """Get the full paths to the bas and bax.h5 files."""
+
+        self.bash5_filename = pbcore.data.getBasH5_v20()
+        self.baxh5_filenames = pbcore.data.getBaxH5_v20()
+
+
+    def test_20_constructor_bash5(self):
+        # Tests specific to the v2.0 bas.h5 constructor
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        nose.tools.assert_true(reader.hasConsensusBasecalls)
+        nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_20)
+
+        reader.close()
+
+    def test_20_constructor_baxh5(self):
+        # Tests specific to the v2.0 bax.h5 constructor
+        for filename in self.baxh5_filenames:
+            reader = pbcore.io.BasH5Reader(filename)
+            nose.tools.assert_true(reader.hasConsensusBasecalls)
+            nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_20)
+            reader.close()
+
+    @nose.tools.raises(ChemistryLookupError)
+    def test_20_missing_chemistry(self):
+        """Tests that we raise an exception when we can't find chemistry information"""
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        reader.sequencingChemistry
+
+    def test_productivity(self):
+        """Test that productivities are set correctly for the ZMW objects."""
+        productivities = {}
+        for filename in self.baxh5_filenames:
+            f = h5py.File(filename, 'r')
+            hn_to_prod = dict(zip(f["PulseData/BaseCalls/ZMW/HoleNumber"],
+                                  f["PulseData/BaseCalls/ZMWMetrics/Productivity"]))
+            productivities.update(hn_to_prod)
+            f.close()
+
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+
+        for hn in productivities:
+            nose.tools.assert_equal(reader[hn].productivity,
+                                    productivities[hn])
+
+
+class TestBasH5Reader_21(CommonTests, CommonMultiPartTests, ReadIteratorTests):
+    """Tests of BasH5Reader against 2.1 ba[sx].h5 files, consisting of a
+    bas.h5 file and three bax.h5 files. The bax.h5 files do not contain CCS.
+    """
+
+    def __init__(self):
+        """Get the full paths to the bas and bax.h5 files."""
+        self.bash5_filename = pbcore.data.getBasH5_v21()
+        self.baxh5_filenames = pbcore.data.getBaxH5_v21()
+
+    def test_21_constructor_bash5(self):
+        # Tests specific to the v2.1 bas.h5 constructor
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        nose.tools.assert_false(reader.hasConsensusBasecalls)
+        nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_21)
+
+        reader.close()
+
+    def test_21_constructor_baxh5(self):
+        # Tests specific to the v2.1 bax.h5 constructor
+        for filename in self.baxh5_filenames:
+            reader = pbcore.io.BasH5Reader(filename)
+            nose.tools.assert_false(reader.hasConsensusBasecalls)
+            nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_21)
+            reader.close()
+
+    def test_21_external_region_baxh5(self):
+        """Test the optional region file override"""
+        for baxfile in self.baxh5_filenames:
+            # Count of the subreads using internal region table
+            reader = pbcore.io.BaxH5Reader(baxfile)
+            bax_subread_count = len([x for x in reader.subreads()])
+
+            # Count of subreads using external region table
+            rgnfile = baxfile.replace('bax.h5','rgn.h5')
+            reader.loadExternalRegions(rgnfile)
+            rgn_subread_count = len([x for x in reader.subreads()])
+
+            nose.tools.assert_true(rgn_subread_count < bax_subread_count)
+
+    @nose.tools.raises(ChemistryLookupError)
+    def test_21_missing_chemistry(self):
+        """Tests that we raise an exception when we can't find chemistry information"""
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        reader.sequencingChemistry
+
+    @nose.tools.raises(IOError)
+    def test_21_bad_external_region_baxh5(self):
+        """Tests that we raise an exception when incorrect region file given"""
+        baxfiles = self.baxh5_filenames
+        baxfile = baxfiles[0]
+        rgnfile = baxfiles[1].replace('bax.h5','rgn.h5')
+        pbcore.io.BaxH5Reader(baxfile, regionH5Filename=rgnfile)
+
+class TestBasH5Reader_23(CommonTests, CommonMultiPartTests, ReadIteratorTests):
+    """Tests of BasH5Reader against 2.3 ba[sx].h5 files, consisting of a
+    bas.h5 file and three bax.h5 files. The bax.h5 files do not contain CCS,
+    but do contain Chemistry information
+    """
+
+    def __init__(self):
+        """Get the full paths to the bas and bax.h5 files."""
+        self.bash5_filename = pbcore.data.getBasH5_v23()
+        self.baxh5_filenames = pbcore.data.getBaxH5_v23()
+
+    def test_23_constructor_bash5(self):
+        # Tests specific to the v2.3 bas.h5 constructor
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        nose.tools.assert_false(reader.hasConsensusBasecalls)
+        nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_23)
+        nose.tools.assert_equal(reader.sequencingChemistry, 'P6-C4')
+
+        reader.close()
+
+    def test_23_constructor_baxh5(self):
+        # Tests specific to the v2.3 bax.h5 constructor
+        for filename in self.baxh5_filenames:
+            reader = pbcore.io.BasH5Reader(filename)
+            nose.tools.assert_false(reader.hasConsensusBasecalls)
+            nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_23)
+            nose.tools.assert_equal(reader.sequencingChemistry, 'P6-C4')
+            reader.close()
+
+class TestBasH5Reader_CCS(ReadIteratorTests):
+    """Test BasH5Reader with a ccs.h5 file produced by P_CCS."""
+
+    def __init__(self):
+        """Get the full path to the ccs.h5 file (there are no bax.h5 parts)."""
+        self.bash5_filename = pbcore.data.getCCSH5()
+        self.baxh5_filenames = []
+
+    def test_constructor_ccsh5(self):
+        # Test that BasH5Reader initializes correctly with a ccs.h5 file
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+        nose.tools.assert_is_instance(reader.file, h5py.File)
+
+        nose.tools.assert_true(reader.hasConsensusBasecalls)
+        nose.tools.assert_false(reader.hasRawBasecalls)
+        nose.tools.assert_equal(reader.movieName, pbcore.data.MOVIE_NAME_CCS)
+
+        nose.tools.assert_equal(len(reader.parts), 1)
+
+        for zmw in reader.sequencingZmws:
+            nose.tools.assert_in(zmw, reader.allSequencingZmws)
+            nose.tools.assert_is_instance(reader[zmw], Zmw)
+
+        nose.tools.assert_less_equal(len(reader.sequencingZmws),
+                                        len(reader.allSequencingZmws))
+
+        reader.close()
+
+    def test_ccs_zmw(self):
+        # Test Zmw objects derived from a BasH5Reader reading a ccs.h5
+        reader = pbcore.io.BasH5Reader(self.bash5_filename)
+
+        sequencing_zmws = set(reader.sequencingZmws)
+        for zmw in reader.allSequencingZmws:
+            region_table = reader[zmw].regionTable
+            nose.tools.assert_equal(len(region_table), 1)
+            nose.tools.assert_equal(region_table[0][0], zmw)
+            nose.tools.assert_equal(region_table[0][1], 2)
+
+            nose.tools.assert_equal(len(reader[zmw].insertRegions), 0)
+            nose.tools.assert_equal(len(reader[zmw].adapterRegions), 0)
+
+            with nose.tools.assert_raises(ValueError):
+                reader[zmw].subreads
+
+            with nose.tools.assert_raises(ValueError):
+                reader[zmw].read()
+
+            if zmw in sequencing_zmws:
+                nose.tools.assert_is_instance(reader[zmw].ccsRead,
+                                              CCSZmwRead)
+            else:
+                nose.tools.assert_is_none(reader[zmw].ccsRead)
diff --git a/tests/test_pbcore_io_BlasrIO.py b/tests/test_pbcore_io_BlasrIO.py
new file mode 100644
index 0000000..abbb0bf
--- /dev/null
+++ b/tests/test_pbcore_io_BlasrIO.py
@@ -0,0 +1,10 @@
+
+from pbcore.io import M4Reader, M5Reader
+import pbcore.data as D
+
+
+def test_m4():
+    l = list(M4Reader(D.getBlasrM4()))
+
+def test_m5():
+    l = list(M5Reader(D.getBlasrM5()))
diff --git a/tests/test_pbcore_io_FastaIO.py b/tests/test_pbcore_io_FastaIO.py
new file mode 100644
index 0000000..ae14241
--- /dev/null
+++ b/tests/test_pbcore_io_FastaIO.py
@@ -0,0 +1,133 @@
+from nose.tools import assert_equal, assert_true, assert_false
+from pbcore import data
+from pbcore.io import FastaReader, FastaWriter, FastaRecord
+from StringIO import StringIO
+
+class TestFastaRecord:
+
+    def setup(self):
+        self.header = "chr1|blah|blah\tblah blah"
+        self.rc_header = "chr1|blah|blah\tblah blah [revcomp]"
+        self.id = "chr1|blah|blah"
+        self.comment = "blah blah"
+        self.sequence = "GATTACA" * 20
+        self.rc_sequence = "TGTAATC" * 20
+        self.length = 140
+        self.expected__str__ = (
+            ">chr1|blah|blah\tblah blah\n"
+            "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT\n"
+            "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG\n"
+            "ATTACAGATTACAGATTACA")
+        self.rc1_expected__str__ = (
+            ">chr1|blah|blah\tblah blah [revcomp]\n"
+            "TGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTA\n"
+            "ATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCT\n"
+            "GTAATCTGTAATCTGTAATC")
+        self.rc2_expected__str__ = (
+            ">chr1|blah|blah\tblah blah\n"
+            "TGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTA\n"
+            "ATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCT\n"
+            "GTAATCTGTAATCTGTAATC")
+        self.record = FastaRecord(self.header, self.sequence)
+        self.rc1_record = self.record.reverseComplement()
+        self.rc2_record = self.record.reverseComplement(True)
+
+    def test__init__(self):
+        assert_equal(self.header, self.record.header)
+        assert_equal(self.sequence, self.record.sequence)
+        assert_equal(self.id, self.record.id)
+        assert_equal(self.comment, self.record.comment)
+
+    def test__str__(self):
+        assert_equal(self.expected__str__, str(self.record))
+
+    def test_fromString(self):
+        recordFromString = FastaRecord.fromString(self.expected__str__)
+        assert_equal(self.header, recordFromString.header)
+        assert_equal(self.sequence, recordFromString.sequence)
+
+    def test_md5(self):
+        assert_equal("67fc75ce599ed0ca1fc8ed2dcbccc95d",
+                     self.record.md5)
+
+    def test_reverse_complement1(self):
+        assert_equal(self.rc1_record.header, self.rc_header)
+        assert_equal(self.rc1_record.sequence, self.rc_sequence)
+        assert_equal(self.rc1_expected__str__, str(self.rc1_record))
+
+    def test_reverse_complement2(self):
+        assert_equal(self.rc2_record.header, self.header)
+        assert_equal(self.rc2_record.sequence, self.rc_sequence)
+        assert_equal(self.rc2_expected__str__, str(self.rc2_record))
+
+    def test_len(self):
+        assert_equal(self.length, len(self.record))
+        assert_equal(self.length, len(self.rc1_record))
+        assert_equal(self.length, len(self.rc2_record))
+
+    def test_eq(self):
+        header = 'r1'
+        seq = 'ACGT'
+        r1 = FastaRecord(header, seq)
+        r2 = FastaRecord(header, seq)
+        assert_true(r1 == r2)
+
+    def test_not_equal(self):
+        r1 = FastaRecord('r1', 'ACGT')
+        r2 = FastaRecord('r2', 'ACGT')
+        r3 = FastaRecord('r1', 'ACGT')
+        assert_true(r1 != r2)
+        assert_false(r1 != r3)
+
+
+class TestFastaReader:
+
+    def test_readFasta(self):
+        f = FastaReader(data.getFasta())
+        entries = list(f)
+        assert_equal(48, len(entries))
+        assert_equal("ref000001|EGFR_Exon_2", entries[0].header)
+        assert_equal("TTTCTTCCAGTTTGCCAAGGCACGAGTAACAAGCTCACGCAGTTGGGCACTTT"
+                     "TGAAGATCATTTTCTCAGCCTCCAGAGGATGTTCAATAACTGTGAGGTGGTCC"
+                     "TTGGGAATTTGGAAATTACCTATGTGCAGAGGAATTATGATCTTTCCTTCTTA"
+                     "AAGGTTGGTGACTTTGATTTTCCT",
+                     entries[0].sequence)
+        assert_equal("e3912e9ceacd6538ede8c1b2adda7423",
+                     entries[0].md5)
+
+    def test_dosLineEndingsFasta(self):
+        f = FastaReader(data.getDosFormattedFasta())
+        entries = list(f)
+        for e in entries:
+            assert_true("\r" not in e.header)
+            assert_equal(16, len(e.sequence))
+
+
+
+class TestFastaWriter:
+
+    def setup(self):
+        self.fasta1 = StringIO(
+            ">chr1|blah|blah\n"                                              \
+            "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT\n" \
+            "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG\n" \
+            "ATTACAGATTACAGATTACA\n")
+        self.fasta2 = StringIO(self.fasta1.getvalue() + "\n" +               \
+            ">chr2|blah|blah\n"                                              \
+            "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT\n" \
+            "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG\n" \
+            "ATTACAGATTACAGATTACA\n")
+
+    def test_writeFasta1(self):
+        f = StringIO()
+        w = FastaWriter(f)
+        for record in FastaReader(self.fasta1):
+            w.writeRecord(record)
+        assert_equal(self.fasta1.getvalue(), f.getvalue())
+
+    def test_writeFasta2(self):
+        f = StringIO()
+        w = FastaWriter(f)
+        for record in FastaReader(self.fasta1):
+            w.writeRecord(record.header, record.sequence)
+        assert_equal(self.fasta1.getvalue(), f.getvalue())
diff --git a/tests/test_pbcore_io_FastaTable.py b/tests/test_pbcore_io_FastaTable.py
new file mode 100644
index 0000000..7321f11
--- /dev/null
+++ b/tests/test_pbcore_io_FastaTable.py
@@ -0,0 +1,80 @@
+from nose.tools import assert_equal, assert_true, assert_false
+from pbcore import data
+from pbcore.io import FastaReader, FastaWriter, IndexedFastaReader
+
+
+class TestIndexedFastaReader:
+
+    def setup(self):
+        self.fastaPath = data.getFasta()
+
+    def testIteration(self):
+        ft = IndexedFastaReader(self.fastaPath)
+        fr = FastaReader(self.fastaPath)
+        ftContigs = list(ft)
+        frContigs = list(fr)
+        assert_equal(len(frContigs), len(ftContigs))
+        assert_equal(48, len(ftContigs))
+        for ftC, frC in zip(ftContigs, frContigs):
+            assert_equal(frC.header, ftC.header)
+            assert_equal(frC.sequence, ftC.sequence[:])
+
+        # Unlike FastaReader, IndexedFastaReader iteration is repeatable.
+        assert_equal(48, len(list(ft)))
+
+    def testAccessByName(self):
+        ft = IndexedFastaReader(self.fastaPath)
+        r000021 = ft["ref000021|EGFR_Exon_22\tMetadataTest"]
+        assert_equal("ref000021|EGFR_Exon_22\tMetadataTest", r000021.header)
+        assert_equal("ref000021|EGFR_Exon_22", r000021.id)
+        assert_equal("MetadataTest", r000021.comment)
+        assert_equal("CACTGCCTCATCTCTCACCATCCCAAGGTGCCTATCAAGTGGATGGCATTGGAATCAATT"
+                     "TTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGTGAGTCATAATCCTGA"
+                     "TGCTAATGAGTTTGTACTGAGGCCAAGCTGG",
+                     r000021.sequence[:])
+
+    def testAccessById(self):
+        ft = IndexedFastaReader(self.fastaPath)
+        r000021 = ft["ref000021|EGFR_Exon_22"]
+        assert_equal("ref000021|EGFR_Exon_22\tMetadataTest", r000021.header)
+        assert_equal("ref000021|EGFR_Exon_22", r000021.id)
+        assert_equal("MetadataTest", r000021.comment)
+        assert_equal("CACTGCCTCATCTCTCACCATCCCAAGGTGCCTATCAAGTGGATGGCATTGGAATCAATT"
+                     "TTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGTGAGTCATAATCCTGA"
+                     "TGCTAATGAGTTTGTACTGAGGCCAAGCTGG",
+                     r000021.sequence[:])
+
+    def testAccessByPosition(self):
+        ft = IndexedFastaReader(self.fastaPath)
+        r000001 = ft[0]
+        assert_equal("<IndexedFastaRecord: ref000001|EGFR_Exon_2>", repr(r000001))
+        firstTwo = ft[:2]
+        assert_equal([ft[0], ft[1]], firstTwo)
+        lastTwo = ft[-2:]
+        assert_equal([ft[-2], ft[-1]], lastTwo)
+
+    def testSlice(self):
+        ft = IndexedFastaReader(self.fastaPath)
+        r000021 = ft["ref000021|EGFR_Exon_22"]
+        sequence = r000021.sequence
+        assert_equal("CACTGCCTCA",
+                     sequence[0:10])
+        assert_equal("GCCAAGCTGG",
+                     sequence[-10:])
+        assert_equal("G", sequence[-1])
+        assert_equal("T", sequence[-3])
+        assert_equal("C", sequence[0])
+        assert_equal("A", sequence[1])
+
+
+    def test_dosLineEndingsFasta(self):
+        fr = FastaReader(data.getDosFormattedFasta())
+        frEntries = list(fr)
+
+        ft = IndexedFastaReader(data.getDosFormattedFasta())
+        ftEntries = list(ft)
+
+        assert_equal(len(frEntries), len(ftEntries))
+        for (frE, ftE) in zip(frEntries, ftEntries):
+            assert_equal(frE.header, ftE.header)
+            assert_equal(frE.sequence, ftE.sequence[:])
diff --git a/tests/test_pbcore_io_FastqIO.py b/tests/test_pbcore_io_FastqIO.py
new file mode 100644
index 0000000..15e68bc
--- /dev/null
+++ b/tests/test_pbcore_io_FastqIO.py
@@ -0,0 +1,183 @@
+from nose.tools import assert_equal, assert_true, assert_false
+from numpy.testing import assert_array_equal
+from pbcore import data
+from StringIO import StringIO
+
+from pbcore.io.FastqIO import *
+
+
+# Test QV <-> string conversion routines
+class TestQvConversion:
+    def setup(self):
+        # Printable ASCII "!" through "~" (94 characters), one per QV 0..93.
+        self.ascii = ("!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
+                      "abcdefghijklmnopqrstuvwxyz{|}~")
+        self.qvs = range(0, 94)
+
+    def testAsciiFromQvs(self):  # QVs -> characters
+        assert_equal(self.ascii, asciiFromQvs(self.qvs))
+
+    def testQvsFromAscii(self):  # characters -> QVs
+        assert_array_equal(self.qvs, qvsFromAscii(self.ascii))
+
+
+class TestFastqRecord:
+    # Fixture-driven checks of FastqRecord: fields, str(), fromString, reverse complement, len, equality.
+    def setup(self):
+        self.header = "chr1|blah|blah\tblah blah"
+        self.rc_header = "chr1|blah|blah\tblah blah [revcomp]"
+        self.id = "chr1|blah|blah"
+        self.comment = "blah blah"
+        self.sequence = "GATTACA" * 20
+        self.rc_sequence = "TGTAATC" * 20
+        self.length = 140  # 7 bases * 20 repeats
+        self.quality  = [10,11,12,13,14,15,16] * 20
+        self.rc_quality = [16,15,14,13,12,11,10] * 20
+        self.qualityString = "+,-./01" * 20  # character form of self.quality (test__init__ expects both records equal)
+        self.rc_qualityString = "10/.-,+" * 20
+        self.expected__str__ = (
+            "@chr1|blah|blah\tblah blah\n"
+            "GATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATT"
+            "ACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAGATTACAG"
+            "ATTACAGATTACAGATTACA\n"
+            "+\n"
+            "+,-./01+,-./01+,-./01+,-./01+,-./01+,-./01+,-./01+,-./01+,-."
+            "/01+,-./01+,-./01+,-./01+,-./01+,-./01+,-./01+,-./01+,-./01+"
+            ",-./01+,-./01+,-./01")
+        self.rc1_expected__str__ = (  # reverseComplement(): header gains " [revcomp]"
+            "@chr1|blah|blah\tblah blah [revcomp]\n"
+            "TGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTA"
+            "ATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCT"
+            "GTAATCTGTAATCTGTAATC\n"
+            "+\n"
+            "10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/."
+            "-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+1"
+            "0/.-,+10/.-,+10/.-,+")
+        self.rc2_expected__str__ = (  # reverseComplement(True): header left unchanged
+            "@chr1|blah|blah\tblah blah\n"
+            "TGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTA"
+            "ATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCTGTAATCT"
+            "GTAATCTGTAATCTGTAATC\n"
+            "+\n"
+            "10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/."
+            "-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+10/.-,+1"
+            "0/.-,+10/.-,+10/.-,+")
+        self.record = FastqRecord(self.header, self.sequence, self.quality)  # built from numeric QVs
+        self.record2 = FastqRecord(self.header, self.sequence,
+                                   qualityString=self.qualityString)  # built from the quality string
+        self.rc1_record = self.record.reverseComplement()
+        self.rc2_record = self.record.reverseComplement(True)
+
+    def test__init__(self):
+        assert_equal(self.header, self.record.header)
+        assert_equal(self.sequence, self.record.sequence)
+        assert_equal(self.id, self.record.id)
+        assert_equal(self.comment, self.record.comment)
+        assert_array_equal(self.quality, self.record.quality)
+        assert_equal(self.record, self.record2)  # both construction routes must give equal records
+
+    def test__str__(self):
+        assert_equal(self.expected__str__, str(self.record))
+
+    def test_fromString(self):
+        recordFromString = FastqRecord.fromString(self.expected__str__)  # parse the text form back
+        assert_equal(self.header, recordFromString.header)
+        assert_equal(self.sequence, recordFromString.sequence)
+        assert_array_equal(self.quality, recordFromString.quality)
+
+    def test_reverse_complement1(self):  # reverseComplement() called without arguments
+        assert_equal(self.rc1_record.header, self.rc_header)
+        assert_equal(self.rc1_record.sequence, self.rc_sequence)
+        assert_equal(self.rc1_record.quality, self.rc_quality)
+        assert_equal(self.rc1_record.qualityString, self.rc_qualityString)
+        assert_equal(str(self.rc1_record), self.rc1_expected__str__)
+
+    def test_reverse_complement2(self):  # reverseComplement(True)
+        assert_equal(self.rc2_record.header, self.record.header)
+        assert_equal(self.rc2_record.sequence, self.rc_sequence)
+        assert_equal(self.rc2_record.quality, self.rc_quality)
+        assert_equal(self.rc2_record.qualityString, self.rc_qualityString)
+        assert_equal(str(self.rc2_record), self.rc2_expected__str__)
+
+    def test_len(self):
+        assert_equal(self.length, len(self.record))
+        assert_equal(self.length, len(self.rc1_record))
+        assert_equal(self.length, len(self.rc2_record))
+
+    def test_eq(self):
+        header = 'r1'
+        seq = 'ACGT'
+        qvs = list(xrange(10, 10 + len(seq)))  # one QV per base
+        r1 = FastqRecord(header, seq, qvs)
+        r2 = FastqRecord(header, seq, qvs)
+        assert_true(r1 == r2)
+        assert_false(r1 != r2)  # != must be consistent with ==
+
+    def test_not_equal(self):
+        header = 'r1'
+        seq = 'ACGT'
+        qvs = list(xrange(10, 10 + len(seq)))
+        r1 = FastqRecord(header, seq, qvs)
+        r2 = FastqRecord('r2', seq, qvs)  # differs from r1 only in its header
+        assert_true(r1 != r2)
+
+
+class TestFastqReader:
+    # Expected QVs below equal ord(char) - 33 for each quality character.
+    def setup(self):
+        firstRecord = ("@seq1\n"
+                       "GATTACA\n"
+                       "+\n"
+                       "789:;<=\n")
+        secondRecord = ("@seq2\n"
+                        "CATTAGA\n"
+                        "+\n"
+                        "@@@@@@@\n")
+        self.fastq1 = StringIO(firstRecord)
+        self.fastq2 = StringIO(firstRecord + secondRecord)
+
+    def test_readFastq1(self):
+        reader = FastqReader(self.fastq1)
+        records = list(reader)
+        assert_equal([FastqRecord("seq1", "GATTACA", range(22, 29))], records)
+
+    def test_readFastq2(self):
+        reader = FastqReader(self.fastq2)
+        records = list(reader)
+        assert_equal([FastqRecord("seq1", "GATTACA", range(22, 29)),
+                      FastqRecord("seq2", "CATTAGA", [31]*7)], records)
+
+
+class TestFastqWriter:  # read -> write round trips must reproduce the input text
+    def setup(self):
+        firstRecord = ("@seq1\n"
+                       "GATTACA\n"
+                       "+\n"
+                       "789:;<=\n")
+        secondRecord = ("@seq2\n"
+                        "CATTAGA\n"
+                        "+\n"
+                        "@@@@@@@\n")
+        self.fastq1 = StringIO(firstRecord)
+        self.fastq2 = StringIO(firstRecord + secondRecord)
+
+    def test_writeFastq1(self):
+        sink = StringIO()
+        writer = FastqWriter(sink)
+        for rec in FastqReader(self.fastq1):
+            writer.writeRecord(rec)
+        assert_equal(self.fastq1.getvalue(), sink.getvalue())
+
+    def test_writeFastq2(self):
+        sink = StringIO()
+        writer = FastqWriter(sink)
+        for rec in FastqReader(self.fastq2):
+            writer.writeRecord(rec)
+        assert_equal(self.fastq2.getvalue(), sink.getvalue())
+
+    def test_writeFastq3(self):
+        sink = StringIO()
+        writer = FastqWriter(sink)
+        for rec in FastqReader(self.fastq2):
+            writer.writeRecord(rec.header, rec.sequence, rec.quality)  # field-wise call form
+        assert_equal(self.fastq2.getvalue(), sink.getvalue())
diff --git a/tests/test_pbcore_io_FofnIO.py b/tests/test_pbcore_io_FofnIO.py
new file mode 100644
index 0000000..1df14cd
--- /dev/null
+++ b/tests/test_pbcore_io_FofnIO.py
@@ -0,0 +1,22 @@
+from nose.tools import assert_equal, assert_true, assert_false
+from numpy.testing import assert_array_equal
+from StringIO import StringIO
+from os.path import isabs
+
+from pbcore import data
+from pbcore.io import readFofn
+
+def test_simple():
+    # One path per line; the final line carries no trailing newline.
+    paths = list(readFofn(StringIO("/a/b\n/c/d")))
+    assert_array_equal(["/a/b", "/c/d"], paths)
+
+def test_empty_lines():
+    # Whitespace-only lines are expected to be dropped from the result.
+    paths = list(readFofn(StringIO("/a/b\n \n/c/d\n ")))
+    assert_array_equal(["/a/b", "/c/d"], paths)
+
+def test_absolutifying():
+    # Every path read from the FOFNs in data.getFofns() must be absolute.
+    for fofn in data.getFofns():
+        assert_true(all(isabs(path) for path in readFofn(fofn)))
diff --git a/tests/test_pbcore_io_GffIO.py b/tests/test_pbcore_io_GffIO.py
new file mode 100644
index 0000000..5f822e7
--- /dev/null
+++ b/tests/test_pbcore_io_GffIO.py
@@ -0,0 +1,100 @@
+from nose.tools import assert_equal, assert_raises
+from StringIO import StringIO
+from pbcore.io import GffWriter, Gff3Record, GffReader
+from pbcore import data
+
+class TestGff3Record:
+    # Checks Gff3Record string conversion, attribute modification, parsing and field access.
+    def setup(self):
+        self.record = Gff3Record("chr1", 10, 11, "insertion",
+                                 attributes=[("cat", "1"), ("dog", "2")])
+
+    def test_str(self):
+        assert_equal("chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2",
+                     str(self.record))
+
+    def test_modification(self):
+        record = self.record.copy()  # work on a copy of the fixture
+        record.dog = 3  # overwrite an attribute given at construction
+        record.cat = 4
+        record.mouse = 5  # attribute not present at construction
+        record.start = 100
+        record.end = 110
+        assert_equal("chr1\t.\tinsertion\t100\t110\t.\t.\t.\tcat=4;dog=3;mouse=5",
+                     str(record))
+
+    def test_fromString(self):
+        newRecord = Gff3Record.fromString(str(self.record))  # round trip through the text form
+        assert_equal(str(self.record),  str(newRecord))
+
+    def test_get(self):
+        """
+        Verify field access behavior
+        """
+        record = self.record  # unlike test_modification, this changes the fixture itself
+        record.dog = 3
+        record.cat = 4
+        record.mouse = 5
+        record.start = 100
+        record.end = 110
+
+        assert_equal(3, record.dog)
+        assert_equal(100, record.start)
+        with assert_raises(AttributeError):
+            record.god  # name that was never set
+
+        assert_equal(3, record.get("dog"))
+        assert_equal(None, record.get("god"))  # missing name, no default given
+        assert_equal(100, record.get("start", 100))  # NOTE(review): default equals the stored value, so a fallback would also pass
+
+
+
+
+class TestGffReader:
+    def setup(self):
+        self.reader = GffReader(data.getGff3())  # fresh reader for every test
+
+    def test_headers(self):
+        assert_equal(["##gff-version 3",
+                      "##pacbio-variant-version 2.1",
+                      "##date Sat Mar 22 12:16:13 2014",
+                      "##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12",
+                      "##source GenomicConsensus 0.8.0",
+                      "##source-commandline /Users/dalexander/.virtualenvs/VE/bin/variantCaller.py --algorithm=plurality -q20 -x5 pbcore/data/aligned_reads_1.cmp.h5 -r /Users/dalexander/Data/lambdaNEB.fa -o /tmp/v.gff",
+                      "##source-alignment-file /Users/dalexander/Dropbox/Sources/git/pbcore/pbcore/data/aligned_reads_1.cmp.h5",
+                      "##source-reference-file /Users/dalexander/Data/lambdaNEB.fa",
+                      "##sequence-region lambda_NEB3011 1 48502"],
+                     self.reader.headers)
+
+    def test__iter__(self):
+        # Raw lines after the 9 header lines; the file is closed once read.
+        with open(data.getGff3()) as rawFile:
+            rawLines = rawFile.readlines()[9:]
+        for record, rawLine in zip(self.reader, rawLines):
+            # No newlines or whitespace allowed in records
+            assert_equal(str(record).strip(), str(record))
+            # Make sure record matches line
+            assert_equal(rawLine.strip(), str(record))
+
+
+class TestGffWriter:
+    def setup(self):
+        # The writer targets an in-memory buffer so its output can be inspected.
+        self.outfile = StringIO()
+        self.gffWriter = GffWriter(self.outfile)
+        self.record1 = Gff3Record("chr1", 10, 11, "insertion",
+                                  attributes=[("cat", "1"), ("dog", "2")])
+        self.record2 = Gff3Record("chr1", 200, 201, "substitution",
+                                  attributes=[("mouse", "1"), ("moose", "2")])
+
+    def test_writeHeader(self):
+        self.gffWriter.writeHeader("##foo bar")
+        assert_equal("##gff-version 3\n##foo bar\n", self.outfile.getvalue())
+
+    def test_writeRecord(self):
+        for record in (self.record1, self.record2):
+            self.gffWriter.writeRecord(record)
+        expected = ("##gff-version 3\n"
+                    "chr1\t.\tinsertion\t10\t11\t.\t.\t.\tcat=1;dog=2\n"
+                    "chr1\t.\tsubstitution\t200\t201\t.\t.\t.\tmouse=1;moose=2\n")
+        assert_equal(expected, self.outfile.getvalue())
diff --git a/tests/test_pbcore_io_rangeQueries.py b/tests/test_pbcore_io_rangeQueries.py
new file mode 100644
index 0000000..6a5e1d8
--- /dev/null
+++ b/tests/test_pbcore_io_rangeQueries.py
@@ -0,0 +1,71 @@
+from nose.tools import assert_equal
+from numpy.testing import assert_array_equal
+
+import pbcore.io.rangeQueries as RQ
+from pbcore import data
+from pbcore.io import CmpH5Reader
+
+import bisect
+from numpy import *
+
+def brute_force_lm_search(vec, val):
+    # Index of the first element equal to val.  When val is absent, the
+    # largest element below val is located instead; if there is none the
+    # result is 0.
+    if val not in vec:
+        smaller = vec[vec < val]
+        if len(smaller) == 0:
+            return 0
+        val = max(smaller)
+    return next(i for i, element in enumerate(vec) if element == val)
+
+def brute_force_rm_search(vec, val):
+    # Uses bisect, so vec is presumably sorted: last index holding val, else
+    # the index of the smallest element above val, else len(vec).
+    if val in vec:
+        return bisect.bisect_right(vec, val) - 1
+    larger = vec[vec > val]
+    if len(larger) == 0:
+        return len(vec)
+    return bisect.bisect_left(vec, min(larger))
+
+class TestProjectIntoRange:
+    def test_project_into_range(self):
+        starts = array([1,1,1,1,1,2,2,2,2,10,20])   # interval starts
+        ends   = array([2,3,4,5,6,3,4,5,6,15,25])   # matching ends; expectations treat them as exclusive
+        assert_equal(True, all(RQ.projectIntoRange(starts, ends, 1, 6) == array([5, 8, 6, 4, 2])))
+        assert_equal(True, all(RQ.projectIntoRange(starts, ends, 20, 26) == array([1, 1, 1, 1, 1, 0])))
+
+def brute_force_reads_in_range(rangeStart, rangeEnd, tStart, tEnd):
+    # Indices i where tStart[i] < rangeEnd and tEnd[i] > rangeStart.
+    overlaps = (tStart < rangeEnd) & (rangeStart < tEnd)
+    return flatnonzero(overlaps)
+
+class TestGetReadsInRange:
+    def __init__(self):
+        self.h5FileName = data.getCmpH5()
+        self.cmpH5 = CmpH5Reader(self.h5FileName)
+
+    def test_get_reads_in_range(self):
+        hits = RQ.getReadsInRange(self.cmpH5, (1, 0, 100000), justIndices = True)
+        assert len(hits) == 84
+
+    def test_get_coverage_in_range(self):
+        coverage = RQ.getCoverageInRange(self.cmpH5, (1, 0, 100))
+        assert all(coverage == 2)
+
+    def test_reads_in_range2(self):
+        # Brute-force check that readsInRange returns the right answer
+        # for consecutive 50-base and 77-base windows of lambda
+        for blockSize in [50, 77]:
+            for winStart in xrange(0, 45000, blockSize):
+                winEnd = winStart + blockSize
+                assert_array_equal(brute_force_reads_in_range(winStart, winEnd, self.cmpH5.tStart, self.cmpH5.tEnd),
+                                   self.cmpH5.readsInRange(1, winStart, winEnd, justIndices=True))
+
+    def test_coverage_in_range2(self):
+        # Brute force over lambda: one-base windows taken every 50 bases
+        for winStart in xrange(0, 45000, 50):
+            winEnd = winStart + 1
+            expected = len(brute_force_reads_in_range(winStart, winEnd, self.cmpH5.tStart, self.cmpH5.tEnd))
+            assert_array_equal([expected], RQ.getCoverageInRange(self.cmpH5, (1, winStart, winEnd)))
diff --git a/tests/test_pbcore_io_unaligned_bam.py b/tests/test_pbcore_io_unaligned_bam.py
new file mode 100644
index 0000000..d9857a3
--- /dev/null
+++ b/tests/test_pbcore_io_unaligned_bam.py
@@ -0,0 +1,68 @@
+from numpy.testing import (assert_array_almost_equal as ASIM,
+                           assert_array_equal        as AEQ)
+from nose.tools import (nottest,
+                        assert_raises,
+                        assert_equal as EQ)
+from nose import SkipTest
+
+import numpy as np
+import bisect
+import h5py
+from collections import Counter
+
+from pbcore import data
+from pbcore.io import BamReader, BaxH5Reader
+from pbcore.io.align._BamSupport import UnavailableFeature
+
+from pbcore.sequence import reverseComplement as RC
+
+class TestUnalignedBam(object):
+    # Compares the first BAM record with the first subread of the bax file from data.getBaxForBam().
+    def __init__(self):
+        self.bam = BamReader(data.getUnalignedBam())
+        self.bax = BaxH5Reader(data.getBaxForBam())
+
+        self.baxRead0 = next(self.bax.subreads())
+        self.bamRead0 = next(iter(self.bam))
+
+    def testInvalidOperations(self):
+
+        # These kinds of things presently work.  Do we want them to
+        # fail?
+        #
+        # with assert_raises(UnavailableFeature):
+        #     self.bamRead0.isForwardStrand
+        # with assert_raises(UnavailableFeature):
+        #     self.bamRead0.tStart
+
+        # Requests for aligned or genomically oriented data must raise, and
+        # so must the calls that pass no arguments at all.
+        rejected = (dict(aligned=True, orientation="native"),
+                    dict(aligned=False, orientation="genomic"),
+                    dict())
+        for accessor in ("read", "InsertionQV"):
+            for kwargs in rejected:
+                with assert_raises(UnavailableFeature):
+                    getattr(self.bamRead0, accessor)(**kwargs)
+
+    def testReadAccess(self):
+        # Unaligned, native-orientation bases must equal the bax basecalls.
+        EQ(self.bamRead0.read(aligned=False, orientation="native"), self.baxRead0.basecalls())
+
+    def testQvAccess(self):
+        # Each per-base feature must match its bax counterpart.
+        unaligned = dict(aligned=False, orientation="native")
+        AEQ(self.bamRead0.SubstitutionQV(**unaligned), self.baxRead0.SubstitutionQV())
+        AEQ(self.bamRead0.InsertionQV(**unaligned),    self.baxRead0.InsertionQV())
+        AEQ(self.bamRead0.DeletionTag(**unaligned),    self.baxRead0.DeletionTag())
+
+    def testZmwInfo(self):
+        # WAT.  Need to make these accessors more uniform.  This is
+        # totally crazy.
+        EQ(self.bamRead0.HoleNumber, self.baxRead0.holeNumber)
+        EQ(self.bamRead0.qStart,     self.baxRead0.readStart)
+        EQ(self.bamRead0.qEnd,       self.baxRead0.readEnd)
+
+    def testNames(self):
+        # The BAM query name must equal the bax read name.
+        EQ(self.bamRead0.queryName, self.baxRead0.readName)
diff --git a/tests/test_pbcore_util_sequences.py b/tests/test_pbcore_util_sequences.py
new file mode 100644
index 0000000..7c475f2
--- /dev/null
+++ b/tests/test_pbcore_util_sequences.py
@@ -0,0 +1,48 @@
+import nose
+from nose.tools import assert_equal, assert_true, assert_false
+from pbcore import sequence
+
+class TestReverseComplement:
+    # Checks sequence.reverse, complement and reverseComplement on a repeated "GATTACA" fixture.
+    def setup(self):
+        self.sequence = "GATTACA" * 20
+        self.reverse = "ACATTAG" * 20
+        self.complement = "CTAATGT" * 20
+        self.reverse_complement = "TGTAATC" * 20
+        self.bad_sequence = "AGCTR" * 20  # the error tests expect ValueError for this input
+
+    def test_reverse(self):
+        rev = sequence.reverse
+        # Reversing twice restores the input.
+        assert_equal(self.sequence, rev(rev(self.sequence)))
+        assert_equal(self.reverse, rev(self.sequence))
+        # Reversing the reverse complement leaves the plain complement.
+        assert_equal(self.complement, rev(self.reverse_complement))
+
+    def test_complement(self):
+        comp = sequence.complement
+        # Complementing maps each fixture onto the other.
+        assert_equal(self.sequence, comp(self.complement))
+        assert_equal(self.complement, comp(self.sequence))
+        # Complementing the reverse complement leaves the plain reverse.
+        assert_equal(self.reverse, comp(self.reverse_complement))
+
+    def test_reverseComplement(self):
+        revComp = sequence.reverseComplement
+        # Applied in either direction, the two fixtures map onto each other.
+        assert_equal(self.reverse_complement, revComp(self.sequence))
+        assert_equal(self.sequence, revComp(self.reverse_complement))
+
+    @nose.tools.raises(ValueError)
+    def test_complement_error(self):
+        sequence.complement(self.bad_sequence)
+
+    @nose.tools.raises(ValueError)
+    def test_reverse_complement_error(self):
+        sequence.reverseComplement(self.bad_sequence)
+
+
+class TestSplitRecordName:
+    # Placeholder: only an empty setup is defined; this file adds no test methods after it.
+    def setup(self):
+        pass

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pbcore.git



More information about the debian-med-commit mailing list